From b03de33657a6ba4f198e54fdc40072c31c340ce6 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Thu, 4 Jun 2026 15:09:31 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: rbelanec/train_record_42_1779354541 Source: Original Platform --- .gitattributes | 36 + README.md | 81 + all_results.json | 13 + config.json | 39 + eval_results.json | 8 + generation_config.json | 12 + model.safetensors | 3 + special_tokens_map.json | 26 + tokenizer.json | 3 + tokenizer_config.json | 2069 ++++ train.yaml | 55 + train_results.json | 9 + trainer_log.jsonl | 3144 +++++ trainer_state.json | 25207 ++++++++++++++++++++++++++++++++++++++ training_args.bin | 3 + training_eval_loss.png | Bin 0 -> 39979 bytes training_loss.png | Bin 0 -> 41354 bytes 17 files changed, 30708 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 eval_results.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train.yaml create mode 100644 train_results.json create mode 100644 trainer_log.jsonl create mode 100644 trainer_state.json create mode 100644 training_args.bin create mode 100644 training_eval_loss.png create mode 100644 training_loss.png diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..cb72055 --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +--- +library_name: transformers +license: llama3.2 +base_model: meta-llama/Llama-3.2-1B-Instruct +tags: +- peft-factory +- full +- llama-factory +- generated_from_trainer +model-index: +- name: train_record_42_1779354541 + results: [] +--- + + + +# train_record_42_1779354541 + +This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) on the record dataset. +It achieves the following results on the evaluation set: +- Loss: 0.3557 +- Num Input Tokens Seen: 49166912 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-06 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Input Tokens Seen | +|:-------------:|:------:|:-----:|:---------------:|:-----------------:| +| 0.7605 | 0.0501 | 782 | 0.6366 | 2474432 | +| 0.6538 | 0.1001 | 1564 | 0.5419 | 4931328 | +| 0.5239 | 0.1502 | 2346 | 0.5114 | 7397056 | +| 0.5889 | 0.2002 | 3128 | 0.4917 | 9832064 | +| 0.4277 | 0.2503 | 3910 | 0.4677 | 12304064 | +| 0.3708 | 0.3004 | 4692 | 0.4652 | 14775488 | +| 0.5873 | 0.3504 | 5474 | 0.4432 | 17259840 | +| 0.3556 | 0.4005 | 6256 | 0.4279 | 19707456 | +| 0.3775 | 0.4505 | 7038 | 0.4363 | 22178432 | +| 0.3997 | 0.5006 | 7820 | 0.4178 | 24646208 | +| 0.3435 | 0.5507 | 8602 | 0.4014 | 27101056 | +| 0.4129 | 0.6007 | 9384 | 0.3946 | 29544576 | +| 0.324 | 0.6508 | 10166 | 0.3816 | 32010176 | +| 0.4286 | 0.7009 | 10948 | 0.3744 | 34475136 | +| 0.3097 | 0.7509 | 11730 | 0.3673 | 36931648 | +| 0.3395 | 0.8010 | 12512 | 0.3655 | 39382144 | +| 0.2868 | 0.8510 | 13294 | 0.3591 | 41847872 | +| 0.3511 | 0.9011 | 14076 | 0.3564 | 44318848 | +| 0.2686 | 0.9512 | 14858 | 0.3557 | 46767552 | + + +### Framework versions + +- Transformers 4.51.3 +- Pytorch 2.10.0+cu128 +- Datasets 4.0.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..dc8a802 --- /dev/null +++ b/all_results.json @@ -0,0 +1,13 @@ +{ + "epoch": 1.0, + "eval_loss": 0.35565948486328125, + "eval_runtime": 50.5621, + "eval_samples_per_second": 274.633, + "eval_steps_per_second": 34.334, + "num_input_tokens_seen": 49166912, + "total_flos": 2.8707953551107686e+17, + "train_loss": 0.44674425404505724, + "train_runtime": 6032.4024, + "train_samples_per_second": 20.716, + "train_steps_per_second": 2.59 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5a2b93f --- /dev/null +++ b/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.51.3", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..25084bb --- /dev/null +++ b/eval_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.0, + "eval_loss": 0.35565948486328125, + "eval_runtime": 50.5621, + "eval_samples_per_second": 274.633, + "eval_steps_per_second": 34.334, + "num_input_tokens_seen": 49166912 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..2b8ae57 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.3" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..30c306a --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d63d3e22800cc73cb89b691aba36424099be348d0d6d44bd1e831da74c6a7b9 +size 4943274328 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..14daf45 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,26 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1c1d8d5 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ddc3ce0 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2069 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.yaml b/train.yaml new file mode 100644 index 0000000..5b670d9 --- /dev/null +++ b/train.yaml @@ -0,0 +1,55 @@ +seed: 42 + +### model +model_name_or_path: meta-llama/Llama-3.2-1B-Instruct +trust_remote_code: true +flash_attn: auto +use_cache: false + +### method +stage: sft +do_train: true +finetuning_type: full + +### dataset +dataset: record +template: llama3 +cutoff_len: 2048 +overwrite_cache: true +preprocessing_num_workers: 4 +dataloader_num_workers: 4 +packing: false + +### output +output_dir: saves_bts_preliminary/base/llama-3.2-1b-instruct/train_record_42_1779354541 +logging_steps: 5 +save_steps: 0.05 +overwrite_output_dir: true +save_only_model: false +plot_loss: true +include_num_input_tokens_seen: true +push_to_hub: true +push_to_hub_organization: rbelanec +load_best_model_at_end: true +save_total_limit: 1 + +### train +per_device_train_batch_size: 8 +learning_rate: 2.0e-6 +num_train_epochs: 1 +weight_decay: 1.0e-2 +lr_scheduler_type: cosine +bf16: true +ddp_timeout: 180000000 +resume_from_checkpoint: null +warmup_ratio: 0.1 +optim: adamw_torch +report_to: +- wandb +run_name: base_llama-3.2-1b-instruct_train_record_42_1779354541 + +### eval +per_device_eval_batch_size: 8 +eval_strategy: steps +eval_steps: 0.05 +val_size: 0.1 \ No newline at end of file diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..0da4453 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "num_input_tokens_seen": 49166912, + "total_flos": 2.8707953551107686e+17, + "train_loss": 0.44674425404505724, + "train_runtime": 6032.4024, + "train_samples_per_second": 20.716, + "train_steps_per_second": 2.59 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..267d782 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,3144 @@ +{"current_steps": 5, "total_steps": 15621, "loss": 2.1538, "lr": 5.118362124120281e-09, "epoch": 0.0003200819409768901, "percentage": 0.03, "elapsed_time": "0:00:01", "remaining_time": "1:38:49", "throughput": 8089.76, "total_tokens": 15360} +{"current_steps": 10, "total_steps": 15621, "loss": 2.3529, "lr": 1.1516314779270634e-08, "epoch": 0.0006401638819537802, "percentage": 0.06, "elapsed_time": "0:00:02", "remaining_time": "1:07:18", "throughput": 12024.73, "total_tokens": 31104} +{"current_steps": 15, "total_steps": 15621, "loss": 2.1426, "lr": 1.7914267434420987e-08, "epoch": 0.0009602458229306702, "percentage": 0.1, "elapsed_time": "0:00:03", "remaining_time": "0:56:14", "throughput": 14246.73, "total_tokens": 46208} +{"current_steps": 20, "total_steps": 15621, "loss": 2.7631, "lr": 2.431222008957134e-08, "epoch": 0.0012803277639075604, "percentage": 0.13, "elapsed_time": "0:00:03", "remaining_time": "0:51:11", "throughput": 15865.7, "total_tokens": 62464} +{"current_steps": 25, "total_steps": 15621, "loss": 2.1363, "lr": 3.071017274472169e-08, "epoch": 0.0016004097048844504, "percentage": 0.16, "elapsed_time": "0:00:04", "remaining_time": "0:48:28", "throughput": 16965.52, "total_tokens": 79104} +{"current_steps": 30, "total_steps": 15621, "loss": 2.2293, "lr": 3.710812539987204e-08, "epoch": 0.0019204916458613404, "percentage": 0.19, "elapsed_time": "0:00:05", "remaining_time": "0:46:14", "throughput": 17775.24, "total_tokens": 94912} +{"current_steps": 35, "total_steps": 15621, "loss": 2.3549, "lr": 4.350607805502239e-08, "epoch": 0.0022405735868382304, "percentage": 0.22, "elapsed_time": "0:00:06", "remaining_time": "0:44:40", "throughput": 18401.99, "total_tokens": 110784} +{"current_steps": 40, "total_steps": 15621, "loss": 2.1602, "lr": 4.990403071017274e-08, "epoch": 0.002560655527815121, "percentage": 0.26, "elapsed_time": "0:00:06", "remaining_time": "0:43:17", "throughput": 18849.37, "total_tokens": 125696} +{"current_steps": 45, "total_steps": 15621, "loss": 2.1132, "lr": 5.6301983365323095e-08, "epoch": 0.002880737468792011, "percentage": 0.29, "elapsed_time": "0:00:07", "remaining_time": "0:42:12", "throughput": 19225.89, "total_tokens": 140672} +{"current_steps": 50, "total_steps": 15621, "loss": 2.0446, "lr": 6.269993602047345e-08, "epoch": 0.003200819409768901, "percentage": 0.32, "elapsed_time": "0:00:07", "remaining_time": "0:41:20", "throughput": 19518.19, "total_tokens": 155456} +{"current_steps": 55, "total_steps": 15621, "loss": 1.9213, "lr": 6.90978886756238e-08, "epoch": 0.003520901350745791, "percentage": 0.35, "elapsed_time": "0:00:08", "remaining_time": "0:40:43", "throughput": 19783.79, "total_tokens": 170816} +{"current_steps": 60, "total_steps": 15621, "loss": 2.0365, "lr": 7.549584133077414e-08, "epoch": 0.003840983291722681, "percentage": 0.38, "elapsed_time": "0:00:09", "remaining_time": "0:40:03", "throughput": 19975.64, "total_tokens": 185088} +{"current_steps": 65, "total_steps": 15621, "loss": 1.6514, "lr": 8.18937939859245e-08, "epoch": 0.004161065232699571, "percentage": 0.42, "elapsed_time": "0:00:09", "remaining_time": "0:39:36", "throughput": 20182.21, "total_tokens": 200384} +{"current_steps": 70, "total_steps": 15621, "loss": 1.6765, "lr": 8.829174664107485e-08, "epoch": 0.004481147173676461, "percentage": 0.45, "elapsed_time": "0:00:10", "remaining_time": "0:39:13", "throughput": 20362.21, "total_tokens": 215744} +{"current_steps": 75, "total_steps": 15621, "loss": 1.6609, "lr": 9.468969929622521e-08, "epoch": 0.004801229114653352, "percentage": 0.48, "elapsed_time": "0:00:11", "remaining_time": "0:38:51", "throughput": 20487.78, "total_tokens": 230400} +{"current_steps": 80, "total_steps": 15621, "loss": 1.326, "lr": 1.0108765195137556e-07, "epoch": 0.005121311055630242, "percentage": 0.51, "elapsed_time": "0:00:11", "remaining_time": "0:38:37", "throughput": 20669.1, "total_tokens": 246592} +{"current_steps": 85, "total_steps": 15621, "loss": 1.1489, "lr": 1.074856046065259e-07, "epoch": 0.005441392996607132, "percentage": 0.54, "elapsed_time": "0:00:12", "remaining_time": "0:38:23", "throughput": 20810.68, "total_tokens": 262272} +{"current_steps": 90, "total_steps": 15621, "loss": 1.0445, "lr": 1.1388355726167625e-07, "epoch": 0.005761474937584022, "percentage": 0.58, "elapsed_time": "0:00:13", "remaining_time": "0:38:10", "throughput": 20927.01, "total_tokens": 277760} +{"current_steps": 95, "total_steps": 15621, "loss": 1.2459, "lr": 1.202815099168266e-07, "epoch": 0.006081556878560912, "percentage": 0.61, "elapsed_time": "0:00:13", "remaining_time": "0:37:56", "throughput": 21030.82, "total_tokens": 292992} +{"current_steps": 100, "total_steps": 15621, "loss": 1.1182, "lr": 1.2667946257197694e-07, "epoch": 0.006401638819537802, "percentage": 0.64, "elapsed_time": "0:00:14", "remaining_time": "0:37:41", "throughput": 21126.68, "total_tokens": 307840} +{"current_steps": 105, "total_steps": 15621, "loss": 1.0297, "lr": 1.3307741522712732e-07, "epoch": 0.006721720760514692, "percentage": 0.67, "elapsed_time": "0:00:15", "remaining_time": "0:37:29", "throughput": 21216.69, "total_tokens": 323008} +{"current_steps": 110, "total_steps": 15621, "loss": 1.051, "lr": 1.3947536788227767e-07, "epoch": 0.007041802701491582, "percentage": 0.7, "elapsed_time": "0:00:15", "remaining_time": "0:37:24", "throughput": 21324.08, "total_tokens": 339456} +{"current_steps": 115, "total_steps": 15621, "loss": 1.1295, "lr": 1.45873320537428e-07, "epoch": 0.007361884642468472, "percentage": 0.74, "elapsed_time": "0:00:16", "remaining_time": "0:37:15", "throughput": 21396.2, "total_tokens": 354816} +{"current_steps": 120, "total_steps": 15621, "loss": 0.8247, "lr": 1.5227127319257838e-07, "epoch": 0.007681966583445362, "percentage": 0.77, "elapsed_time": "0:00:17", "remaining_time": "0:37:04", "throughput": 21453.95, "total_tokens": 369472} +{"current_steps": 125, "total_steps": 15621, "loss": 0.9577, "lr": 1.586692258477287e-07, "epoch": 0.008002048524422252, "percentage": 0.8, "elapsed_time": "0:00:17", "remaining_time": "0:36:56", "throughput": 21515.33, "total_tokens": 384768} +{"current_steps": 130, "total_steps": 15621, "loss": 1.0328, "lr": 1.6506717850287908e-07, "epoch": 0.008322130465399142, "percentage": 0.83, "elapsed_time": "0:00:18", "remaining_time": "0:36:50", "throughput": 21577.63, "total_tokens": 400192} +{"current_steps": 135, "total_steps": 15621, "loss": 0.8953, "lr": 1.7146513115802943e-07, "epoch": 0.008642212406376032, "percentage": 0.86, "elapsed_time": "0:00:19", "remaining_time": "0:36:46", "throughput": 21655.51, "total_tokens": 416640} +{"current_steps": 140, "total_steps": 15621, "loss": 0.8265, "lr": 1.7786308381317976e-07, "epoch": 0.008962294347352922, "percentage": 0.9, "elapsed_time": "0:00:19", "remaining_time": "0:36:42", "throughput": 21718.54, "total_tokens": 432640} +{"current_steps": 145, "total_steps": 15621, "loss": 0.8983, "lr": 1.8426103646833014e-07, "epoch": 0.009282376288329812, "percentage": 0.93, "elapsed_time": "0:00:20", "remaining_time": "0:36:39", "throughput": 21769.13, "total_tokens": 448640} +{"current_steps": 150, "total_steps": 15621, "loss": 0.9503, "lr": 1.9065898912348046e-07, "epoch": 0.009602458229306703, "percentage": 0.96, "elapsed_time": "0:00:21", "remaining_time": "0:36:35", "throughput": 21818.09, "total_tokens": 464448} +{"current_steps": 155, "total_steps": 15621, "loss": 0.858, "lr": 1.9705694177863084e-07, "epoch": 0.009922540170283593, "percentage": 0.99, "elapsed_time": "0:00:21", "remaining_time": "0:36:29", "throughput": 21853.99, "total_tokens": 479488} +{"current_steps": 160, "total_steps": 15621, "loss": 0.7304, "lr": 2.034548944337812e-07, "epoch": 0.010242622111260483, "percentage": 1.02, "elapsed_time": "0:00:22", "remaining_time": "0:36:25", "throughput": 21900.34, "total_tokens": 495296} +{"current_steps": 165, "total_steps": 15621, "loss": 0.7848, "lr": 2.0985284708893152e-07, "epoch": 0.010562704052237373, "percentage": 1.06, "elapsed_time": "0:00:23", "remaining_time": "0:36:18", "throughput": 21935.33, "total_tokens": 510144} +{"current_steps": 170, "total_steps": 15621, "loss": 0.8469, "lr": 2.162507997440819e-07, "epoch": 0.010882785993214263, "percentage": 1.09, "elapsed_time": "0:00:23", "remaining_time": "0:36:12", "throughput": 21959.47, "total_tokens": 524928} +{"current_steps": 175, "total_steps": 15621, "loss": 0.7184, "lr": 2.2264875239923222e-07, "epoch": 0.011202867934191153, "percentage": 1.12, "elapsed_time": "0:00:24", "remaining_time": "0:36:11", "throughput": 22007.51, "total_tokens": 541504} +{"current_steps": 180, "total_steps": 15621, "loss": 0.7354, "lr": 2.290467050543826e-07, "epoch": 0.011522949875168043, "percentage": 1.15, "elapsed_time": "0:00:25", "remaining_time": "0:36:05", "throughput": 22031.18, "total_tokens": 556096} +{"current_steps": 185, "total_steps": 15621, "loss": 0.7351, "lr": 2.3544465770953295e-07, "epoch": 0.011843031816144933, "percentage": 1.18, "elapsed_time": "0:00:25", "remaining_time": "0:36:04", "throughput": 22076.87, "total_tokens": 572736} +{"current_steps": 190, "total_steps": 15621, "loss": 0.853, "lr": 2.418426103646833e-07, "epoch": 0.012163113757121823, "percentage": 1.22, "elapsed_time": "0:00:26", "remaining_time": "0:36:01", "throughput": 22106.49, "total_tokens": 588352} +{"current_steps": 195, "total_steps": 15621, "loss": 0.9742, "lr": 2.4824056301983363e-07, "epoch": 0.012483195698098713, "percentage": 1.25, "elapsed_time": "0:00:27", "remaining_time": "0:35:57", "throughput": 22129.73, "total_tokens": 603520} +{"current_steps": 200, "total_steps": 15621, "loss": 0.8167, "lr": 2.54638515674984e-07, "epoch": 0.012803277639075603, "percentage": 1.28, "elapsed_time": "0:00:27", "remaining_time": "0:35:55", "throughput": 22158.86, "total_tokens": 619392} +{"current_steps": 205, "total_steps": 15621, "loss": 0.7996, "lr": 2.6103646833013433e-07, "epoch": 0.013123359580052493, "percentage": 1.31, "elapsed_time": "0:00:28", "remaining_time": "0:35:53", "throughput": 22194.59, "total_tokens": 635456} +{"current_steps": 210, "total_steps": 15621, "loss": 0.8732, "lr": 2.6743442098528466e-07, "epoch": 0.013443441521029383, "percentage": 1.34, "elapsed_time": "0:00:29", "remaining_time": "0:35:49", "throughput": 22217.52, "total_tokens": 650880} +{"current_steps": 215, "total_steps": 15621, "loss": 0.8244, "lr": 2.7383237364043504e-07, "epoch": 0.013763523462006273, "percentage": 1.38, "elapsed_time": "0:00:29", "remaining_time": "0:35:48", "throughput": 22239.4, "total_tokens": 666688} +{"current_steps": 220, "total_steps": 15621, "loss": 0.7909, "lr": 2.802303262955854e-07, "epoch": 0.014083605402983163, "percentage": 1.41, "elapsed_time": "0:00:30", "remaining_time": "0:35:44", "throughput": 22261.79, "total_tokens": 682112} +{"current_steps": 225, "total_steps": 15621, "loss": 0.8318, "lr": 2.866282789507358e-07, "epoch": 0.014403687343960053, "percentage": 1.44, "elapsed_time": "0:00:31", "remaining_time": "0:35:42", "throughput": 22286.5, "total_tokens": 697728} +{"current_steps": 230, "total_steps": 15621, "loss": 0.6828, "lr": 2.9302623160588607e-07, "epoch": 0.014723769284936943, "percentage": 1.47, "elapsed_time": "0:00:31", "remaining_time": "0:35:37", "throughput": 22307.42, "total_tokens": 712704} +{"current_steps": 235, "total_steps": 15621, "loss": 0.9689, "lr": 2.9942418426103644e-07, "epoch": 0.015043851225913833, "percentage": 1.5, "elapsed_time": "0:00:32", "remaining_time": "0:35:37", "throughput": 22337.18, "total_tokens": 729408} +{"current_steps": 240, "total_steps": 15621, "loss": 0.7854, "lr": 3.058221369161868e-07, "epoch": 0.015363933166890723, "percentage": 1.54, "elapsed_time": "0:00:33", "remaining_time": "0:35:36", "throughput": 22360.45, "total_tokens": 745344} +{"current_steps": 245, "total_steps": 15621, "loss": 0.7013, "lr": 3.1222008957133715e-07, "epoch": 0.015684015107867613, "percentage": 1.57, "elapsed_time": "0:00:34", "remaining_time": "0:35:36", "throughput": 22400.54, "total_tokens": 762688} +{"current_steps": 250, "total_steps": 15621, "loss": 0.7128, "lr": 3.186180422264875e-07, "epoch": 0.016004097048844503, "percentage": 1.6, "elapsed_time": "0:00:34", "remaining_time": "0:35:36", "throughput": 22426.34, "total_tokens": 779392} +{"current_steps": 255, "total_steps": 15621, "loss": 0.7911, "lr": 3.2501599488163785e-07, "epoch": 0.016324178989821393, "percentage": 1.63, "elapsed_time": "0:00:35", "remaining_time": "0:35:32", "throughput": 22437.61, "total_tokens": 794112} +{"current_steps": 260, "total_steps": 15621, "loss": 0.8429, "lr": 3.314139475367882e-07, "epoch": 0.016644260930798283, "percentage": 1.66, "elapsed_time": "0:00:36", "remaining_time": "0:35:31", "throughput": 22459.48, "total_tokens": 810112} +{"current_steps": 265, "total_steps": 15621, "loss": 0.8602, "lr": 3.3781190019193855e-07, "epoch": 0.016964342871775173, "percentage": 1.7, "elapsed_time": "0:00:36", "remaining_time": "0:35:28", "throughput": 22473.94, "total_tokens": 825472} +{"current_steps": 270, "total_steps": 15621, "loss": 0.9818, "lr": 3.4420985284708893e-07, "epoch": 0.017284424812752063, "percentage": 1.73, "elapsed_time": "0:00:37", "remaining_time": "0:35:24", "throughput": 22481.36, "total_tokens": 840128} +{"current_steps": 275, "total_steps": 15621, "loss": 0.7846, "lr": 3.5060780550223926e-07, "epoch": 0.017604506753728953, "percentage": 1.76, "elapsed_time": "0:00:38", "remaining_time": "0:35:21", "throughput": 22491.1, "total_tokens": 855104} +{"current_steps": 280, "total_steps": 15621, "loss": 0.8039, "lr": 3.570057581573896e-07, "epoch": 0.017924588694705843, "percentage": 1.79, "elapsed_time": "0:00:38", "remaining_time": "0:35:20", "throughput": 22504.33, "total_tokens": 870848} +{"current_steps": 285, "total_steps": 15621, "loss": 0.7489, "lr": 3.6340371081253996e-07, "epoch": 0.018244670635682733, "percentage": 1.82, "elapsed_time": "0:00:39", "remaining_time": "0:35:16", "throughput": 22514.64, "total_tokens": 885760} +{"current_steps": 290, "total_steps": 15621, "loss": 0.705, "lr": 3.6980166346769034e-07, "epoch": 0.018564752576659623, "percentage": 1.86, "elapsed_time": "0:00:39", "remaining_time": "0:35:14", "throughput": 22524.41, "total_tokens": 900928} +{"current_steps": 295, "total_steps": 15621, "loss": 0.7869, "lr": 3.7619961612284067e-07, "epoch": 0.018884834517636517, "percentage": 1.89, "elapsed_time": "0:00:40", "remaining_time": "0:35:11", "throughput": 22538.15, "total_tokens": 915968} +{"current_steps": 300, "total_steps": 15621, "loss": 0.9906, "lr": 3.8259756877799104e-07, "epoch": 0.019204916458613407, "percentage": 1.92, "elapsed_time": "0:00:41", "remaining_time": "0:35:11", "throughput": 22567.41, "total_tokens": 933056} +{"current_steps": 305, "total_steps": 15621, "loss": 0.737, "lr": 3.889955214331414e-07, "epoch": 0.019524998399590297, "percentage": 1.95, "elapsed_time": "0:00:42", "remaining_time": "0:35:09", "throughput": 22576.97, "total_tokens": 948416} +{"current_steps": 310, "total_steps": 15621, "loss": 0.7708, "lr": 3.953934740882917e-07, "epoch": 0.019845080340567187, "percentage": 1.98, "elapsed_time": "0:00:42", "remaining_time": "0:35:06", "throughput": 22581.04, "total_tokens": 962880} +{"current_steps": 315, "total_steps": 15621, "loss": 0.8126, "lr": 4.0179142674344207e-07, "epoch": 0.020165162281544077, "percentage": 2.02, "elapsed_time": "0:00:43", "remaining_time": "0:35:06", "throughput": 22603.14, "total_tokens": 979904} +{"current_steps": 320, "total_steps": 15621, "loss": 0.8299, "lr": 4.0818937939859245e-07, "epoch": 0.020485244222520967, "percentage": 2.05, "elapsed_time": "0:00:44", "remaining_time": "0:35:04", "throughput": 22609.5, "total_tokens": 995136} +{"current_steps": 325, "total_steps": 15621, "loss": 0.771, "lr": 4.145873320537428e-07, "epoch": 0.020805326163497857, "percentage": 2.08, "elapsed_time": "0:00:44", "remaining_time": "0:35:03", "throughput": 22622.46, "total_tokens": 1011008} +{"current_steps": 330, "total_steps": 15621, "loss": 0.831, "lr": 4.2098528470889315e-07, "epoch": 0.021125408104474747, "percentage": 2.11, "elapsed_time": "0:00:45", "remaining_time": "0:35:01", "throughput": 22623.01, "total_tokens": 1025792} +{"current_steps": 335, "total_steps": 15621, "loss": 0.6929, "lr": 4.273832373640435e-07, "epoch": 0.021445490045451637, "percentage": 2.14, "elapsed_time": "0:00:46", "remaining_time": "0:35:01", "throughput": 22643.26, "total_tokens": 1042944} +{"current_steps": 340, "total_steps": 15621, "loss": 0.8271, "lr": 4.3378119001919386e-07, "epoch": 0.021765571986428527, "percentage": 2.18, "elapsed_time": "0:00:46", "remaining_time": "0:35:00", "throughput": 22653.2, "total_tokens": 1058688} +{"current_steps": 345, "total_steps": 15621, "loss": 0.7202, "lr": 4.401791426743442e-07, "epoch": 0.022085653927405417, "percentage": 2.21, "elapsed_time": "0:00:47", "remaining_time": "0:34:59", "throughput": 22663.56, "total_tokens": 1074560} +{"current_steps": 350, "total_steps": 15621, "loss": 0.6947, "lr": 4.4657709532949456e-07, "epoch": 0.022405735868382307, "percentage": 2.24, "elapsed_time": "0:00:48", "remaining_time": "0:34:57", "throughput": 22669.54, "total_tokens": 1089728} +{"current_steps": 355, "total_steps": 15621, "loss": 0.9107, "lr": 4.5297504798464494e-07, "epoch": 0.022725817809359197, "percentage": 2.27, "elapsed_time": "0:00:48", "remaining_time": "0:34:55", "throughput": 22676.44, "total_tokens": 1105024} +{"current_steps": 360, "total_steps": 15621, "loss": 0.7831, "lr": 4.593730006397952e-07, "epoch": 0.023045899750336087, "percentage": 2.3, "elapsed_time": "0:00:49", "remaining_time": "0:34:54", "throughput": 22686.17, "total_tokens": 1121088} +{"current_steps": 365, "total_steps": 15621, "loss": 0.7062, "lr": 4.657709532949456e-07, "epoch": 0.023365981691312977, "percentage": 2.34, "elapsed_time": "0:00:50", "remaining_time": "0:34:53", "throughput": 22697.86, "total_tokens": 1136896} +{"current_steps": 370, "total_steps": 15621, "loss": 0.7133, "lr": 4.7216890595009597e-07, "epoch": 0.023686063632289867, "percentage": 2.37, "elapsed_time": "0:00:50", "remaining_time": "0:34:53", "throughput": 22710.36, "total_tokens": 1153280} +{"current_steps": 375, "total_steps": 15621, "loss": 0.8307, "lr": 4.785668586052463e-07, "epoch": 0.024006145573266757, "percentage": 2.4, "elapsed_time": "0:00:51", "remaining_time": "0:34:52", "throughput": 22722.94, "total_tokens": 1169536} +{"current_steps": 380, "total_steps": 15621, "loss": 0.7573, "lr": 4.849648112603967e-07, "epoch": 0.024326227514243647, "percentage": 2.43, "elapsed_time": "0:00:52", "remaining_time": "0:34:51", "throughput": 22728.9, "total_tokens": 1185088} +{"current_steps": 385, "total_steps": 15621, "loss": 0.6599, "lr": 4.91362763915547e-07, "epoch": 0.024646309455220537, "percentage": 2.46, "elapsed_time": "0:00:52", "remaining_time": "0:34:50", "throughput": 22732.94, "total_tokens": 1200832} +{"current_steps": 390, "total_steps": 15621, "loss": 0.6614, "lr": 4.977607165706974e-07, "epoch": 0.024966391396197427, "percentage": 2.5, "elapsed_time": "0:00:53", "remaining_time": "0:34:48", "throughput": 22741.12, "total_tokens": 1216320} +{"current_steps": 395, "total_steps": 15621, "loss": 0.7446, "lr": 5.041586692258478e-07, "epoch": 0.025286473337174317, "percentage": 2.53, "elapsed_time": "0:00:54", "remaining_time": "0:34:48", "throughput": 22749.93, "total_tokens": 1232832} +{"current_steps": 400, "total_steps": 15621, "loss": 0.7023, "lr": 5.10556621880998e-07, "epoch": 0.025606555278151207, "percentage": 2.56, "elapsed_time": "0:00:54", "remaining_time": "0:34:47", "throughput": 22757.08, "total_tokens": 1248384} +{"current_steps": 405, "total_steps": 15621, "loss": 0.672, "lr": 5.169545745361484e-07, "epoch": 0.025926637219128097, "percentage": 2.59, "elapsed_time": "0:00:55", "remaining_time": "0:34:45", "throughput": 22765.31, "total_tokens": 1263936} +{"current_steps": 410, "total_steps": 15621, "loss": 1.1224, "lr": 5.233525271912988e-07, "epoch": 0.026246719160104987, "percentage": 2.62, "elapsed_time": "0:00:56", "remaining_time": "0:35:09", "throughput": 22756.92, "total_tokens": 1294208} +{"current_steps": 415, "total_steps": 15621, "loss": 0.7874, "lr": 5.297504798464492e-07, "epoch": 0.026566801101081877, "percentage": 2.66, "elapsed_time": "0:00:57", "remaining_time": "0:35:07", "throughput": 22761.38, "total_tokens": 1309120} +{"current_steps": 420, "total_steps": 15621, "loss": 0.8574, "lr": 5.361484325015994e-07, "epoch": 0.026886883042058767, "percentage": 2.69, "elapsed_time": "0:00:58", "remaining_time": "0:35:05", "throughput": 22766.09, "total_tokens": 1324224} +{"current_steps": 425, "total_steps": 15621, "loss": 0.683, "lr": 5.425463851567498e-07, "epoch": 0.027206964983035656, "percentage": 2.72, "elapsed_time": "0:00:58", "remaining_time": "0:35:05", "throughput": 22777.8, "total_tokens": 1341056} +{"current_steps": 430, "total_steps": 15621, "loss": 0.7444, "lr": 5.489443378119002e-07, "epoch": 0.027527046924012546, "percentage": 2.75, "elapsed_time": "0:00:59", "remaining_time": "0:35:03", "throughput": 22782.36, "total_tokens": 1356544} +{"current_steps": 435, "total_steps": 15621, "loss": 0.6717, "lr": 5.553422904670505e-07, "epoch": 0.027847128864989436, "percentage": 2.78, "elapsed_time": "0:01:00", "remaining_time": "0:35:01", "throughput": 22786.66, "total_tokens": 1371840} +{"current_steps": 440, "total_steps": 15621, "loss": 0.6805, "lr": 5.61740243122201e-07, "epoch": 0.028167210805966326, "percentage": 2.82, "elapsed_time": "0:01:00", "remaining_time": "0:34:59", "throughput": 22789.83, "total_tokens": 1386816} +{"current_steps": 445, "total_steps": 15621, "loss": 0.744, "lr": 5.681381957773512e-07, "epoch": 0.028487292746943216, "percentage": 2.85, "elapsed_time": "0:01:01", "remaining_time": "0:34:57", "throughput": 22793.51, "total_tokens": 1401792} +{"current_steps": 450, "total_steps": 15621, "loss": 0.6219, "lr": 5.745361484325015e-07, "epoch": 0.028807374687920106, "percentage": 2.88, "elapsed_time": "0:01:02", "remaining_time": "0:34:55", "throughput": 22797.26, "total_tokens": 1416896} +{"current_steps": 455, "total_steps": 15621, "loss": 0.7479, "lr": 5.80934101087652e-07, "epoch": 0.029127456628896996, "percentage": 2.91, "elapsed_time": "0:01:02", "remaining_time": "0:34:54", "throughput": 22801.44, "total_tokens": 1432704} +{"current_steps": 460, "total_steps": 15621, "loss": 0.7009, "lr": 5.873320537428022e-07, "epoch": 0.029447538569873886, "percentage": 2.94, "elapsed_time": "0:01:03", "remaining_time": "0:34:53", "throughput": 22805.41, "total_tokens": 1448384} +{"current_steps": 465, "total_steps": 15621, "loss": 0.7189, "lr": 5.937300063979526e-07, "epoch": 0.029767620510850776, "percentage": 2.98, "elapsed_time": "0:01:04", "remaining_time": "0:34:52", "throughput": 22816.13, "total_tokens": 1464832} +{"current_steps": 470, "total_steps": 15621, "loss": 0.678, "lr": 6.00127959053103e-07, "epoch": 0.030087702451827666, "percentage": 3.01, "elapsed_time": "0:01:04", "remaining_time": "0:34:50", "throughput": 22815.42, "total_tokens": 1479424} +{"current_steps": 475, "total_steps": 15621, "loss": 0.7233, "lr": 6.065259117082533e-07, "epoch": 0.030407784392804556, "percentage": 3.04, "elapsed_time": "0:01:05", "remaining_time": "0:34:48", "throughput": 22816.5, "total_tokens": 1494336} +{"current_steps": 480, "total_steps": 15621, "loss": 0.6771, "lr": 6.129238643634037e-07, "epoch": 0.030727866333781446, "percentage": 3.07, "elapsed_time": "0:01:06", "remaining_time": "0:34:46", "throughput": 22817.77, "total_tokens": 1509184} +{"current_steps": 485, "total_steps": 15621, "loss": 0.8088, "lr": 6.19321817018554e-07, "epoch": 0.031047948274758336, "percentage": 3.1, "elapsed_time": "0:01:06", "remaining_time": "0:34:45", "throughput": 22825.17, "total_tokens": 1525504} +{"current_steps": 490, "total_steps": 15621, "loss": 0.5954, "lr": 6.257197696737044e-07, "epoch": 0.031368030215735226, "percentage": 3.14, "elapsed_time": "0:01:07", "remaining_time": "0:34:44", "throughput": 22832.99, "total_tokens": 1541504} +{"current_steps": 495, "total_steps": 15621, "loss": 0.6166, "lr": 6.321177223288548e-07, "epoch": 0.03168811215671212, "percentage": 3.17, "elapsed_time": "0:01:08", "remaining_time": "0:34:43", "throughput": 22838.98, "total_tokens": 1557184} +{"current_steps": 500, "total_steps": 15621, "loss": 0.7699, "lr": 6.385156749840051e-07, "epoch": 0.032008194097689006, "percentage": 3.2, "elapsed_time": "0:01:08", "remaining_time": "0:34:42", "throughput": 22847.08, "total_tokens": 1573440} +{"current_steps": 505, "total_steps": 15621, "loss": 0.8718, "lr": 6.449136276391554e-07, "epoch": 0.0323282760386659, "percentage": 3.23, "elapsed_time": "0:01:09", "remaining_time": "0:34:41", "throughput": 22851.08, "total_tokens": 1588736} +{"current_steps": 510, "total_steps": 15621, "loss": 0.6977, "lr": 6.513115802943058e-07, "epoch": 0.032648357979642786, "percentage": 3.26, "elapsed_time": "0:01:10", "remaining_time": "0:34:39", "throughput": 22856.01, "total_tokens": 1604352} +{"current_steps": 515, "total_steps": 15621, "loss": 0.6582, "lr": 6.577095329494562e-07, "epoch": 0.03296843992061968, "percentage": 3.3, "elapsed_time": "0:01:10", "remaining_time": "0:34:37", "throughput": 22856.19, "total_tokens": 1618816} +{"current_steps": 520, "total_steps": 15621, "loss": 0.7409, "lr": 6.641074856046065e-07, "epoch": 0.033288521861596566, "percentage": 3.33, "elapsed_time": "0:01:11", "remaining_time": "0:34:37", "throughput": 22868.94, "total_tokens": 1635648} +{"current_steps": 525, "total_steps": 15621, "loss": 0.7248, "lr": 6.705054382597568e-07, "epoch": 0.03360860380257346, "percentage": 3.36, "elapsed_time": "0:01:12", "remaining_time": "0:34:36", "throughput": 22871.74, "total_tokens": 1651328} +{"current_steps": 530, "total_steps": 15621, "loss": 0.7271, "lr": 6.769033909149072e-07, "epoch": 0.033928685743550346, "percentage": 3.39, "elapsed_time": "0:01:12", "remaining_time": "0:34:36", "throughput": 22883.84, "total_tokens": 1668928} +{"current_steps": 535, "total_steps": 15621, "loss": 0.6188, "lr": 6.833013435700575e-07, "epoch": 0.03424876768452724, "percentage": 3.42, "elapsed_time": "0:01:13", "remaining_time": "0:34:36", "throughput": 22891.17, "total_tokens": 1685504} +{"current_steps": 540, "total_steps": 15621, "loss": 0.7016, "lr": 6.89699296225208e-07, "epoch": 0.034568849625504126, "percentage": 3.46, "elapsed_time": "0:01:14", "remaining_time": "0:34:35", "throughput": 22898.17, "total_tokens": 1701952} +{"current_steps": 545, "total_steps": 15621, "loss": 0.7293, "lr": 6.960972488803583e-07, "epoch": 0.03488893156648102, "percentage": 3.49, "elapsed_time": "0:01:14", "remaining_time": "0:34:34", "throughput": 22899.64, "total_tokens": 1716992} +{"current_steps": 550, "total_steps": 15621, "loss": 0.583, "lr": 7.024952015355085e-07, "epoch": 0.035209013507457906, "percentage": 3.52, "elapsed_time": "0:01:15", "remaining_time": "0:34:32", "throughput": 22899.33, "total_tokens": 1732160} +{"current_steps": 555, "total_steps": 15621, "loss": 0.656, "lr": 7.08893154190659e-07, "epoch": 0.0355290954484348, "percentage": 3.55, "elapsed_time": "0:01:16", "remaining_time": "0:34:32", "throughput": 22904.56, "total_tokens": 1748416} +{"current_steps": 560, "total_steps": 15621, "loss": 0.7074, "lr": 7.152911068458093e-07, "epoch": 0.035849177389411686, "percentage": 3.58, "elapsed_time": "0:01:16", "remaining_time": "0:34:30", "throughput": 22906.88, "total_tokens": 1763776} +{"current_steps": 565, "total_steps": 15621, "loss": 0.7046, "lr": 7.216890595009597e-07, "epoch": 0.03616925933038858, "percentage": 3.62, "elapsed_time": "0:01:17", "remaining_time": "0:34:30", "throughput": 22914.21, "total_tokens": 1780160} +{"current_steps": 570, "total_steps": 15621, "loss": 0.6024, "lr": 7.2808701215611e-07, "epoch": 0.036489341271365466, "percentage": 3.65, "elapsed_time": "0:01:18", "remaining_time": "0:34:29", "throughput": 22919.37, "total_tokens": 1795968} +{"current_steps": 575, "total_steps": 15621, "loss": 0.6319, "lr": 7.344849648112603e-07, "epoch": 0.03680942321234236, "percentage": 3.68, "elapsed_time": "0:01:19", "remaining_time": "0:34:31", "throughput": 22936.59, "total_tokens": 1815424} +{"current_steps": 580, "total_steps": 15621, "loss": 0.8256, "lr": 7.408829174664107e-07, "epoch": 0.037129505153319246, "percentage": 3.71, "elapsed_time": "0:01:19", "remaining_time": "0:34:30", "throughput": 22941.71, "total_tokens": 1831936} +{"current_steps": 585, "total_steps": 15621, "loss": 0.6147, "lr": 7.472808701215611e-07, "epoch": 0.03744958709429614, "percentage": 3.74, "elapsed_time": "0:01:20", "remaining_time": "0:34:29", "throughput": 22942.23, "total_tokens": 1847424} +{"current_steps": 590, "total_steps": 15621, "loss": 0.7338, "lr": 7.536788227767114e-07, "epoch": 0.03776966903527303, "percentage": 3.78, "elapsed_time": "0:01:21", "remaining_time": "0:34:28", "throughput": 22942.12, "total_tokens": 1862400} +{"current_steps": 595, "total_steps": 15621, "loss": 0.7698, "lr": 7.600767754318617e-07, "epoch": 0.03808975097624992, "percentage": 3.81, "elapsed_time": "0:01:21", "remaining_time": "0:34:26", "throughput": 22942.22, "total_tokens": 1876928} +{"current_steps": 600, "total_steps": 15621, "loss": 0.6403, "lr": 7.664747280870121e-07, "epoch": 0.03840983291722681, "percentage": 3.84, "elapsed_time": "0:01:22", "remaining_time": "0:34:25", "throughput": 22943.97, "total_tokens": 1892608} +{"current_steps": 605, "total_steps": 15621, "loss": 0.7287, "lr": 7.728726807421625e-07, "epoch": 0.0387299148582037, "percentage": 3.87, "elapsed_time": "0:01:23", "remaining_time": "0:34:25", "throughput": 22952.95, "total_tokens": 1909696} +{"current_steps": 610, "total_steps": 15621, "loss": 0.7582, "lr": 7.792706333973129e-07, "epoch": 0.03904999679918059, "percentage": 3.9, "elapsed_time": "0:01:23", "remaining_time": "0:34:23", "throughput": 22953.26, "total_tokens": 1924864} +{"current_steps": 615, "total_steps": 15621, "loss": 0.5567, "lr": 7.856685860524632e-07, "epoch": 0.03937007874015748, "percentage": 3.94, "elapsed_time": "0:01:24", "remaining_time": "0:34:22", "throughput": 22955.9, "total_tokens": 1939968} +{"current_steps": 620, "total_steps": 15621, "loss": 0.711, "lr": 7.920665387076135e-07, "epoch": 0.03969016068113437, "percentage": 3.97, "elapsed_time": "0:01:25", "remaining_time": "0:34:20", "throughput": 22956.37, "total_tokens": 1955136} +{"current_steps": 625, "total_steps": 15621, "loss": 0.7024, "lr": 7.984644913627639e-07, "epoch": 0.04001024262211126, "percentage": 4.0, "elapsed_time": "0:01:25", "remaining_time": "0:34:19", "throughput": 22958.58, "total_tokens": 1970880} +{"current_steps": 630, "total_steps": 15621, "loss": 0.583, "lr": 8.048624440179143e-07, "epoch": 0.04033032456308815, "percentage": 4.03, "elapsed_time": "0:01:26", "remaining_time": "0:34:18", "throughput": 22961.3, "total_tokens": 1986752} +{"current_steps": 635, "total_steps": 15621, "loss": 0.5988, "lr": 8.112603966730645e-07, "epoch": 0.04065040650406504, "percentage": 4.07, "elapsed_time": "0:01:27", "remaining_time": "0:34:17", "throughput": 22961.9, "total_tokens": 2001856} +{"current_steps": 640, "total_steps": 15621, "loss": 0.6581, "lr": 8.17658349328215e-07, "epoch": 0.04097048844504193, "percentage": 4.1, "elapsed_time": "0:01:27", "remaining_time": "0:34:18", "throughput": 22973.14, "total_tokens": 2019968} +{"current_steps": 645, "total_steps": 15621, "loss": 0.7118, "lr": 8.240563019833653e-07, "epoch": 0.04129057038601882, "percentage": 4.13, "elapsed_time": "0:01:28", "remaining_time": "0:34:16", "throughput": 22974.99, "total_tokens": 2035328} +{"current_steps": 650, "total_steps": 15621, "loss": 0.6236, "lr": 8.304542546385156e-07, "epoch": 0.04161065232699571, "percentage": 4.16, "elapsed_time": "0:01:29", "remaining_time": "0:34:19", "throughput": 22988.72, "total_tokens": 2055168} +{"current_steps": 655, "total_steps": 15621, "loss": 0.7478, "lr": 8.36852207293666e-07, "epoch": 0.0419307342679726, "percentage": 4.19, "elapsed_time": "0:01:30", "remaining_time": "0:34:18", "throughput": 22993.6, "total_tokens": 2071808} +{"current_steps": 660, "total_steps": 15621, "loss": 0.6519, "lr": 8.432501599488163e-07, "epoch": 0.04225081620894949, "percentage": 4.23, "elapsed_time": "0:01:30", "remaining_time": "0:34:17", "throughput": 22996.44, "total_tokens": 2087424} +{"current_steps": 665, "total_steps": 15621, "loss": 0.7696, "lr": 8.496481126039667e-07, "epoch": 0.04257089814992638, "percentage": 4.26, "elapsed_time": "0:01:31", "remaining_time": "0:34:16", "throughput": 22997.51, "total_tokens": 2102592} +{"current_steps": 670, "total_steps": 15621, "loss": 0.6487, "lr": 8.560460652591171e-07, "epoch": 0.04289098009090327, "percentage": 4.29, "elapsed_time": "0:01:32", "remaining_time": "0:34:15", "throughput": 23004.76, "total_tokens": 2119488} +{"current_steps": 675, "total_steps": 15621, "loss": 0.6395, "lr": 8.624440179142674e-07, "epoch": 0.04321106203188016, "percentage": 4.32, "elapsed_time": "0:01:32", "remaining_time": "0:34:15", "throughput": 23010.71, "total_tokens": 2136000} +{"current_steps": 680, "total_steps": 15621, "loss": 0.7432, "lr": 8.688419705694177e-07, "epoch": 0.04353114397285705, "percentage": 4.35, "elapsed_time": "0:01:33", "remaining_time": "0:34:14", "throughput": 23015.92, "total_tokens": 2152448} +{"current_steps": 685, "total_steps": 15621, "loss": 0.6236, "lr": 8.752399232245681e-07, "epoch": 0.04385122591383394, "percentage": 4.39, "elapsed_time": "0:01:34", "remaining_time": "0:34:13", "throughput": 23018.64, "total_tokens": 2168000} +{"current_steps": 690, "total_steps": 15621, "loss": 0.5919, "lr": 8.816378758797185e-07, "epoch": 0.04417130785481083, "percentage": 4.42, "elapsed_time": "0:01:34", "remaining_time": "0:34:12", "throughput": 23018.88, "total_tokens": 2183552} +{"current_steps": 695, "total_steps": 15621, "loss": 0.7068, "lr": 8.880358285348688e-07, "epoch": 0.04449138979578772, "percentage": 4.45, "elapsed_time": "0:01:35", "remaining_time": "0:34:11", "throughput": 23023.12, "total_tokens": 2199488} +{"current_steps": 700, "total_steps": 15621, "loss": 0.6095, "lr": 8.944337811900191e-07, "epoch": 0.04481147173676461, "percentage": 4.48, "elapsed_time": "0:01:36", "remaining_time": "0:34:10", "throughput": 23025.65, "total_tokens": 2215296} +{"current_steps": 705, "total_steps": 15621, "loss": 0.7106, "lr": 9.008317338451695e-07, "epoch": 0.0451315536777415, "percentage": 4.51, "elapsed_time": "0:01:36", "remaining_time": "0:34:09", "throughput": 23026.33, "total_tokens": 2230016} +{"current_steps": 710, "total_steps": 15621, "loss": 0.666, "lr": 9.072296865003198e-07, "epoch": 0.04545163561871839, "percentage": 4.55, "elapsed_time": "0:01:37", "remaining_time": "0:34:07", "throughput": 23025.56, "total_tokens": 2245056} +{"current_steps": 715, "total_steps": 15621, "loss": 0.6173, "lr": 9.136276391554703e-07, "epoch": 0.04577171755969528, "percentage": 4.58, "elapsed_time": "0:01:38", "remaining_time": "0:34:07", "throughput": 23029.05, "total_tokens": 2261248} +{"current_steps": 720, "total_steps": 15621, "loss": 0.65, "lr": 9.200255918106205e-07, "epoch": 0.04609179950067217, "percentage": 4.61, "elapsed_time": "0:01:38", "remaining_time": "0:34:06", "throughput": 23033.38, "total_tokens": 2278016} +{"current_steps": 725, "total_steps": 15621, "loss": 0.5715, "lr": 9.264235444657708e-07, "epoch": 0.04641188144164906, "percentage": 4.64, "elapsed_time": "0:01:39", "remaining_time": "0:34:05", "throughput": 23031.9, "total_tokens": 2292800} +{"current_steps": 730, "total_steps": 15621, "loss": 0.5988, "lr": 9.328214971209213e-07, "epoch": 0.04673196338262595, "percentage": 4.67, "elapsed_time": "0:01:40", "remaining_time": "0:34:04", "throughput": 23033.09, "total_tokens": 2308224} +{"current_steps": 735, "total_steps": 15621, "loss": 0.7054, "lr": 9.392194497760716e-07, "epoch": 0.04705204532360284, "percentage": 4.71, "elapsed_time": "0:01:40", "remaining_time": "0:34:04", "throughput": 23040.87, "total_tokens": 2325760} +{"current_steps": 740, "total_steps": 15621, "loss": 0.6911, "lr": 9.456174024312221e-07, "epoch": 0.04737212726457973, "percentage": 4.74, "elapsed_time": "0:01:41", "remaining_time": "0:34:03", "throughput": 23044.07, "total_tokens": 2341632} +{"current_steps": 745, "total_steps": 15621, "loss": 0.7079, "lr": 9.520153550863723e-07, "epoch": 0.04769220920555662, "percentage": 4.77, "elapsed_time": "0:01:42", "remaining_time": "0:34:02", "throughput": 23047.16, "total_tokens": 2357504} +{"current_steps": 750, "total_steps": 15621, "loss": 0.7284, "lr": 9.584133077415226e-07, "epoch": 0.04801229114653351, "percentage": 4.8, "elapsed_time": "0:01:42", "remaining_time": "0:34:01", "throughput": 23045.5, "total_tokens": 2372608} +{"current_steps": 755, "total_steps": 15621, "loss": 0.5945, "lr": 9.64811260396673e-07, "epoch": 0.0483323730875104, "percentage": 4.83, "elapsed_time": "0:01:43", "remaining_time": "0:34:00", "throughput": 23048.64, "total_tokens": 2388352} +{"current_steps": 760, "total_steps": 15621, "loss": 0.7012, "lr": 9.712092130518234e-07, "epoch": 0.04865245502848729, "percentage": 4.87, "elapsed_time": "0:01:44", "remaining_time": "0:33:59", "throughput": 23051.31, "total_tokens": 2404480} +{"current_steps": 765, "total_steps": 15621, "loss": 0.5138, "lr": 9.776071657069737e-07, "epoch": 0.04897253696946418, "percentage": 4.9, "elapsed_time": "0:01:44", "remaining_time": "0:33:58", "throughput": 23052.49, "total_tokens": 2419648} +{"current_steps": 770, "total_steps": 15621, "loss": 0.71, "lr": 9.840051183621241e-07, "epoch": 0.04929261891044107, "percentage": 4.93, "elapsed_time": "0:01:45", "remaining_time": "0:33:57", "throughput": 23053.81, "total_tokens": 2435584} +{"current_steps": 775, "total_steps": 15621, "loss": 0.6251, "lr": 9.904030710172743e-07, "epoch": 0.04961270085141796, "percentage": 4.96, "elapsed_time": "0:01:46", "remaining_time": "0:33:56", "throughput": 23055.26, "total_tokens": 2451072} +{"current_steps": 780, "total_steps": 15621, "loss": 0.7605, "lr": 9.968010236724249e-07, "epoch": 0.04993278279239485, "percentage": 4.99, "elapsed_time": "0:01:47", "remaining_time": "0:33:56", "throughput": 23061.16, "total_tokens": 2467968} +{"current_steps": 782, "total_steps": 15621, "eval_loss": 0.6365677118301392, "epoch": 0.05006081556878561, "percentage": 5.01, "elapsed_time": "0:02:37", "remaining_time": "0:49:55", "throughput": 15673.76, "total_tokens": 2474432} +{"current_steps": 785, "total_steps": 15621, "loss": 0.6742, "lr": 1.0031989763275752e-06, "epoch": 0.05025286473337175, "percentage": 5.03, "elapsed_time": "0:03:20", "remaining_time": "1:03:04", "throughput": 12409.36, "total_tokens": 2484928} +{"current_steps": 790, "total_steps": 15621, "loss": 0.6802, "lr": 1.0095969289827256e-06, "epoch": 0.05057294667434863, "percentage": 5.06, "elapsed_time": "0:03:20", "remaining_time": "1:02:52", "throughput": 12448.82, "total_tokens": 2501504} +{"current_steps": 795, "total_steps": 15621, "loss": 0.5171, "lr": 1.0159948816378758e-06, "epoch": 0.050893028615325527, "percentage": 5.09, "elapsed_time": "0:03:21", "remaining_time": "1:02:40", "throughput": 12490.55, "total_tokens": 2518848} +{"current_steps": 800, "total_steps": 15621, "loss": 0.5474, "lr": 1.0223928342930262e-06, "epoch": 0.05121311055630241, "percentage": 5.12, "elapsed_time": "0:03:22", "remaining_time": "1:02:29", "throughput": 12530.13, "total_tokens": 2535680} +{"current_steps": 805, "total_steps": 15621, "loss": 0.6751, "lr": 1.0287907869481766e-06, "epoch": 0.051533192497279307, "percentage": 5.15, "elapsed_time": "0:03:23", "remaining_time": "1:02:16", "throughput": 12564.35, "total_tokens": 2550976} +{"current_steps": 810, "total_steps": 15621, "loss": 0.5552, "lr": 1.035188739603327e-06, "epoch": 0.05185327443825619, "percentage": 5.19, "elapsed_time": "0:03:23", "remaining_time": "1:02:04", "throughput": 12599.99, "total_tokens": 2566656} +{"current_steps": 815, "total_steps": 15621, "loss": 0.6319, "lr": 1.0415866922584773e-06, "epoch": 0.052173356379233086, "percentage": 5.22, "elapsed_time": "0:03:24", "remaining_time": "1:01:52", "throughput": 12632.75, "total_tokens": 2581568} +{"current_steps": 820, "total_steps": 15621, "loss": 0.64, "lr": 1.0479846449136277e-06, "epoch": 0.05249343832020997, "percentage": 5.25, "elapsed_time": "0:03:25", "remaining_time": "1:01:40", "throughput": 12665.84, "total_tokens": 2596608} +{"current_steps": 825, "total_steps": 15621, "loss": 0.672, "lr": 1.0543825975687779e-06, "epoch": 0.052813520261186866, "percentage": 5.28, "elapsed_time": "0:03:25", "remaining_time": "1:01:28", "throughput": 12699.74, "total_tokens": 2612032} +{"current_steps": 830, "total_steps": 15621, "loss": 0.7367, "lr": 1.0607805502239282e-06, "epoch": 0.05313360220216375, "percentage": 5.31, "elapsed_time": "0:03:26", "remaining_time": "1:01:16", "throughput": 12733.45, "total_tokens": 2627264} +{"current_steps": 835, "total_steps": 15621, "loss": 0.549, "lr": 1.0671785028790788e-06, "epoch": 0.053453684143140646, "percentage": 5.35, "elapsed_time": "0:03:27", "remaining_time": "1:01:05", "throughput": 12768.92, "total_tokens": 2643264} +{"current_steps": 840, "total_steps": 15621, "loss": 0.542, "lr": 1.073576455534229e-06, "epoch": 0.05377376608411753, "percentage": 5.38, "elapsed_time": "0:03:27", "remaining_time": "1:00:54", "throughput": 12804.24, "total_tokens": 2659264} +{"current_steps": 845, "total_steps": 15621, "loss": 0.5724, "lr": 1.0799744081893794e-06, "epoch": 0.054093848025094426, "percentage": 5.41, "elapsed_time": "0:03:28", "remaining_time": "1:00:42", "throughput": 12835.16, "total_tokens": 2673856} +{"current_steps": 850, "total_steps": 15621, "loss": 0.4715, "lr": 1.0863723608445297e-06, "epoch": 0.05441392996607131, "percentage": 5.44, "elapsed_time": "0:03:28", "remaining_time": "1:00:31", "throughput": 12865.5, "total_tokens": 2688448} +{"current_steps": 855, "total_steps": 15621, "loss": 0.6158, "lr": 1.09277031349968e-06, "epoch": 0.054734011907048206, "percentage": 5.47, "elapsed_time": "0:03:29", "remaining_time": "1:00:20", "throughput": 12898.27, "total_tokens": 2703872} +{"current_steps": 860, "total_steps": 15621, "loss": 0.6116, "lr": 1.0991682661548305e-06, "epoch": 0.05505409384802509, "percentage": 5.51, "elapsed_time": "0:03:30", "remaining_time": "1:00:09", "throughput": 12929.88, "total_tokens": 2719040} +{"current_steps": 865, "total_steps": 15621, "loss": 0.6182, "lr": 1.1055662188099809e-06, "epoch": 0.055374175789001986, "percentage": 5.54, "elapsed_time": "0:03:30", "remaining_time": "0:59:58", "throughput": 12964.73, "total_tokens": 2735168} +{"current_steps": 870, "total_steps": 15621, "loss": 0.5281, "lr": 1.111964171465131e-06, "epoch": 0.05569425772997887, "percentage": 5.57, "elapsed_time": "0:03:31", "remaining_time": "0:59:48", "throughput": 12997.16, "total_tokens": 2750592} +{"current_steps": 875, "total_steps": 15621, "loss": 0.6583, "lr": 1.1183621241202814e-06, "epoch": 0.056014339670955766, "percentage": 5.6, "elapsed_time": "0:03:32", "remaining_time": "0:59:38", "throughput": 13032.38, "total_tokens": 2767232} +{"current_steps": 880, "total_steps": 15621, "loss": 0.6623, "lr": 1.1247600767754318e-06, "epoch": 0.05633442161193265, "percentage": 5.63, "elapsed_time": "0:03:33", "remaining_time": "0:59:29", "throughput": 13070.01, "total_tokens": 2784768} +{"current_steps": 885, "total_steps": 15621, "loss": 0.5782, "lr": 1.1311580294305822e-06, "epoch": 0.056654503552909546, "percentage": 5.67, "elapsed_time": "0:03:33", "remaining_time": "0:59:18", "throughput": 13100.67, "total_tokens": 2799872} +{"current_steps": 890, "total_steps": 15621, "loss": 0.6444, "lr": 1.1375559820857326e-06, "epoch": 0.05697458549388643, "percentage": 5.7, "elapsed_time": "0:03:34", "remaining_time": "0:59:08", "throughput": 13133.87, "total_tokens": 2816000} +{"current_steps": 895, "total_steps": 15621, "loss": 0.5844, "lr": 1.143953934740883e-06, "epoch": 0.057294667434863326, "percentage": 5.73, "elapsed_time": "0:03:35", "remaining_time": "0:58:58", "throughput": 13165.7, "total_tokens": 2831744} +{"current_steps": 900, "total_steps": 15621, "loss": 0.6139, "lr": 1.150351887396033e-06, "epoch": 0.05761474937584021, "percentage": 5.76, "elapsed_time": "0:03:35", "remaining_time": "0:58:49", "throughput": 13197.22, "total_tokens": 2847424} +{"current_steps": 905, "total_steps": 15621, "loss": 0.6182, "lr": 1.1567498400511835e-06, "epoch": 0.057934831316817106, "percentage": 5.79, "elapsed_time": "0:03:36", "remaining_time": "0:58:38", "throughput": 13226.79, "total_tokens": 2862272} +{"current_steps": 910, "total_steps": 15621, "loss": 0.4962, "lr": 1.163147792706334e-06, "epoch": 0.05825491325779399, "percentage": 5.83, "elapsed_time": "0:03:37", "remaining_time": "0:58:28", "throughput": 13255.74, "total_tokens": 2877120} +{"current_steps": 915, "total_steps": 15621, "loss": 0.5176, "lr": 1.1695457453614842e-06, "epoch": 0.058574995198770886, "percentage": 5.86, "elapsed_time": "0:03:37", "remaining_time": "0:58:19", "throughput": 13292.08, "total_tokens": 2894592} +{"current_steps": 920, "total_steps": 15621, "loss": 0.6171, "lr": 1.1759436980166346e-06, "epoch": 0.05889507713974777, "percentage": 5.89, "elapsed_time": "0:03:38", "remaining_time": "0:58:10", "throughput": 13321.84, "total_tokens": 2909888} +{"current_steps": 925, "total_steps": 15621, "loss": 0.6226, "lr": 1.182341650671785e-06, "epoch": 0.059215159080724666, "percentage": 5.92, "elapsed_time": "0:03:39", "remaining_time": "0:58:01", "throughput": 13352.17, "total_tokens": 2925632} +{"current_steps": 930, "total_steps": 15621, "loss": 0.6205, "lr": 1.1887396033269352e-06, "epoch": 0.05953524102170155, "percentage": 5.95, "elapsed_time": "0:03:39", "remaining_time": "0:57:52", "throughput": 13383.98, "total_tokens": 2941760} +{"current_steps": 935, "total_steps": 15621, "loss": 0.7098, "lr": 1.1951375559820858e-06, "epoch": 0.059855322962678446, "percentage": 5.99, "elapsed_time": "0:03:40", "remaining_time": "0:57:42", "throughput": 13414.66, "total_tokens": 2957376} +{"current_steps": 940, "total_steps": 15621, "loss": 0.5669, "lr": 1.2015355086372361e-06, "epoch": 0.06017540490365533, "percentage": 6.02, "elapsed_time": "0:03:41", "remaining_time": "0:57:33", "throughput": 13444.05, "total_tokens": 2972800} +{"current_steps": 945, "total_steps": 15621, "loss": 0.7169, "lr": 1.2079334612923863e-06, "epoch": 0.060495486844632226, "percentage": 6.05, "elapsed_time": "0:03:41", "remaining_time": "0:57:24", "throughput": 13474.2, "total_tokens": 2988480} +{"current_steps": 950, "total_steps": 15621, "loss": 0.6581, "lr": 1.2143314139475367e-06, "epoch": 0.06081556878560911, "percentage": 6.08, "elapsed_time": "0:03:42", "remaining_time": "0:57:15", "throughput": 13504.78, "total_tokens": 3004480} +{"current_steps": 955, "total_steps": 15621, "loss": 0.4512, "lr": 1.220729366602687e-06, "epoch": 0.061135650726586006, "percentage": 6.11, "elapsed_time": "0:03:43", "remaining_time": "0:57:06", "throughput": 13534.97, "total_tokens": 3020288} +{"current_steps": 960, "total_steps": 15621, "loss": 0.5964, "lr": 1.2271273192578374e-06, "epoch": 0.06145573266756289, "percentage": 6.15, "elapsed_time": "0:03:43", "remaining_time": "0:56:58", "throughput": 13563.89, "total_tokens": 3035968} +{"current_steps": 965, "total_steps": 15621, "loss": 0.7436, "lr": 1.2335252719129878e-06, "epoch": 0.061775814608539786, "percentage": 6.18, "elapsed_time": "0:03:44", "remaining_time": "0:56:49", "throughput": 13593.47, "total_tokens": 3051776} +{"current_steps": 970, "total_steps": 15621, "loss": 0.5567, "lr": 1.2399232245681382e-06, "epoch": 0.06209589654951667, "percentage": 6.21, "elapsed_time": "0:03:45", "remaining_time": "0:56:40", "throughput": 13620.15, "total_tokens": 3066560} +{"current_steps": 975, "total_steps": 15621, "loss": 0.6348, "lr": 1.2463211772232884e-06, "epoch": 0.062415978490493566, "percentage": 6.24, "elapsed_time": "0:03:45", "remaining_time": "0:56:32", "throughput": 13650.17, "total_tokens": 3082496} +{"current_steps": 980, "total_steps": 15621, "loss": 0.6289, "lr": 1.2527191298784387e-06, "epoch": 0.06273606043147045, "percentage": 6.27, "elapsed_time": "0:03:46", "remaining_time": "0:56:23", "throughput": 13677.97, "total_tokens": 3097856} +{"current_steps": 985, "total_steps": 15621, "loss": 0.5081, "lr": 1.2591170825335893e-06, "epoch": 0.06305614237244735, "percentage": 6.31, "elapsed_time": "0:03:47", "remaining_time": "0:56:15", "throughput": 13706.76, "total_tokens": 3113664} +{"current_steps": 990, "total_steps": 15621, "loss": 0.6276, "lr": 1.2655150351887395e-06, "epoch": 0.06337622431342424, "percentage": 6.34, "elapsed_time": "0:03:47", "remaining_time": "0:56:07", "throughput": 13735.52, "total_tokens": 3129792} +{"current_steps": 995, "total_steps": 15621, "loss": 0.5917, "lr": 1.2719129878438899e-06, "epoch": 0.06369630625440113, "percentage": 6.37, "elapsed_time": "0:03:48", "remaining_time": "0:55:59", "throughput": 13762.81, "total_tokens": 3145024} +{"current_steps": 1000, "total_steps": 15621, "loss": 0.7761, "lr": 1.2783109404990402e-06, "epoch": 0.06401638819537801, "percentage": 6.4, "elapsed_time": "0:03:49", "remaining_time": "0:55:51", "throughput": 13791.92, "total_tokens": 3161216} +{"current_steps": 1005, "total_steps": 15621, "loss": 0.4107, "lr": 1.2847088931541904e-06, "epoch": 0.0643364701363549, "percentage": 6.43, "elapsed_time": "0:03:49", "remaining_time": "0:55:43", "throughput": 13820.09, "total_tokens": 3176960} +{"current_steps": 1010, "total_steps": 15621, "loss": 0.6098, "lr": 1.291106845809341e-06, "epoch": 0.0646565520773318, "percentage": 6.47, "elapsed_time": "0:03:50", "remaining_time": "0:55:35", "throughput": 13849.22, "total_tokens": 3193088} +{"current_steps": 1015, "total_steps": 15621, "loss": 0.6776, "lr": 1.2975047984644914e-06, "epoch": 0.0649766340183087, "percentage": 6.5, "elapsed_time": "0:03:51", "remaining_time": "0:55:28", "throughput": 13879.96, "total_tokens": 3210112} +{"current_steps": 1020, "total_steps": 15621, "loss": 0.508, "lr": 1.3039027511196418e-06, "epoch": 0.06529671595928557, "percentage": 6.53, "elapsed_time": "0:03:51", "remaining_time": "0:55:19", "throughput": 13905.02, "total_tokens": 3224768} +{"current_steps": 1025, "total_steps": 15621, "loss": 0.6266, "lr": 1.310300703774792e-06, "epoch": 0.06561679790026247, "percentage": 6.56, "elapsed_time": "0:03:52", "remaining_time": "0:55:11", "throughput": 13931.6, "total_tokens": 3240128} +{"current_steps": 1030, "total_steps": 15621, "loss": 0.5607, "lr": 1.3166986564299423e-06, "epoch": 0.06593687984123936, "percentage": 6.59, "elapsed_time": "0:03:53", "remaining_time": "0:55:04", "throughput": 13960.95, "total_tokens": 3256576} +{"current_steps": 1035, "total_steps": 15621, "loss": 0.5973, "lr": 1.3230966090850929e-06, "epoch": 0.06625696178221625, "percentage": 6.63, "elapsed_time": "0:03:53", "remaining_time": "0:54:56", "throughput": 13988.01, "total_tokens": 3272384} +{"current_steps": 1040, "total_steps": 15621, "loss": 0.4099, "lr": 1.329494561740243e-06, "epoch": 0.06657704372319313, "percentage": 6.66, "elapsed_time": "0:03:54", "remaining_time": "0:54:49", "throughput": 14015.89, "total_tokens": 3288512} +{"current_steps": 1045, "total_steps": 15621, "loss": 0.4889, "lr": 1.3358925143953934e-06, "epoch": 0.06689712566417003, "percentage": 6.69, "elapsed_time": "0:03:55", "remaining_time": "0:54:42", "throughput": 14048.0, "total_tokens": 3306304} +{"current_steps": 1050, "total_steps": 15621, "loss": 0.4795, "lr": 1.3422904670505438e-06, "epoch": 0.06721720760514692, "percentage": 6.72, "elapsed_time": "0:03:56", "remaining_time": "0:54:35", "throughput": 14072.86, "total_tokens": 3321344} +{"current_steps": 1055, "total_steps": 15621, "loss": 0.6207, "lr": 1.348688419705694e-06, "epoch": 0.06753728954612381, "percentage": 6.75, "elapsed_time": "0:03:56", "remaining_time": "0:54:28", "throughput": 14102.83, "total_tokens": 3338560} +{"current_steps": 1060, "total_steps": 15621, "loss": 0.5093, "lr": 1.3550863723608446e-06, "epoch": 0.06785737148710069, "percentage": 6.79, "elapsed_time": "0:03:57", "remaining_time": "0:54:20", "throughput": 14125.91, "total_tokens": 3353152} +{"current_steps": 1065, "total_steps": 15621, "loss": 0.5401, "lr": 1.361484325015995e-06, "epoch": 0.06817745342807759, "percentage": 6.82, "elapsed_time": "0:03:58", "remaining_time": "0:54:13", "throughput": 14153.27, "total_tokens": 3369536} +{"current_steps": 1070, "total_steps": 15621, "loss": 0.6053, "lr": 1.3678822776711451e-06, "epoch": 0.06849753536905448, "percentage": 6.85, "elapsed_time": "0:03:58", "remaining_time": "0:54:06", "throughput": 14177.96, "total_tokens": 3384832} +{"current_steps": 1075, "total_steps": 15621, "loss": 0.494, "lr": 1.3742802303262955e-06, "epoch": 0.06881761731003137, "percentage": 6.88, "elapsed_time": "0:03:59", "remaining_time": "0:53:59", "throughput": 14200.76, "total_tokens": 3399424} +{"current_steps": 1080, "total_steps": 15621, "loss": 0.6625, "lr": 1.3806781829814459e-06, "epoch": 0.06913769925100825, "percentage": 6.91, "elapsed_time": "0:04:00", "remaining_time": "0:53:52", "throughput": 14229.84, "total_tokens": 3416704} +{"current_steps": 1085, "total_steps": 15621, "loss": 0.5524, "lr": 1.3870761356365963e-06, "epoch": 0.06945778119198515, "percentage": 6.95, "elapsed_time": "0:04:00", "remaining_time": "0:53:45", "throughput": 14253.45, "total_tokens": 3431552} +{"current_steps": 1090, "total_steps": 15621, "loss": 0.6509, "lr": 1.3934740882917466e-06, "epoch": 0.06977786313296204, "percentage": 6.98, "elapsed_time": "0:04:01", "remaining_time": "0:53:38", "throughput": 14279.05, "total_tokens": 3447488} +{"current_steps": 1095, "total_steps": 15621, "loss": 0.5205, "lr": 1.399872040946897e-06, "epoch": 0.07009794507393893, "percentage": 7.01, "elapsed_time": "0:04:02", "remaining_time": "0:53:31", "throughput": 14304.57, "total_tokens": 3463424} +{"current_steps": 1100, "total_steps": 15621, "loss": 0.6568, "lr": 1.4062699936020472e-06, "epoch": 0.07041802701491581, "percentage": 7.04, "elapsed_time": "0:04:02", "remaining_time": "0:53:25", "throughput": 14330.71, "total_tokens": 3479680} +{"current_steps": 1105, "total_steps": 15621, "loss": 0.5165, "lr": 1.4126679462571976e-06, "epoch": 0.0707381089558927, "percentage": 7.07, "elapsed_time": "0:04:03", "remaining_time": "0:53:18", "throughput": 14356.3, "total_tokens": 3495552} +{"current_steps": 1110, "total_steps": 15621, "loss": 0.4769, "lr": 1.4190658989123481e-06, "epoch": 0.0710581908968696, "percentage": 7.11, "elapsed_time": "0:04:04", "remaining_time": "0:53:11", "throughput": 14380.31, "total_tokens": 3510976} +{"current_steps": 1115, "total_steps": 15621, "loss": 0.6453, "lr": 1.4254638515674983e-06, "epoch": 0.0713782728378465, "percentage": 7.14, "elapsed_time": "0:04:04", "remaining_time": "0:53:04", "throughput": 14403.03, "total_tokens": 3526016} +{"current_steps": 1120, "total_steps": 15621, "loss": 0.5647, "lr": 1.4318618042226487e-06, "epoch": 0.07169835477882337, "percentage": 7.17, "elapsed_time": "0:04:05", "remaining_time": "0:52:57", "throughput": 14424.95, "total_tokens": 3540544} +{"current_steps": 1125, "total_steps": 15621, "loss": 0.6074, "lr": 1.438259756877799e-06, "epoch": 0.07201843671980027, "percentage": 7.2, "elapsed_time": "0:04:06", "remaining_time": "0:52:51", "throughput": 14449.58, "total_tokens": 3556416} +{"current_steps": 1130, "total_steps": 15621, "loss": 0.5063, "lr": 1.4446577095329492e-06, "epoch": 0.07233851866077716, "percentage": 7.23, "elapsed_time": "0:04:06", "remaining_time": "0:52:44", "throughput": 14474.08, "total_tokens": 3572096} +{"current_steps": 1135, "total_steps": 15621, "loss": 0.4966, "lr": 1.4510556621880998e-06, "epoch": 0.07265860060175405, "percentage": 7.27, "elapsed_time": "0:04:07", "remaining_time": "0:52:38", "throughput": 14497.77, "total_tokens": 3587712} +{"current_steps": 1140, "total_steps": 15621, "loss": 0.5503, "lr": 1.4574536148432502e-06, "epoch": 0.07297868254273093, "percentage": 7.3, "elapsed_time": "0:04:08", "remaining_time": "0:52:32", "throughput": 14525.65, "total_tokens": 3605056} +{"current_steps": 1145, "total_steps": 15621, "loss": 0.6826, "lr": 1.4638515674984004e-06, "epoch": 0.07329876448370783, "percentage": 7.33, "elapsed_time": "0:04:08", "remaining_time": "0:52:26", "throughput": 14550.55, "total_tokens": 3621184} +{"current_steps": 1150, "total_steps": 15621, "loss": 0.5917, "lr": 1.4702495201535507e-06, "epoch": 0.07361884642468472, "percentage": 7.36, "elapsed_time": "0:04:09", "remaining_time": "0:52:19", "throughput": 14570.73, "total_tokens": 3635392} +{"current_steps": 1155, "total_steps": 15621, "loss": 0.5064, "lr": 1.4766474728087011e-06, "epoch": 0.07393892836566161, "percentage": 7.39, "elapsed_time": "0:04:10", "remaining_time": "0:52:12", "throughput": 14591.93, "total_tokens": 3649984} +{"current_steps": 1160, "total_steps": 15621, "loss": 0.5239, "lr": 1.4830454254638515e-06, "epoch": 0.07425901030663849, "percentage": 7.43, "elapsed_time": "0:04:10", "remaining_time": "0:52:06", "throughput": 14616.15, "total_tokens": 3665920} +{"current_steps": 1165, "total_steps": 15621, "loss": 0.4573, "lr": 1.4894433781190019e-06, "epoch": 0.07457909224761539, "percentage": 7.46, "elapsed_time": "0:04:11", "remaining_time": "0:52:00", "throughput": 14636.19, "total_tokens": 3680256} +{"current_steps": 1170, "total_steps": 15621, "loss": 0.5284, "lr": 1.4958413307741523e-06, "epoch": 0.07489917418859228, "percentage": 7.49, "elapsed_time": "0:04:12", "remaining_time": "0:51:54", "throughput": 14662.84, "total_tokens": 3697536} +{"current_steps": 1175, "total_steps": 15621, "loss": 0.6147, "lr": 1.5022392834293024e-06, "epoch": 0.07521925612956917, "percentage": 7.52, "elapsed_time": "0:04:12", "remaining_time": "0:51:48", "throughput": 14684.77, "total_tokens": 3713088} +{"current_steps": 1180, "total_steps": 15621, "loss": 0.6674, "lr": 1.5086372360844528e-06, "epoch": 0.07553933807054607, "percentage": 7.55, "elapsed_time": "0:04:13", "remaining_time": "0:51:43", "throughput": 14710.48, "total_tokens": 3729920} +{"current_steps": 1185, "total_steps": 15621, "loss": 0.5478, "lr": 1.5150351887396034e-06, "epoch": 0.07585942001152295, "percentage": 7.59, "elapsed_time": "0:04:14", "remaining_time": "0:51:37", "throughput": 14733.52, "total_tokens": 3745664} +{"current_steps": 1190, "total_steps": 15621, "loss": 0.5246, "lr": 1.5214331413947536e-06, "epoch": 0.07617950195249984, "percentage": 7.62, "elapsed_time": "0:04:14", "remaining_time": "0:51:30", "throughput": 14754.45, "total_tokens": 3760576} +{"current_steps": 1195, "total_steps": 15621, "loss": 0.5073, "lr": 1.527831094049904e-06, "epoch": 0.07649958389347673, "percentage": 7.65, "elapsed_time": "0:04:15", "remaining_time": "0:51:25", "throughput": 14777.12, "total_tokens": 3776576} +{"current_steps": 1200, "total_steps": 15621, "loss": 0.5868, "lr": 1.5342290467050543e-06, "epoch": 0.07681966583445363, "percentage": 7.68, "elapsed_time": "0:04:16", "remaining_time": "0:51:19", "throughput": 14799.54, "total_tokens": 3792384} +{"current_steps": 1205, "total_steps": 15621, "loss": 0.6464, "lr": 1.5406269993602045e-06, "epoch": 0.0771397477754305, "percentage": 7.71, "elapsed_time": "0:04:16", "remaining_time": "0:51:13", "throughput": 14818.14, "total_tokens": 3806592} +{"current_steps": 1210, "total_steps": 15621, "loss": 0.5778, "lr": 1.547024952015355e-06, "epoch": 0.0774598297164074, "percentage": 7.75, "elapsed_time": "0:04:17", "remaining_time": "0:51:07", "throughput": 14839.93, "total_tokens": 3822080} +{"current_steps": 1215, "total_steps": 15621, "loss": 0.5346, "lr": 1.5534229046705055e-06, "epoch": 0.07777991165738429, "percentage": 7.78, "elapsed_time": "0:04:18", "remaining_time": "0:51:01", "throughput": 14860.29, "total_tokens": 3837120} +{"current_steps": 1220, "total_steps": 15621, "loss": 0.6952, "lr": 1.5598208573256556e-06, "epoch": 0.07809999359836119, "percentage": 7.81, "elapsed_time": "0:04:18", "remaining_time": "0:50:55", "throughput": 14882.3, "total_tokens": 3852864} +{"current_steps": 1225, "total_steps": 15621, "loss": 0.5148, "lr": 1.566218809980806e-06, "epoch": 0.07842007553933807, "percentage": 7.84, "elapsed_time": "0:04:19", "remaining_time": "0:50:50", "throughput": 14905.6, "total_tokens": 3869184} +{"current_steps": 1230, "total_steps": 15621, "loss": 0.5376, "lr": 1.5726167626359564e-06, "epoch": 0.07874015748031496, "percentage": 7.87, "elapsed_time": "0:04:20", "remaining_time": "0:50:45", "throughput": 14928.04, "total_tokens": 3885248} +{"current_steps": 1235, "total_steps": 15621, "loss": 0.4261, "lr": 1.5790147152911068e-06, "epoch": 0.07906023942129185, "percentage": 7.91, "elapsed_time": "0:04:20", "remaining_time": "0:50:39", "throughput": 14948.8, "total_tokens": 3900416} +{"current_steps": 1240, "total_steps": 15621, "loss": 0.553, "lr": 1.5854126679462571e-06, "epoch": 0.07938032136226875, "percentage": 7.94, "elapsed_time": "0:04:21", "remaining_time": "0:50:33", "throughput": 14970.39, "total_tokens": 3916096} +{"current_steps": 1245, "total_steps": 15621, "loss": 0.6992, "lr": 1.5918106206014075e-06, "epoch": 0.07970040330324563, "percentage": 7.97, "elapsed_time": "0:04:22", "remaining_time": "0:50:28", "throughput": 14995.17, "total_tokens": 3933312} +{"current_steps": 1250, "total_steps": 15621, "loss": 0.615, "lr": 1.5982085732565577e-06, "epoch": 0.08002048524422252, "percentage": 8.0, "elapsed_time": "0:04:22", "remaining_time": "0:50:23", "throughput": 15017.58, "total_tokens": 3949440} +{"current_steps": 1255, "total_steps": 15621, "loss": 0.681, "lr": 1.604606525911708e-06, "epoch": 0.08034056718519941, "percentage": 8.03, "elapsed_time": "0:04:23", "remaining_time": "0:50:18", "throughput": 15038.83, "total_tokens": 3964992} +{"current_steps": 1260, "total_steps": 15621, "loss": 0.733, "lr": 1.6110044785668586e-06, "epoch": 0.0806606491261763, "percentage": 8.07, "elapsed_time": "0:04:24", "remaining_time": "0:50:12", "throughput": 15062.36, "total_tokens": 3981696} +{"current_steps": 1265, "total_steps": 15621, "loss": 0.6123, "lr": 1.617402431222009e-06, "epoch": 0.08098073106715319, "percentage": 8.1, "elapsed_time": "0:04:25", "remaining_time": "0:50:07", "throughput": 15083.29, "total_tokens": 3997248} +{"current_steps": 1270, "total_steps": 15621, "loss": 0.6693, "lr": 1.6238003838771592e-06, "epoch": 0.08130081300813008, "percentage": 8.13, "elapsed_time": "0:04:25", "remaining_time": "0:50:01", "throughput": 15101.53, "total_tokens": 4011648} +{"current_steps": 1275, "total_steps": 15621, "loss": 0.5273, "lr": 1.6301983365323096e-06, "epoch": 0.08162089494910697, "percentage": 8.16, "elapsed_time": "0:04:26", "remaining_time": "0:49:56", "throughput": 15123.88, "total_tokens": 4028160} +{"current_steps": 1280, "total_steps": 15621, "loss": 0.5358, "lr": 1.63659628918746e-06, "epoch": 0.08194097689008387, "percentage": 8.19, "elapsed_time": "0:04:27", "remaining_time": "0:49:51", "throughput": 15143.93, "total_tokens": 4043584} +{"current_steps": 1285, "total_steps": 15621, "loss": 0.7184, "lr": 1.6429942418426103e-06, "epoch": 0.08226105883106075, "percentage": 8.23, "elapsed_time": "0:04:27", "remaining_time": "0:49:46", "throughput": 15164.79, "total_tokens": 4059456} +{"current_steps": 1290, "total_steps": 15621, "loss": 0.6079, "lr": 1.6493921944977607e-06, "epoch": 0.08258114077203764, "percentage": 8.26, "elapsed_time": "0:04:28", "remaining_time": "0:49:41", "throughput": 15187.4, "total_tokens": 4076096} +{"current_steps": 1295, "total_steps": 15621, "loss": 0.6143, "lr": 1.655790147152911e-06, "epoch": 0.08290122271301453, "percentage": 8.29, "elapsed_time": "0:04:29", "remaining_time": "0:49:37", "throughput": 15211.64, "total_tokens": 4093568} +{"current_steps": 1300, "total_steps": 15621, "loss": 0.66, "lr": 1.6621880998080612e-06, "epoch": 0.08322130465399143, "percentage": 8.32, "elapsed_time": "0:04:29", "remaining_time": "0:49:31", "throughput": 15231.45, "total_tokens": 4108864} +{"current_steps": 1305, "total_steps": 15621, "loss": 0.6375, "lr": 1.6685860524632116e-06, "epoch": 0.0835413865949683, "percentage": 8.35, "elapsed_time": "0:04:30", "remaining_time": "0:49:26", "throughput": 15250.84, "total_tokens": 4124224} +{"current_steps": 1310, "total_steps": 15621, "loss": 0.5497, "lr": 1.6749840051183622e-06, "epoch": 0.0838614685359452, "percentage": 8.39, "elapsed_time": "0:04:31", "remaining_time": "0:49:21", "throughput": 15269.44, "total_tokens": 4139008} +{"current_steps": 1315, "total_steps": 15621, "loss": 0.5553, "lr": 1.6813819577735124e-06, "epoch": 0.08418155047692209, "percentage": 8.42, "elapsed_time": "0:04:31", "remaining_time": "0:49:16", "throughput": 15290.21, "total_tokens": 4155008} +{"current_steps": 1320, "total_steps": 15621, "loss": 0.4854, "lr": 1.6877799104286628e-06, "epoch": 0.08450163241789899, "percentage": 8.45, "elapsed_time": "0:04:32", "remaining_time": "0:49:11", "throughput": 15314.13, "total_tokens": 4172544} +{"current_steps": 1325, "total_steps": 15621, "loss": 0.6347, "lr": 1.6941778630838131e-06, "epoch": 0.08482171435887587, "percentage": 8.48, "elapsed_time": "0:04:33", "remaining_time": "0:49:06", "throughput": 15334.55, "total_tokens": 4188416} +{"current_steps": 1330, "total_steps": 15621, "loss": 0.4947, "lr": 1.7005758157389633e-06, "epoch": 0.08514179629985276, "percentage": 8.51, "elapsed_time": "0:04:33", "remaining_time": "0:49:01", "throughput": 15351.17, "total_tokens": 4202560} +{"current_steps": 1335, "total_steps": 15621, "loss": 0.5618, "lr": 1.706973768394114e-06, "epoch": 0.08546187824082965, "percentage": 8.55, "elapsed_time": "0:04:34", "remaining_time": "0:48:57", "throughput": 15372.84, "total_tokens": 4219392} +{"current_steps": 1340, "total_steps": 15621, "loss": 0.4941, "lr": 1.7133717210492643e-06, "epoch": 0.08578196018180655, "percentage": 8.58, "elapsed_time": "0:04:35", "remaining_time": "0:48:52", "throughput": 15392.67, "total_tokens": 4235328} +{"current_steps": 1345, "total_steps": 15621, "loss": 0.5828, "lr": 1.7197696737044144e-06, "epoch": 0.08610204212278343, "percentage": 8.61, "elapsed_time": "0:04:35", "remaining_time": "0:48:47", "throughput": 15410.97, "total_tokens": 4250368} +{"current_steps": 1350, "total_steps": 15621, "loss": 0.4594, "lr": 1.7261676263595648e-06, "epoch": 0.08642212406376032, "percentage": 8.64, "elapsed_time": "0:04:36", "remaining_time": "0:48:42", "throughput": 15430.02, "total_tokens": 4265856} +{"current_steps": 1355, "total_steps": 15621, "loss": 0.6623, "lr": 1.7325655790147152e-06, "epoch": 0.08674220600473721, "percentage": 8.67, "elapsed_time": "0:04:37", "remaining_time": "0:48:37", "throughput": 15449.77, "total_tokens": 4281792} +{"current_steps": 1360, "total_steps": 15621, "loss": 0.5861, "lr": 1.7389635316698656e-06, "epoch": 0.0870622879457141, "percentage": 8.71, "elapsed_time": "0:04:37", "remaining_time": "0:48:33", "throughput": 15468.27, "total_tokens": 4297088} +{"current_steps": 1365, "total_steps": 15621, "loss": 0.5083, "lr": 1.745361484325016e-06, "epoch": 0.087382369886691, "percentage": 8.74, "elapsed_time": "0:04:38", "remaining_time": "0:48:28", "throughput": 15485.87, "total_tokens": 4312192} +{"current_steps": 1370, "total_steps": 15621, "loss": 0.4848, "lr": 1.7517594369801663e-06, "epoch": 0.08770245182766788, "percentage": 8.77, "elapsed_time": "0:04:39", "remaining_time": "0:48:23", "throughput": 15502.74, "total_tokens": 4326720} +{"current_steps": 1375, "total_steps": 15621, "loss": 0.7944, "lr": 1.7581573896353165e-06, "epoch": 0.08802253376864477, "percentage": 8.8, "elapsed_time": "0:04:39", "remaining_time": "0:48:18", "throughput": 15520.42, "total_tokens": 4341760} +{"current_steps": 1380, "total_steps": 15621, "loss": 0.6211, "lr": 1.7645553422904669e-06, "epoch": 0.08834261570962167, "percentage": 8.83, "elapsed_time": "0:04:40", "remaining_time": "0:48:13", "throughput": 15540.14, "total_tokens": 4357760} +{"current_steps": 1385, "total_steps": 15621, "loss": 0.6269, "lr": 1.7709532949456175e-06, "epoch": 0.08866269765059856, "percentage": 8.87, "elapsed_time": "0:04:41", "remaining_time": "0:48:09", "throughput": 15559.53, "total_tokens": 4373824} +{"current_steps": 1390, "total_steps": 15621, "loss": 0.4764, "lr": 1.7773512476007676e-06, "epoch": 0.08898277959157544, "percentage": 8.9, "elapsed_time": "0:04:41", "remaining_time": "0:48:04", "throughput": 15577.39, "total_tokens": 4388992} +{"current_steps": 1395, "total_steps": 15621, "loss": 0.5266, "lr": 1.783749200255918e-06, "epoch": 0.08930286153255233, "percentage": 8.93, "elapsed_time": "0:04:42", "remaining_time": "0:48:00", "throughput": 15594.89, "total_tokens": 4404288} +{"current_steps": 1400, "total_steps": 15621, "loss": 0.5359, "lr": 1.7901471529110684e-06, "epoch": 0.08962294347352923, "percentage": 8.96, "elapsed_time": "0:04:43", "remaining_time": "0:47:55", "throughput": 15613.53, "total_tokens": 4419840} +{"current_steps": 1405, "total_steps": 15621, "loss": 0.5118, "lr": 1.7965451055662186e-06, "epoch": 0.08994302541450612, "percentage": 8.99, "elapsed_time": "0:04:43", "remaining_time": "0:47:50", "throughput": 15631.41, "total_tokens": 4435200} +{"current_steps": 1410, "total_steps": 15621, "loss": 0.61, "lr": 1.8029430582213691e-06, "epoch": 0.090263107355483, "percentage": 9.03, "elapsed_time": "0:04:44", "remaining_time": "0:47:46", "throughput": 15648.54, "total_tokens": 4450368} +{"current_steps": 1415, "total_steps": 15621, "loss": 0.4899, "lr": 1.8093410108765195e-06, "epoch": 0.09058318929645989, "percentage": 9.06, "elapsed_time": "0:04:45", "remaining_time": "0:47:41", "throughput": 15666.91, "total_tokens": 4466048} +{"current_steps": 1420, "total_steps": 15621, "loss": 0.6001, "lr": 1.8157389635316697e-06, "epoch": 0.09090327123743679, "percentage": 9.09, "elapsed_time": "0:04:45", "remaining_time": "0:47:37", "throughput": 15685.53, "total_tokens": 4481920} +{"current_steps": 1425, "total_steps": 15621, "loss": 0.5666, "lr": 1.82213691618682e-06, "epoch": 0.09122335317841368, "percentage": 9.12, "elapsed_time": "0:04:46", "remaining_time": "0:47:33", "throughput": 15704.84, "total_tokens": 4498112} +{"current_steps": 1430, "total_steps": 15621, "loss": 0.4314, "lr": 1.8285348688419704e-06, "epoch": 0.09154343511939056, "percentage": 9.15, "elapsed_time": "0:04:47", "remaining_time": "0:47:29", "throughput": 15726.24, "total_tokens": 4515648} +{"current_steps": 1435, "total_steps": 15621, "loss": 0.5809, "lr": 1.8349328214971208e-06, "epoch": 0.09186351706036745, "percentage": 9.19, "elapsed_time": "0:04:47", "remaining_time": "0:47:25", "throughput": 15744.98, "total_tokens": 4531840} +{"current_steps": 1440, "total_steps": 15621, "loss": 0.5463, "lr": 1.8413307741522712e-06, "epoch": 0.09218359900134435, "percentage": 9.22, "elapsed_time": "0:04:48", "remaining_time": "0:47:21", "throughput": 15762.7, "total_tokens": 4547456} +{"current_steps": 1445, "total_steps": 15621, "loss": 0.5548, "lr": 1.8477287268074216e-06, "epoch": 0.09250368094232124, "percentage": 9.25, "elapsed_time": "0:04:49", "remaining_time": "0:47:16", "throughput": 15780.93, "total_tokens": 4563328} +{"current_steps": 1450, "total_steps": 15621, "loss": 0.5856, "lr": 1.8541266794625718e-06, "epoch": 0.09282376288329812, "percentage": 9.28, "elapsed_time": "0:04:49", "remaining_time": "0:47:12", "throughput": 15798.94, "total_tokens": 4579392} +{"current_steps": 1455, "total_steps": 15621, "loss": 0.6159, "lr": 1.8605246321177221e-06, "epoch": 0.09314384482427501, "percentage": 9.31, "elapsed_time": "0:04:50", "remaining_time": "0:47:08", "throughput": 15817.43, "total_tokens": 4595584} +{"current_steps": 1460, "total_steps": 15621, "loss": 0.5707, "lr": 1.8669225847728727e-06, "epoch": 0.0934639267652519, "percentage": 9.35, "elapsed_time": "0:04:51", "remaining_time": "0:47:04", "throughput": 15832.4, "total_tokens": 4610112} +{"current_steps": 1465, "total_steps": 15621, "loss": 0.7073, "lr": 1.8733205374280229e-06, "epoch": 0.0937840087062288, "percentage": 9.38, "elapsed_time": "0:04:51", "remaining_time": "0:47:00", "throughput": 15851.09, "total_tokens": 4626432} +{"current_steps": 1470, "total_steps": 15621, "loss": 0.5799, "lr": 1.8797184900831733e-06, "epoch": 0.09410409064720568, "percentage": 9.41, "elapsed_time": "0:04:52", "remaining_time": "0:46:56", "throughput": 15867.81, "total_tokens": 4641792} +{"current_steps": 1475, "total_steps": 15621, "loss": 0.4144, "lr": 1.8861164427383236e-06, "epoch": 0.09442417258818257, "percentage": 9.44, "elapsed_time": "0:04:53", "remaining_time": "0:46:51", "throughput": 15883.78, "total_tokens": 4656896} +{"current_steps": 1480, "total_steps": 15621, "loss": 0.6021, "lr": 1.8925143953934738e-06, "epoch": 0.09474425452915947, "percentage": 9.47, "elapsed_time": "0:04:53", "remaining_time": "0:46:48", "throughput": 15901.99, "total_tokens": 4673472} +{"current_steps": 1485, "total_steps": 15621, "loss": 0.4252, "lr": 1.8989123480486244e-06, "epoch": 0.09506433647013636, "percentage": 9.51, "elapsed_time": "0:04:54", "remaining_time": "0:46:43", "throughput": 15918.41, "total_tokens": 4688896} +{"current_steps": 1490, "total_steps": 15621, "loss": 0.6809, "lr": 1.9053103007037748e-06, "epoch": 0.09538441841111324, "percentage": 9.54, "elapsed_time": "0:04:55", "remaining_time": "0:46:39", "throughput": 15935.16, "total_tokens": 4704576} +{"current_steps": 1495, "total_steps": 15621, "loss": 0.6032, "lr": 1.911708253358925e-06, "epoch": 0.09570450035209013, "percentage": 9.57, "elapsed_time": "0:04:55", "remaining_time": "0:46:35", "throughput": 15949.94, "total_tokens": 4719040} +{"current_steps": 1500, "total_steps": 15621, "loss": 0.4781, "lr": 1.9181062060140753e-06, "epoch": 0.09602458229306703, "percentage": 9.6, "elapsed_time": "0:04:56", "remaining_time": "0:46:31", "throughput": 15965.19, "total_tokens": 4733696} +{"current_steps": 1505, "total_steps": 15621, "loss": 0.6128, "lr": 1.9245041586692255e-06, "epoch": 0.09634466423404392, "percentage": 9.63, "elapsed_time": "0:04:57", "remaining_time": "0:46:27", "throughput": 15981.46, "total_tokens": 4748992} +{"current_steps": 1510, "total_steps": 15621, "loss": 0.6961, "lr": 1.930902111324376e-06, "epoch": 0.0966647461750208, "percentage": 9.67, "elapsed_time": "0:04:57", "remaining_time": "0:46:23", "throughput": 15998.8, "total_tokens": 4764992} +{"current_steps": 1515, "total_steps": 15621, "loss": 0.6847, "lr": 1.9373000639795267e-06, "epoch": 0.09698482811599769, "percentage": 9.7, "elapsed_time": "0:04:58", "remaining_time": "0:46:19", "throughput": 16014.73, "total_tokens": 4780352} +{"current_steps": 1520, "total_steps": 15621, "loss": 0.5486, "lr": 1.943698016634677e-06, "epoch": 0.09730491005697459, "percentage": 9.73, "elapsed_time": "0:04:59", "remaining_time": "0:46:15", "throughput": 16031.23, "total_tokens": 4796224} +{"current_steps": 1525, "total_steps": 15621, "loss": 0.5423, "lr": 1.950095969289827e-06, "epoch": 0.09762499199795148, "percentage": 9.76, "elapsed_time": "0:04:59", "remaining_time": "0:46:11", "throughput": 16047.29, "total_tokens": 4811840} +{"current_steps": 1530, "total_steps": 15621, "loss": 0.5833, "lr": 1.9564939219449776e-06, "epoch": 0.09794507393892836, "percentage": 9.79, "elapsed_time": "0:05:00", "remaining_time": "0:46:07", "throughput": 16061.66, "total_tokens": 4826432} +{"current_steps": 1535, "total_steps": 15621, "loss": 0.3899, "lr": 1.9628918746001278e-06, "epoch": 0.09826515587990525, "percentage": 9.83, "elapsed_time": "0:05:01", "remaining_time": "0:46:03", "throughput": 16077.67, "total_tokens": 4841920} +{"current_steps": 1540, "total_steps": 15621, "loss": 0.5976, "lr": 1.9692898272552783e-06, "epoch": 0.09858523782088215, "percentage": 9.86, "elapsed_time": "0:05:01", "remaining_time": "0:45:59", "throughput": 16093.97, "total_tokens": 4857536} +{"current_steps": 1545, "total_steps": 15621, "loss": 0.6862, "lr": 1.9756877799104285e-06, "epoch": 0.09890531976185904, "percentage": 9.89, "elapsed_time": "0:05:02", "remaining_time": "0:45:55", "throughput": 16110.53, "total_tokens": 4873408} +{"current_steps": 1550, "total_steps": 15621, "loss": 0.5992, "lr": 1.9820857325655787e-06, "epoch": 0.09922540170283592, "percentage": 9.92, "elapsed_time": "0:05:03", "remaining_time": "0:45:52", "throughput": 16127.39, "total_tokens": 4889536} +{"current_steps": 1555, "total_steps": 15621, "loss": 0.6222, "lr": 1.9884836852207293e-06, "epoch": 0.09954548364381281, "percentage": 9.95, "elapsed_time": "0:05:03", "remaining_time": "0:45:48", "throughput": 16142.07, "total_tokens": 4904448} +{"current_steps": 1560, "total_steps": 15621, "loss": 0.6538, "lr": 1.99488163787588e-06, "epoch": 0.0998655655847897, "percentage": 9.99, "elapsed_time": "0:05:04", "remaining_time": "0:45:44", "throughput": 16156.87, "total_tokens": 4919616} +{"current_steps": 1564, "total_steps": 15621, "eval_loss": 0.5419119000434875, "epoch": 0.10012163113757122, "percentage": 10.01, "elapsed_time": "0:05:55", "remaining_time": "0:53:16", "throughput": 13867.35, "total_tokens": 4931328} +{"current_steps": 1565, "total_steps": 15621, "loss": 0.5142, "lr": 1.9999999750297625e-06, "epoch": 0.1001856475257666, "percentage": 10.02, "elapsed_time": "0:06:35", "remaining_time": "0:59:15", "throughput": 12465.62, "total_tokens": 4934144} +{"current_steps": 1570, "total_steps": 15621, "loss": 0.5243, "lr": 1.9999991010715873e-06, "epoch": 0.1005057294667435, "percentage": 10.05, "elapsed_time": "0:06:36", "remaining_time": "0:59:08", "throughput": 12484.85, "total_tokens": 4950272} +{"current_steps": 1575, "total_steps": 15621, "loss": 0.5354, "lr": 1.999996978602793e-06, "epoch": 0.10082581140772037, "percentage": 10.08, "elapsed_time": "0:06:37", "remaining_time": "0:59:01", "throughput": 12501.82, "total_tokens": 4965056} +{"current_steps": 1580, "total_steps": 15621, "loss": 0.5617, "lr": 1.99999360762603e-06, "epoch": 0.10114589334869727, "percentage": 10.11, "elapsed_time": "0:06:37", "remaining_time": "0:58:55", "throughput": 12519.02, "total_tokens": 4980160} +{"current_steps": 1585, "total_steps": 15621, "loss": 0.4574, "lr": 1.9999889881455065e-06, "epoch": 0.10146597528967416, "percentage": 10.15, "elapsed_time": "0:06:38", "remaining_time": "0:58:49", "throughput": 12538.98, "total_tokens": 4996992} +{"current_steps": 1590, "total_steps": 15621, "loss": 0.5212, "lr": 1.9999831201669897e-06, "epoch": 0.10178605723065105, "percentage": 10.18, "elapsed_time": "0:06:39", "remaining_time": "0:58:42", "throughput": 12557.24, "total_tokens": 5012608} +{"current_steps": 1595, "total_steps": 15621, "loss": 0.4917, "lr": 1.9999760036978067e-06, "epoch": 0.10210613917162793, "percentage": 10.21, "elapsed_time": "0:06:39", "remaining_time": "0:58:36", "throughput": 12574.66, "total_tokens": 5027840} +{"current_steps": 1600, "total_steps": 15621, "loss": 0.5698, "lr": 1.9999676387468417e-06, "epoch": 0.10242622111260483, "percentage": 10.24, "elapsed_time": "0:06:40", "remaining_time": "0:58:29", "throughput": 12591.65, "total_tokens": 5042752} +{"current_steps": 1605, "total_steps": 15621, "loss": 0.5443, "lr": 1.999958025324539e-06, "epoch": 0.10274630305358172, "percentage": 10.27, "elapsed_time": "0:06:41", "remaining_time": "0:58:23", "throughput": 12610.02, "total_tokens": 5058624} +{"current_steps": 1610, "total_steps": 15621, "loss": 0.6261, "lr": 1.999947163442901e-06, "epoch": 0.10306638499455861, "percentage": 10.31, "elapsed_time": "0:06:41", "remaining_time": "0:58:17", "throughput": 12629.24, "total_tokens": 5075008} +{"current_steps": 1615, "total_steps": 15621, "loss": 0.5363, "lr": 1.9999350531154884e-06, "epoch": 0.10338646693553549, "percentage": 10.34, "elapsed_time": "0:06:42", "remaining_time": "0:58:10", "throughput": 12647.49, "total_tokens": 5090880} +{"current_steps": 1620, "total_steps": 15621, "loss": 0.5701, "lr": 1.9999216943574223e-06, "epoch": 0.10370654887651239, "percentage": 10.37, "elapsed_time": "0:06:43", "remaining_time": "0:58:04", "throughput": 12665.56, "total_tokens": 5106816} +{"current_steps": 1625, "total_steps": 15621, "loss": 0.463, "lr": 1.9999070871853796e-06, "epoch": 0.10402663081748928, "percentage": 10.4, "elapsed_time": "0:06:43", "remaining_time": "0:57:58", "throughput": 12685.49, "total_tokens": 5123904} +{"current_steps": 1630, "total_steps": 15621, "loss": 0.4856, "lr": 1.9998912316175986e-06, "epoch": 0.10434671275846617, "percentage": 10.43, "elapsed_time": "0:06:44", "remaining_time": "0:57:52", "throughput": 12704.11, "total_tokens": 5140160} +{"current_steps": 1635, "total_steps": 15621, "loss": 0.5123, "lr": 1.9998741276738752e-06, "epoch": 0.10466679469944305, "percentage": 10.47, "elapsed_time": "0:06:45", "remaining_time": "0:57:46", "throughput": 12722.2, "total_tokens": 5156288} +{"current_steps": 1640, "total_steps": 15621, "loss": 0.5907, "lr": 1.999855775375563e-06, "epoch": 0.10498687664041995, "percentage": 10.5, "elapsed_time": "0:06:45", "remaining_time": "0:57:40", "throughput": 12739.58, "total_tokens": 5171776} +{"current_steps": 1645, "total_steps": 15621, "loss": 0.6812, "lr": 1.999836174745576e-06, "epoch": 0.10530695858139684, "percentage": 10.53, "elapsed_time": "0:06:46", "remaining_time": "0:57:35", "throughput": 12760.16, "total_tokens": 5189504} +{"current_steps": 1650, "total_steps": 15621, "loss": 0.5825, "lr": 1.9998153258083853e-06, "epoch": 0.10562704052237373, "percentage": 10.56, "elapsed_time": "0:06:47", "remaining_time": "0:57:29", "throughput": 12777.59, "total_tokens": 5205056} +{"current_steps": 1655, "total_steps": 15621, "loss": 0.5911, "lr": 1.9997932285900214e-06, "epoch": 0.10594712246335061, "percentage": 10.59, "elapsed_time": "0:06:48", "remaining_time": "0:57:23", "throughput": 12798.0, "total_tokens": 5222656} +{"current_steps": 1660, "total_steps": 15621, "loss": 0.6352, "lr": 1.9997698831180726e-06, "epoch": 0.1062672044043275, "percentage": 10.63, "elapsed_time": "0:06:48", "remaining_time": "0:57:17", "throughput": 12816.04, "total_tokens": 5238848} +{"current_steps": 1665, "total_steps": 15621, "loss": 0.5226, "lr": 1.999745289421686e-06, "epoch": 0.1065872863453044, "percentage": 10.66, "elapsed_time": "0:06:49", "remaining_time": "0:57:12", "throughput": 12834.64, "total_tokens": 5255296} +{"current_steps": 1670, "total_steps": 15621, "loss": 0.7595, "lr": 1.9997194475315674e-06, "epoch": 0.10690736828628129, "percentage": 10.69, "elapsed_time": "0:06:50", "remaining_time": "0:57:06", "throughput": 12850.97, "total_tokens": 5270336} +{"current_steps": 1675, "total_steps": 15621, "loss": 0.4864, "lr": 1.9996923574799808e-06, "epoch": 0.10722745022725817, "percentage": 10.72, "elapsed_time": "0:06:50", "remaining_time": "0:57:00", "throughput": 12869.32, "total_tokens": 5286720} +{"current_steps": 1680, "total_steps": 15621, "loss": 0.6553, "lr": 1.9996640193007476e-06, "epoch": 0.10754753216823507, "percentage": 10.75, "elapsed_time": "0:06:51", "remaining_time": "0:56:54", "throughput": 12885.32, "total_tokens": 5301632} +{"current_steps": 1685, "total_steps": 15621, "loss": 0.402, "lr": 1.9996344330292495e-06, "epoch": 0.10786761410921196, "percentage": 10.79, "elapsed_time": "0:06:52", "remaining_time": "0:56:48", "throughput": 12901.27, "total_tokens": 5316544} +{"current_steps": 1690, "total_steps": 15621, "loss": 0.5449, "lr": 1.9996035987024245e-06, "epoch": 0.10818769605018885, "percentage": 10.82, "elapsed_time": "0:06:52", "remaining_time": "0:56:42", "throughput": 12918.56, "total_tokens": 5332544} +{"current_steps": 1695, "total_steps": 15621, "loss": 0.5498, "lr": 1.99957151635877e-06, "epoch": 0.10850777799116573, "percentage": 10.85, "elapsed_time": "0:06:53", "remaining_time": "0:56:36", "throughput": 12935.17, "total_tokens": 5348096} +{"current_steps": 1700, "total_steps": 15621, "loss": 0.6298, "lr": 1.999538186038341e-06, "epoch": 0.10882785993214263, "percentage": 10.88, "elapsed_time": "0:06:54", "remaining_time": "0:56:30", "throughput": 12949.89, "total_tokens": 5362368} +{"current_steps": 1705, "total_steps": 15621, "loss": 0.5357, "lr": 1.999503607782751e-06, "epoch": 0.10914794187311952, "percentage": 10.91, "elapsed_time": "0:06:54", "remaining_time": "0:56:25", "throughput": 12966.93, "total_tokens": 5378176} +{"current_steps": 1710, "total_steps": 15621, "loss": 0.5219, "lr": 1.999467781635171e-06, "epoch": 0.10946802381409641, "percentage": 10.95, "elapsed_time": "0:06:55", "remaining_time": "0:56:19", "throughput": 12985.05, "total_tokens": 5394752} +{"current_steps": 1715, "total_steps": 15621, "loss": 0.7002, "lr": 1.9994307076403306e-06, "epoch": 0.10978810575507329, "percentage": 10.98, "elapsed_time": "0:06:56", "remaining_time": "0:56:14", "throughput": 13004.43, "total_tokens": 5412160} +{"current_steps": 1720, "total_steps": 15621, "loss": 0.5297, "lr": 1.999392385844517e-06, "epoch": 0.11010818769605019, "percentage": 11.01, "elapsed_time": "0:06:56", "remaining_time": "0:56:08", "throughput": 13021.08, "total_tokens": 5427840} +{"current_steps": 1725, "total_steps": 15621, "loss": 0.4006, "lr": 1.9993528162955753e-06, "epoch": 0.11042826963702708, "percentage": 11.04, "elapsed_time": "0:06:57", "remaining_time": "0:56:03", "throughput": 13038.74, "total_tokens": 5444224} +{"current_steps": 1730, "total_steps": 15621, "loss": 0.5775, "lr": 1.9993119990429095e-06, "epoch": 0.11074835157800397, "percentage": 11.07, "elapsed_time": "0:06:58", "remaining_time": "0:55:57", "throughput": 13054.87, "total_tokens": 5459648} +{"current_steps": 1735, "total_steps": 15621, "loss": 0.7845, "lr": 1.9992699341374794e-06, "epoch": 0.11106843351898085, "percentage": 11.11, "elapsed_time": "0:06:58", "remaining_time": "0:55:52", "throughput": 13070.78, "total_tokens": 5475008} +{"current_steps": 1740, "total_steps": 15621, "loss": 0.533, "lr": 1.9992266216318033e-06, "epoch": 0.11138851545995775, "percentage": 11.14, "elapsed_time": "0:06:59", "remaining_time": "0:55:47", "throughput": 13088.17, "total_tokens": 5491456} +{"current_steps": 1745, "total_steps": 15621, "loss": 0.5745, "lr": 1.9991820615799583e-06, "epoch": 0.11170859740093464, "percentage": 11.17, "elapsed_time": "0:07:00", "remaining_time": "0:55:41", "throughput": 13105.06, "total_tokens": 5507520} +{"current_steps": 1750, "total_steps": 15621, "loss": 0.6964, "lr": 1.999136254037578e-06, "epoch": 0.11202867934191153, "percentage": 11.2, "elapsed_time": "0:07:00", "remaining_time": "0:55:36", "throughput": 13121.14, "total_tokens": 5523072} +{"current_steps": 1755, "total_steps": 15621, "loss": 0.5134, "lr": 1.999089199061853e-06, "epoch": 0.11234876128288843, "percentage": 11.23, "elapsed_time": "0:07:01", "remaining_time": "0:55:30", "throughput": 13136.64, "total_tokens": 5538304} +{"current_steps": 1760, "total_steps": 15621, "loss": 0.4639, "lr": 1.9990408967115326e-06, "epoch": 0.1126688432238653, "percentage": 11.27, "elapsed_time": "0:07:02", "remaining_time": "0:55:25", "throughput": 13152.79, "total_tokens": 5553920} +{"current_steps": 1765, "total_steps": 15621, "loss": 0.4624, "lr": 1.998991347046922e-06, "epoch": 0.1129889251648422, "percentage": 11.3, "elapsed_time": "0:07:02", "remaining_time": "0:55:20", "throughput": 13168.57, "total_tokens": 5569344} +{"current_steps": 1770, "total_steps": 15621, "loss": 0.5057, "lr": 1.9989405501298857e-06, "epoch": 0.11330900710581909, "percentage": 11.33, "elapsed_time": "0:07:03", "remaining_time": "0:55:14", "throughput": 13186.11, "total_tokens": 5585856} +{"current_steps": 1775, "total_steps": 15621, "loss": 0.5777, "lr": 1.9988885060238436e-06, "epoch": 0.11362908904679599, "percentage": 11.36, "elapsed_time": "0:07:04", "remaining_time": "0:55:10", "throughput": 13205.54, "total_tokens": 5603840} +{"current_steps": 1780, "total_steps": 15621, "loss": 0.5185, "lr": 1.9988352147937735e-06, "epoch": 0.11394917098777287, "percentage": 11.39, "elapsed_time": "0:07:05", "remaining_time": "0:55:05", "throughput": 13222.59, "total_tokens": 5620352} +{"current_steps": 1785, "total_steps": 15621, "loss": 0.5548, "lr": 1.99878067650621e-06, "epoch": 0.11426925292874976, "percentage": 11.43, "elapsed_time": "0:07:05", "remaining_time": "0:55:00", "throughput": 13238.94, "total_tokens": 5636544} +{"current_steps": 1790, "total_steps": 15621, "loss": 0.5416, "lr": 1.998724891229245e-06, "epoch": 0.11458933486972665, "percentage": 11.46, "elapsed_time": "0:07:06", "remaining_time": "0:54:55", "throughput": 13255.36, "total_tokens": 5652672} +{"current_steps": 1795, "total_steps": 15621, "loss": 0.5025, "lr": 1.998667859032527e-06, "epoch": 0.11490941681070355, "percentage": 11.49, "elapsed_time": "0:07:07", "remaining_time": "0:54:49", "throughput": 13270.88, "total_tokens": 5668224} +{"current_steps": 1800, "total_steps": 15621, "loss": 0.4544, "lr": 1.9986095799872613e-06, "epoch": 0.11522949875168043, "percentage": 11.52, "elapsed_time": "0:07:07", "remaining_time": "0:54:44", "throughput": 13287.61, "total_tokens": 5684480} +{"current_steps": 1805, "total_steps": 15621, "loss": 0.4475, "lr": 1.99855005416621e-06, "epoch": 0.11554958069265732, "percentage": 11.55, "elapsed_time": "0:07:08", "remaining_time": "0:54:39", "throughput": 13304.24, "total_tokens": 5700864} +{"current_steps": 1810, "total_steps": 15621, "loss": 0.6003, "lr": 1.998489281643692e-06, "epoch": 0.11586966263363421, "percentage": 11.59, "elapsed_time": "0:07:09", "remaining_time": "0:54:34", "throughput": 13319.54, "total_tokens": 5716224} +{"current_steps": 1815, "total_steps": 15621, "loss": 0.4876, "lr": 1.998427262495582e-06, "epoch": 0.1161897445746111, "percentage": 11.62, "elapsed_time": "0:07:09", "remaining_time": "0:54:29", "throughput": 13336.75, "total_tokens": 5733056} +{"current_steps": 1820, "total_steps": 15621, "loss": 0.6507, "lr": 1.9983639967993124e-06, "epoch": 0.11650982651558799, "percentage": 11.65, "elapsed_time": "0:07:10", "remaining_time": "0:54:24", "throughput": 13352.83, "total_tokens": 5749120} +{"current_steps": 1825, "total_steps": 15621, "loss": 0.7451, "lr": 1.99829948463387e-06, "epoch": 0.11682990845656488, "percentage": 11.68, "elapsed_time": "0:07:11", "remaining_time": "0:54:19", "throughput": 13367.2, "total_tokens": 5763968} +{"current_steps": 1830, "total_steps": 15621, "loss": 0.5556, "lr": 1.9982337260798e-06, "epoch": 0.11714999039754177, "percentage": 11.71, "elapsed_time": "0:07:11", "remaining_time": "0:54:14", "throughput": 13382.47, "total_tokens": 5779520} +{"current_steps": 1835, "total_steps": 15621, "loss": 0.5874, "lr": 1.998166721219203e-06, "epoch": 0.11747007233851867, "percentage": 11.75, "elapsed_time": "0:07:12", "remaining_time": "0:54:10", "throughput": 13402.58, "total_tokens": 5798848} +{"current_steps": 1840, "total_steps": 15621, "loss": 0.5069, "lr": 1.9980984701357338e-06, "epoch": 0.11779015427949555, "percentage": 11.78, "elapsed_time": "0:07:13", "remaining_time": "0:54:05", "throughput": 13417.15, "total_tokens": 5813952} +{"current_steps": 1845, "total_steps": 15621, "loss": 0.4306, "lr": 1.998028972914606e-06, "epoch": 0.11811023622047244, "percentage": 11.81, "elapsed_time": "0:07:14", "remaining_time": "0:54:00", "throughput": 13432.89, "total_tokens": 5830016} +{"current_steps": 1850, "total_steps": 15621, "loss": 0.5965, "lr": 1.9979582296425877e-06, "epoch": 0.11843031816144933, "percentage": 11.84, "elapsed_time": "0:07:14", "remaining_time": "0:53:55", "throughput": 13447.5, "total_tokens": 5845312} +{"current_steps": 1855, "total_steps": 15621, "loss": 0.5894, "lr": 1.9978862404080022e-06, "epoch": 0.11875040010242623, "percentage": 11.88, "elapsed_time": "0:07:15", "remaining_time": "0:53:50", "throughput": 13462.16, "total_tokens": 5860672} +{"current_steps": 1860, "total_steps": 15621, "loss": 0.5369, "lr": 1.9978130053007295e-06, "epoch": 0.1190704820434031, "percentage": 11.91, "elapsed_time": "0:07:16", "remaining_time": "0:53:45", "throughput": 13476.35, "total_tokens": 5875776} +{"current_steps": 1865, "total_steps": 15621, "loss": 0.4361, "lr": 1.9977385244122034e-06, "epoch": 0.11939056398438, "percentage": 11.94, "elapsed_time": "0:07:16", "remaining_time": "0:53:40", "throughput": 13491.18, "total_tokens": 5891200} +{"current_steps": 1870, "total_steps": 15621, "loss": 0.4922, "lr": 1.997662797835415e-06, "epoch": 0.11971064592535689, "percentage": 11.97, "elapsed_time": "0:07:17", "remaining_time": "0:53:36", "throughput": 13506.27, "total_tokens": 5907008} +{"current_steps": 1875, "total_steps": 15621, "loss": 0.4561, "lr": 1.9975858256649097e-06, "epoch": 0.12003072786633379, "percentage": 12.0, "elapsed_time": "0:07:18", "remaining_time": "0:53:31", "throughput": 13521.79, "total_tokens": 5923264} +{"current_steps": 1880, "total_steps": 15621, "loss": 0.4952, "lr": 1.997507607996788e-06, "epoch": 0.12035080980731067, "percentage": 12.04, "elapsed_time": "0:07:18", "remaining_time": "0:53:26", "throughput": 13537.73, "total_tokens": 5939648} +{"current_steps": 1885, "total_steps": 15621, "loss": 0.4576, "lr": 1.997428144928706e-06, "epoch": 0.12067089174828756, "percentage": 12.07, "elapsed_time": "0:07:19", "remaining_time": "0:53:22", "throughput": 13553.12, "total_tokens": 5955520} +{"current_steps": 1890, "total_steps": 15621, "loss": 0.5277, "lr": 1.9973474365598736e-06, "epoch": 0.12099097368926445, "percentage": 12.1, "elapsed_time": "0:07:20", "remaining_time": "0:53:17", "throughput": 13567.77, "total_tokens": 5971072} +{"current_steps": 1895, "total_steps": 15621, "loss": 0.5794, "lr": 1.9972654829910568e-06, "epoch": 0.12131105563024135, "percentage": 12.13, "elapsed_time": "0:07:20", "remaining_time": "0:53:12", "throughput": 13583.17, "total_tokens": 5987264} +{"current_steps": 1900, "total_steps": 15621, "loss": 0.6246, "lr": 1.9971822843245748e-06, "epoch": 0.12163113757121823, "percentage": 12.16, "elapsed_time": "0:07:21", "remaining_time": "0:53:08", "throughput": 13597.92, "total_tokens": 6002880} +{"current_steps": 1905, "total_steps": 15621, "loss": 0.5281, "lr": 1.997097840664303e-06, "epoch": 0.12195121951219512, "percentage": 12.2, "elapsed_time": "0:07:22", "remaining_time": "0:53:03", "throughput": 13614.04, "total_tokens": 6019520} +{"current_steps": 1910, "total_steps": 15621, "loss": 0.5722, "lr": 1.99701215211567e-06, "epoch": 0.12227130145317201, "percentage": 12.23, "elapsed_time": "0:07:22", "remaining_time": "0:52:59", "throughput": 13629.72, "total_tokens": 6035904} +{"current_steps": 1915, "total_steps": 15621, "loss": 0.6162, "lr": 1.9969252187856587e-06, "epoch": 0.1225913833941489, "percentage": 12.26, "elapsed_time": "0:07:23", "remaining_time": "0:52:54", "throughput": 13643.38, "total_tokens": 6050816} +{"current_steps": 1920, "total_steps": 15621, "loss": 0.414, "lr": 1.9968370407828065e-06, "epoch": 0.12291146533512579, "percentage": 12.29, "elapsed_time": "0:07:24", "remaining_time": "0:52:49", "throughput": 13657.36, "total_tokens": 6065920} +{"current_steps": 1925, "total_steps": 15621, "loss": 0.5995, "lr": 1.996747618217205e-06, "epoch": 0.12323154727610268, "percentage": 12.32, "elapsed_time": "0:07:24", "remaining_time": "0:52:44", "throughput": 13672.05, "total_tokens": 6081728} +{"current_steps": 1930, "total_steps": 15621, "loss": 0.492, "lr": 1.9966569512004987e-06, "epoch": 0.12355162921707957, "percentage": 12.36, "elapsed_time": "0:07:25", "remaining_time": "0:52:40", "throughput": 13686.9, "total_tokens": 6097472} +{"current_steps": 1935, "total_steps": 15621, "loss": 0.5079, "lr": 1.996565039845887e-06, "epoch": 0.12387171115805647, "percentage": 12.39, "elapsed_time": "0:07:26", "remaining_time": "0:52:35", "throughput": 13701.37, "total_tokens": 6113152} +{"current_steps": 1940, "total_steps": 15621, "loss": 0.6364, "lr": 1.996471884268122e-06, "epoch": 0.12419179309903335, "percentage": 12.42, "elapsed_time": "0:07:26", "remaining_time": "0:52:31", "throughput": 13716.79, "total_tokens": 6129408} +{"current_steps": 1945, "total_steps": 15621, "loss": 0.5506, "lr": 1.9963774845835097e-06, "epoch": 0.12451187504001024, "percentage": 12.45, "elapsed_time": "0:07:27", "remaining_time": "0:52:26", "throughput": 13731.17, "total_tokens": 6144896} +{"current_steps": 1950, "total_steps": 15621, "loss": 0.5895, "lr": 1.996281840909909e-06, "epoch": 0.12483195698098713, "percentage": 12.48, "elapsed_time": "0:07:28", "remaining_time": "0:52:22", "throughput": 13745.12, "total_tokens": 6160256} +{"current_steps": 1955, "total_steps": 15621, "loss": 0.6389, "lr": 1.9961849533667322e-06, "epoch": 0.12515203892196403, "percentage": 12.52, "elapsed_time": "0:07:28", "remaining_time": "0:52:17", "throughput": 13758.52, "total_tokens": 6175104} +{"current_steps": 1960, "total_steps": 15621, "loss": 0.5267, "lr": 1.9960868220749447e-06, "epoch": 0.1254721208629409, "percentage": 12.55, "elapsed_time": "0:07:29", "remaining_time": "0:52:12", "throughput": 13772.08, "total_tokens": 6190272} +{"current_steps": 1965, "total_steps": 15621, "loss": 0.5836, "lr": 1.9959874471570644e-06, "epoch": 0.1257922028039178, "percentage": 12.58, "elapsed_time": "0:07:30", "remaining_time": "0:52:08", "throughput": 13786.23, "total_tokens": 6205952} +{"current_steps": 1970, "total_steps": 15621, "loss": 0.5619, "lr": 1.9958868287371625e-06, "epoch": 0.1261122847448947, "percentage": 12.61, "elapsed_time": "0:07:30", "remaining_time": "0:52:04", "throughput": 13801.81, "total_tokens": 6222592} +{"current_steps": 1975, "total_steps": 15621, "loss": 0.4804, "lr": 1.9957849669408617e-06, "epoch": 0.12643236668587157, "percentage": 12.64, "elapsed_time": "0:07:31", "remaining_time": "0:51:59", "throughput": 13815.34, "total_tokens": 6237696} +{"current_steps": 1980, "total_steps": 15621, "loss": 0.4947, "lr": 1.995681861895338e-06, "epoch": 0.12675244862684848, "percentage": 12.68, "elapsed_time": "0:07:32", "remaining_time": "0:51:55", "throughput": 13830.52, "total_tokens": 6254080} +{"current_steps": 1985, "total_steps": 15621, "loss": 0.5828, "lr": 1.9955775137293187e-06, "epoch": 0.12707253056782536, "percentage": 12.71, "elapsed_time": "0:07:32", "remaining_time": "0:51:51", "throughput": 13844.83, "total_tokens": 6270016} +{"current_steps": 1990, "total_steps": 15621, "loss": 0.6161, "lr": 1.9954719225730845e-06, "epoch": 0.12739261250880227, "percentage": 12.74, "elapsed_time": "0:07:33", "remaining_time": "0:51:46", "throughput": 13858.37, "total_tokens": 6285184} +{"current_steps": 1995, "total_steps": 15621, "loss": 0.4833, "lr": 1.9953650885584666e-06, "epoch": 0.12771269444977915, "percentage": 12.77, "elapsed_time": "0:07:34", "remaining_time": "0:51:42", "throughput": 13872.37, "total_tokens": 6300992} +{"current_steps": 2000, "total_steps": 15621, "loss": 0.5462, "lr": 1.995257011818849e-06, "epoch": 0.12803277639075603, "percentage": 12.8, "elapsed_time": "0:07:34", "remaining_time": "0:51:37", "throughput": 13884.58, "total_tokens": 6315392} +{"current_steps": 2005, "total_steps": 15621, "loss": 0.4676, "lr": 1.9951476924891666e-06, "epoch": 0.12835285833173293, "percentage": 12.84, "elapsed_time": "0:07:35", "remaining_time": "0:51:33", "throughput": 13898.23, "total_tokens": 6331136} +{"current_steps": 2010, "total_steps": 15621, "loss": 0.5551, "lr": 1.9950371307059056e-06, "epoch": 0.1286729402727098, "percentage": 12.87, "elapsed_time": "0:07:36", "remaining_time": "0:51:29", "throughput": 13912.99, "total_tokens": 6347584} +{"current_steps": 2015, "total_steps": 15621, "loss": 0.5584, "lr": 1.9949253266071036e-06, "epoch": 0.1289930222136867, "percentage": 12.9, "elapsed_time": "0:07:36", "remaining_time": "0:51:25", "throughput": 13926.0, "total_tokens": 6362560} +{"current_steps": 2020, "total_steps": 15621, "loss": 0.5131, "lr": 1.9948122803323503e-06, "epoch": 0.1293131041546636, "percentage": 12.93, "elapsed_time": "0:07:37", "remaining_time": "0:51:20", "throughput": 13940.03, "total_tokens": 6378304} +{"current_steps": 2025, "total_steps": 15621, "loss": 0.5125, "lr": 1.9946979920227844e-06, "epoch": 0.12963318609564048, "percentage": 12.96, "elapsed_time": "0:07:38", "remaining_time": "0:51:16", "throughput": 13953.03, "total_tokens": 6393280} +{"current_steps": 2030, "total_steps": 15621, "loss": 0.5188, "lr": 1.994582461821096e-06, "epoch": 0.1299532680366174, "percentage": 13.0, "elapsed_time": "0:07:38", "remaining_time": "0:51:12", "throughput": 13967.37, "total_tokens": 6409472} +{"current_steps": 2035, "total_steps": 15621, "loss": 0.7149, "lr": 1.9944656898715267e-06, "epoch": 0.13027334997759427, "percentage": 13.03, "elapsed_time": "0:07:39", "remaining_time": "0:51:08", "throughput": 13980.84, "total_tokens": 6424960} +{"current_steps": 2040, "total_steps": 15621, "loss": 0.6082, "lr": 1.994347676319867e-06, "epoch": 0.13059343191857115, "percentage": 13.06, "elapsed_time": "0:07:40", "remaining_time": "0:51:03", "throughput": 13993.7, "total_tokens": 6440000} +{"current_steps": 2045, "total_steps": 15621, "loss": 0.4607, "lr": 1.994228421313459e-06, "epoch": 0.13091351385954805, "percentage": 13.09, "elapsed_time": "0:07:40", "remaining_time": "0:50:59", "throughput": 14009.81, "total_tokens": 6457600} +{"current_steps": 2050, "total_steps": 15621, "loss": 0.5187, "lr": 1.994107925001193e-06, "epoch": 0.13123359580052493, "percentage": 13.12, "elapsed_time": "0:07:41", "remaining_time": "0:50:55", "throughput": 14023.17, "total_tokens": 6473088} +{"current_steps": 2055, "total_steps": 15621, "loss": 0.595, "lr": 1.9939861875335108e-06, "epoch": 0.1315536777415018, "percentage": 13.16, "elapsed_time": "0:07:42", "remaining_time": "0:50:51", "throughput": 14035.26, "total_tokens": 6487680} +{"current_steps": 2060, "total_steps": 15621, "loss": 0.4909, "lr": 1.9938632090624025e-06, "epoch": 0.13187375968247872, "percentage": 13.19, "elapsed_time": "0:07:42", "remaining_time": "0:50:47", "throughput": 14048.73, "total_tokens": 6503296} +{"current_steps": 2065, "total_steps": 15621, "loss": 0.5368, "lr": 1.9937389897414087e-06, "epoch": 0.1321938416234556, "percentage": 13.22, "elapsed_time": "0:07:43", "remaining_time": "0:50:43", "throughput": 14062.17, "total_tokens": 6518912} +{"current_steps": 2070, "total_steps": 15621, "loss": 0.5642, "lr": 1.993613529725618e-06, "epoch": 0.1325139235644325, "percentage": 13.25, "elapsed_time": "0:07:44", "remaining_time": "0:50:39", "throughput": 14075.81, "total_tokens": 6534784} +{"current_steps": 2075, "total_steps": 15621, "loss": 0.5303, "lr": 1.99348682917167e-06, "epoch": 0.13283400550540939, "percentage": 13.28, "elapsed_time": "0:07:44", "remaining_time": "0:50:35", "throughput": 14089.26, "total_tokens": 6550528} +{"current_steps": 2080, "total_steps": 15621, "loss": 0.5475, "lr": 1.99335888823775e-06, "epoch": 0.13315408744638627, "percentage": 13.32, "elapsed_time": "0:07:45", "remaining_time": "0:50:31", "throughput": 14102.47, "total_tokens": 6566144} +{"current_steps": 2085, "total_steps": 15621, "loss": 0.5654, "lr": 1.993229707083595e-06, "epoch": 0.13347416938736317, "percentage": 13.35, "elapsed_time": "0:07:46", "remaining_time": "0:50:27", "throughput": 14118.13, "total_tokens": 6583872} +{"current_steps": 2090, "total_steps": 15621, "loss": 0.4165, "lr": 1.993099285870489e-06, "epoch": 0.13379425132834005, "percentage": 13.38, "elapsed_time": "0:07:47", "remaining_time": "0:50:24", "throughput": 14134.76, "total_tokens": 6602304} +{"current_steps": 2095, "total_steps": 15621, "loss": 0.462, "lr": 1.992967624761264e-06, "epoch": 0.13411433326931693, "percentage": 13.41, "elapsed_time": "0:07:47", "remaining_time": "0:50:20", "throughput": 14148.17, "total_tokens": 6618112} +{"current_steps": 2100, "total_steps": 15621, "loss": 0.6239, "lr": 1.9928347239203014e-06, "epoch": 0.13443441521029384, "percentage": 13.44, "elapsed_time": "0:07:48", "remaining_time": "0:50:16", "throughput": 14163.62, "total_tokens": 6635584} +{"current_steps": 2105, "total_steps": 15621, "loss": 0.5283, "lr": 1.9927005835135282e-06, "epoch": 0.13475449715127072, "percentage": 13.48, "elapsed_time": "0:07:49", "remaining_time": "0:50:12", "throughput": 14179.44, "total_tokens": 6653568} +{"current_steps": 2110, "total_steps": 15621, "loss": 0.4596, "lr": 1.9925652037084214e-06, "epoch": 0.13507457909224763, "percentage": 13.51, "elapsed_time": "0:07:49", "remaining_time": "0:50:08", "throughput": 14191.98, "total_tokens": 6668864} +{"current_steps": 2115, "total_steps": 15621, "loss": 0.4838, "lr": 1.9924285846740037e-06, "epoch": 0.1353946610332245, "percentage": 13.54, "elapsed_time": "0:07:50", "remaining_time": "0:50:05", "throughput": 14204.8, "total_tokens": 6684416} +{"current_steps": 2120, "total_steps": 15621, "loss": 0.5948, "lr": 1.9922907265808452e-06, "epoch": 0.13571474297420139, "percentage": 13.57, "elapsed_time": "0:07:51", "remaining_time": "0:50:00", "throughput": 14217.03, "total_tokens": 6699392} +{"current_steps": 2125, "total_steps": 15621, "loss": 0.544, "lr": 1.9921516296010643e-06, "epoch": 0.1360348249151783, "percentage": 13.6, "elapsed_time": "0:07:51", "remaining_time": "0:49:56", "throughput": 14229.23, "total_tokens": 6714560} +{"current_steps": 2130, "total_steps": 15621, "loss": 0.5678, "lr": 1.9920112939083246e-06, "epoch": 0.13635490685615517, "percentage": 13.64, "elapsed_time": "0:07:52", "remaining_time": "0:49:52", "throughput": 14241.97, "total_tokens": 6729920} +{"current_steps": 2135, "total_steps": 15621, "loss": 0.5607, "lr": 1.9918697196778367e-06, "epoch": 0.13667498879713205, "percentage": 13.67, "elapsed_time": "0:07:53", "remaining_time": "0:49:48", "throughput": 14253.84, "total_tokens": 6744768} +{"current_steps": 2140, "total_steps": 15621, "loss": 0.4531, "lr": 1.9917269070863578e-06, "epoch": 0.13699507073810896, "percentage": 13.7, "elapsed_time": "0:07:53", "remaining_time": "0:49:44", "throughput": 14265.73, "total_tokens": 6759680} +{"current_steps": 2145, "total_steps": 15621, "loss": 0.5091, "lr": 1.9915828563121915e-06, "epoch": 0.13731515267908584, "percentage": 13.73, "elapsed_time": "0:07:54", "remaining_time": "0:49:41", "throughput": 14278.48, "total_tokens": 6775168} +{"current_steps": 2150, "total_steps": 15621, "loss": 0.5144, "lr": 1.9914375675351865e-06, "epoch": 0.13763523462006275, "percentage": 13.76, "elapsed_time": "0:07:55", "remaining_time": "0:49:37", "throughput": 14291.85, "total_tokens": 6791296} +{"current_steps": 2155, "total_steps": 15621, "loss": 0.4326, "lr": 1.991291040936738e-06, "epoch": 0.13795531656103963, "percentage": 13.8, "elapsed_time": "0:07:55", "remaining_time": "0:49:33", "throughput": 14306.67, "total_tokens": 6808640} +{"current_steps": 2160, "total_steps": 15621, "loss": 0.6764, "lr": 1.9911432766997857e-06, "epoch": 0.1382753985020165, "percentage": 13.83, "elapsed_time": "0:07:56", "remaining_time": "0:49:29", "throughput": 14319.16, "total_tokens": 6824064} +{"current_steps": 2165, "total_steps": 15621, "loss": 0.455, "lr": 1.990994275008815e-06, "epoch": 0.1385954804429934, "percentage": 13.86, "elapsed_time": "0:07:57", "remaining_time": "0:49:26", "throughput": 14332.26, "total_tokens": 6839872} +{"current_steps": 2170, "total_steps": 15621, "loss": 0.515, "lr": 1.9908440360498565e-06, "epoch": 0.1389155623839703, "percentage": 13.89, "elapsed_time": "0:07:57", "remaining_time": "0:49:22", "throughput": 14344.94, "total_tokens": 6855744} +{"current_steps": 2175, "total_steps": 15621, "loss": 0.5589, "lr": 1.990692560010485e-06, "epoch": 0.1392356443249472, "percentage": 13.92, "elapsed_time": "0:07:58", "remaining_time": "0:49:18", "throughput": 14355.42, "total_tokens": 6869632} +{"current_steps": 2180, "total_steps": 15621, "loss": 0.4574, "lr": 1.9905398470798206e-06, "epoch": 0.13955572626592408, "percentage": 13.96, "elapsed_time": "0:07:59", "remaining_time": "0:49:14", "throughput": 14368.35, "total_tokens": 6885696} +{"current_steps": 2185, "total_steps": 15621, "loss": 0.37, "lr": 1.990385897448527e-06, "epoch": 0.13987580820690096, "percentage": 13.99, "elapsed_time": "0:07:59", "remaining_time": "0:49:10", "throughput": 14381.13, "total_tokens": 6901504} +{"current_steps": 2190, "total_steps": 15621, "loss": 0.5817, "lr": 1.9902307113088114e-06, "epoch": 0.14019589014787787, "percentage": 14.02, "elapsed_time": "0:08:00", "remaining_time": "0:49:07", "throughput": 14392.99, "total_tokens": 6916480} +{"current_steps": 2195, "total_steps": 15621, "loss": 0.4882, "lr": 1.9900742888544264e-06, "epoch": 0.14051597208885475, "percentage": 14.05, "elapsed_time": "0:08:01", "remaining_time": "0:49:03", "throughput": 14405.71, "total_tokens": 6932416} +{"current_steps": 2200, "total_steps": 15621, "loss": 0.5338, "lr": 1.989916630280667e-06, "epoch": 0.14083605402983163, "percentage": 14.08, "elapsed_time": "0:08:01", "remaining_time": "0:48:59", "throughput": 14419.29, "total_tokens": 6948992} +{"current_steps": 2205, "total_steps": 15621, "loss": 0.464, "lr": 1.989757735784372e-06, "epoch": 0.14115613597080853, "percentage": 14.12, "elapsed_time": "0:08:02", "remaining_time": "0:48:56", "throughput": 14431.52, "total_tokens": 6964416} +{"current_steps": 2210, "total_steps": 15621, "loss": 0.4246, "lr": 1.989597605563923e-06, "epoch": 0.1414762179117854, "percentage": 14.15, "elapsed_time": "0:08:03", "remaining_time": "0:48:52", "throughput": 14444.5, "total_tokens": 6980544} +{"current_steps": 2215, "total_steps": 15621, "loss": 0.5755, "lr": 1.9894362398192437e-06, "epoch": 0.14179629985276232, "percentage": 14.18, "elapsed_time": "0:08:03", "remaining_time": "0:48:49", "throughput": 14458.28, "total_tokens": 6997440} +{"current_steps": 2220, "total_steps": 15621, "loss": 0.4218, "lr": 1.9892736387518023e-06, "epoch": 0.1421163817937392, "percentage": 14.21, "elapsed_time": "0:08:04", "remaining_time": "0:48:45", "throughput": 14470.15, "total_tokens": 7012672} +{"current_steps": 2225, "total_steps": 15621, "loss": 0.4798, "lr": 1.9891098025646075e-06, "epoch": 0.14243646373471608, "percentage": 14.24, "elapsed_time": "0:08:05", "remaining_time": "0:48:41", "throughput": 14481.6, "total_tokens": 7027648} +{"current_steps": 2230, "total_steps": 15621, "loss": 0.5266, "lr": 1.9889447314622105e-06, "epoch": 0.142756545675693, "percentage": 14.28, "elapsed_time": "0:08:05", "remaining_time": "0:48:38", "throughput": 14493.7, "total_tokens": 7043200} +{"current_steps": 2235, "total_steps": 15621, "loss": 0.7416, "lr": 1.9887784256507046e-06, "epoch": 0.14307662761666987, "percentage": 14.31, "elapsed_time": "0:08:06", "remaining_time": "0:48:34", "throughput": 14505.51, "total_tokens": 7058688} +{"current_steps": 2240, "total_steps": 15621, "loss": 0.6734, "lr": 1.988610885337725e-06, "epoch": 0.14339670955764675, "percentage": 14.34, "elapsed_time": "0:08:07", "remaining_time": "0:48:30", "throughput": 14517.27, "total_tokens": 7074048} +{"current_steps": 2245, "total_steps": 15621, "loss": 0.5319, "lr": 1.9884421107324476e-06, "epoch": 0.14371679149862365, "percentage": 14.37, "elapsed_time": "0:08:07", "remaining_time": "0:48:27", "throughput": 14529.55, "total_tokens": 7089792} +{"current_steps": 2250, "total_steps": 15621, "loss": 0.4753, "lr": 1.9882721020455893e-06, "epoch": 0.14403687343960053, "percentage": 14.4, "elapsed_time": "0:08:08", "remaining_time": "0:48:23", "throughput": 14540.5, "total_tokens": 7104640} +{"current_steps": 2255, "total_steps": 15621, "loss": 0.5137, "lr": 1.988100859489408e-06, "epoch": 0.14435695538057744, "percentage": 14.44, "elapsed_time": "0:08:09", "remaining_time": "0:48:20", "throughput": 14552.27, "total_tokens": 7120064} +{"current_steps": 2260, "total_steps": 15621, "loss": 0.4839, "lr": 1.9879283832777017e-06, "epoch": 0.14467703732155432, "percentage": 14.47, "elapsed_time": "0:08:09", "remaining_time": "0:48:16", "throughput": 14563.68, "total_tokens": 7135232} +{"current_steps": 2265, "total_steps": 15621, "loss": 0.5247, "lr": 1.9877546736258096e-06, "epoch": 0.1449971192625312, "percentage": 14.5, "elapsed_time": "0:08:10", "remaining_time": "0:48:12", "throughput": 14574.19, "total_tokens": 7149632} +{"current_steps": 2270, "total_steps": 15621, "loss": 0.4134, "lr": 1.98757973075061e-06, "epoch": 0.1453172012035081, "percentage": 14.53, "elapsed_time": "0:08:11", "remaining_time": "0:48:09", "throughput": 14585.21, "total_tokens": 7164352} +{"current_steps": 2275, "total_steps": 15621, "loss": 0.52, "lr": 1.987403554870521e-06, "epoch": 0.14563728314448499, "percentage": 14.56, "elapsed_time": "0:08:11", "remaining_time": "0:48:05", "throughput": 14596.8, "total_tokens": 7179776} +{"current_steps": 2280, "total_steps": 15621, "loss": 0.423, "lr": 1.9872261462055003e-06, "epoch": 0.14595736508546187, "percentage": 14.6, "elapsed_time": "0:08:12", "remaining_time": "0:48:01", "throughput": 14607.33, "total_tokens": 7194240} +{"current_steps": 2285, "total_steps": 15621, "loss": 0.4393, "lr": 1.987047504977045e-06, "epoch": 0.14627744702643877, "percentage": 14.63, "elapsed_time": "0:08:13", "remaining_time": "0:47:58", "throughput": 14618.91, "total_tokens": 7209472} +{"current_steps": 2290, "total_steps": 15621, "loss": 0.4174, "lr": 1.9868676314081902e-06, "epoch": 0.14659752896741565, "percentage": 14.66, "elapsed_time": "0:08:13", "remaining_time": "0:47:54", "throughput": 14630.79, "total_tokens": 7225088} +{"current_steps": 2295, "total_steps": 15621, "loss": 0.6811, "lr": 1.9866865257235107e-06, "epoch": 0.14691761090839256, "percentage": 14.69, "elapsed_time": "0:08:14", "remaining_time": "0:47:51", "throughput": 14642.66, "total_tokens": 7240704} +{"current_steps": 2300, "total_steps": 15621, "loss": 0.4241, "lr": 1.9865041881491188e-06, "epoch": 0.14723769284936944, "percentage": 14.72, "elapsed_time": "0:08:15", "remaining_time": "0:47:47", "throughput": 14654.45, "total_tokens": 7256000} +{"current_steps": 2305, "total_steps": 15621, "loss": 0.6191, "lr": 1.9863206189126653e-06, "epoch": 0.14755777479034632, "percentage": 14.76, "elapsed_time": "0:08:15", "remaining_time": "0:47:44", "throughput": 14664.72, "total_tokens": 7270336} +{"current_steps": 2310, "total_steps": 15621, "loss": 0.5735, "lr": 1.9861358182433382e-06, "epoch": 0.14787785673132323, "percentage": 14.79, "elapsed_time": "0:08:16", "remaining_time": "0:47:40", "throughput": 14675.98, "total_tokens": 7285440} +{"current_steps": 2315, "total_steps": 15621, "loss": 0.4719, "lr": 1.9859497863718634e-06, "epoch": 0.1481979386723001, "percentage": 14.82, "elapsed_time": "0:08:17", "remaining_time": "0:47:37", "throughput": 14687.54, "total_tokens": 7301120} +{"current_steps": 2320, "total_steps": 15621, "loss": 0.5315, "lr": 1.985762523530504e-06, "epoch": 0.14851802061327699, "percentage": 14.85, "elapsed_time": "0:08:17", "remaining_time": "0:47:33", "throughput": 14698.63, "total_tokens": 7316416} +{"current_steps": 2325, "total_steps": 15621, "loss": 0.4997, "lr": 1.98557402995306e-06, "epoch": 0.1488381025542539, "percentage": 14.88, "elapsed_time": "0:08:18", "remaining_time": "0:47:30", "throughput": 14710.46, "total_tokens": 7332160} +{"current_steps": 2330, "total_steps": 15621, "loss": 0.7101, "lr": 1.985384305874868e-06, "epoch": 0.14915818449523077, "percentage": 14.92, "elapsed_time": "0:08:19", "remaining_time": "0:47:27", "throughput": 14722.02, "total_tokens": 7347776} +{"current_steps": 2335, "total_steps": 15621, "loss": 0.5478, "lr": 1.9851933515328e-06, "epoch": 0.14947826643620768, "percentage": 14.95, "elapsed_time": "0:08:19", "remaining_time": "0:47:23", "throughput": 14733.31, "total_tokens": 7363200} +{"current_steps": 2340, "total_steps": 15621, "loss": 0.475, "lr": 1.985001167165265e-06, "epoch": 0.14979834837718456, "percentage": 14.98, "elapsed_time": "0:08:20", "remaining_time": "0:47:20", "throughput": 14744.87, "total_tokens": 7378752} +{"current_steps": 2345, "total_steps": 15621, "loss": 0.5239, "lr": 1.984807753012208e-06, "epoch": 0.15011843031816144, "percentage": 15.01, "elapsed_time": "0:08:21", "remaining_time": "0:47:16", "throughput": 14755.98, "total_tokens": 7393984} +{"current_steps": 2346, "total_steps": 15621, "eval_loss": 0.5113906264305115, "epoch": 0.15018244670635683, "percentage": 15.02, "elapsed_time": "0:09:11", "remaining_time": "0:52:02", "throughput": 13405.11, "total_tokens": 7397056} +{"current_steps": 2350, "total_steps": 15621, "loss": 0.5882, "lr": 1.9846131093151086e-06, "epoch": 0.15043851225913835, "percentage": 15.04, "elapsed_time": "0:09:49", "remaining_time": "0:55:30", "throughput": 12560.74, "total_tokens": 7408832} +{"current_steps": 2355, "total_steps": 15621, "loss": 0.4612, "lr": 1.9844172363169808e-06, "epoch": 0.15075859420011523, "percentage": 15.08, "elapsed_time": "0:09:50", "remaining_time": "0:55:26", "throughput": 12571.46, "total_tokens": 7423040} +{"current_steps": 2360, "total_steps": 15621, "loss": 0.5148, "lr": 1.9842201342623756e-06, "epoch": 0.15107867614109213, "percentage": 15.11, "elapsed_time": "0:09:51", "remaining_time": "0:55:21", "throughput": 12583.42, "total_tokens": 7438464} +{"current_steps": 2365, "total_steps": 15621, "loss": 0.5219, "lr": 1.9840218033973766e-06, "epoch": 0.151398758082069, "percentage": 15.14, "elapsed_time": "0:09:51", "remaining_time": "0:55:17", "throughput": 12595.28, "total_tokens": 7453824} +{"current_steps": 2370, "total_steps": 15621, "loss": 0.5858, "lr": 1.9838222439696027e-06, "epoch": 0.1517188400230459, "percentage": 15.17, "elapsed_time": "0:09:52", "remaining_time": "0:55:12", "throughput": 12607.22, "total_tokens": 7469312} +{"current_steps": 2375, "total_steps": 15621, "loss": 0.7034, "lr": 1.9836214562282058e-06, "epoch": 0.1520389219640228, "percentage": 15.2, "elapsed_time": "0:09:53", "remaining_time": "0:55:08", "throughput": 12619.53, "total_tokens": 7485120} +{"current_steps": 2380, "total_steps": 15621, "loss": 0.5189, "lr": 1.9834194404238715e-06, "epoch": 0.15235900390499968, "percentage": 15.24, "elapsed_time": "0:09:53", "remaining_time": "0:55:03", "throughput": 12631.31, "total_tokens": 7500416} +{"current_steps": 2385, "total_steps": 15621, "loss": 0.4149, "lr": 1.9832161968088193e-06, "epoch": 0.15267908584597656, "percentage": 15.27, "elapsed_time": "0:09:54", "remaining_time": "0:54:59", "throughput": 12644.06, "total_tokens": 7516672} +{"current_steps": 2390, "total_steps": 15621, "loss": 0.4703, "lr": 1.9830117256368015e-06, "epoch": 0.15299916778695347, "percentage": 15.3, "elapsed_time": "0:09:55", "remaining_time": "0:54:54", "throughput": 12656.65, "total_tokens": 7532800} +{"current_steps": 2395, "total_steps": 15621, "loss": 0.4994, "lr": 1.982806027163102e-06, "epoch": 0.15331924972793035, "percentage": 15.33, "elapsed_time": "0:09:55", "remaining_time": "0:54:50", "throughput": 12668.03, "total_tokens": 7547776} +{"current_steps": 2400, "total_steps": 15621, "loss": 0.5718, "lr": 1.9825991016445386e-06, "epoch": 0.15363933166890725, "percentage": 15.36, "elapsed_time": "0:09:56", "remaining_time": "0:54:45", "throughput": 12679.04, "total_tokens": 7562496} +{"current_steps": 2405, "total_steps": 15621, "loss": 0.5263, "lr": 1.9823909493394594e-06, "epoch": 0.15395941360988413, "percentage": 15.4, "elapsed_time": "0:09:57", "remaining_time": "0:54:41", "throughput": 12690.7, "total_tokens": 7577920} +{"current_steps": 2410, "total_steps": 15621, "loss": 0.5373, "lr": 1.9821815705077455e-06, "epoch": 0.154279495550861, "percentage": 15.43, "elapsed_time": "0:09:57", "remaining_time": "0:54:36", "throughput": 12702.34, "total_tokens": 7593216} +{"current_steps": 2415, "total_steps": 15621, "loss": 0.5752, "lr": 1.9819709654108087e-06, "epoch": 0.15459957749183792, "percentage": 15.46, "elapsed_time": "0:09:58", "remaining_time": "0:54:32", "throughput": 12713.45, "total_tokens": 7608192} +{"current_steps": 2420, "total_steps": 15621, "loss": 0.4606, "lr": 1.981759134311592e-06, "epoch": 0.1549196594328148, "percentage": 15.49, "elapsed_time": "0:09:59", "remaining_time": "0:54:28", "throughput": 12725.98, "total_tokens": 7624448} +{"current_steps": 2425, "total_steps": 15621, "loss": 0.4839, "lr": 1.981546077474569e-06, "epoch": 0.15523974137379168, "percentage": 15.52, "elapsed_time": "0:09:59", "remaining_time": "0:54:23", "throughput": 12737.94, "total_tokens": 7640192} +{"current_steps": 2430, "total_steps": 15621, "loss": 0.534, "lr": 1.981331795165744e-06, "epoch": 0.15555982331476859, "percentage": 15.56, "elapsed_time": "0:10:00", "remaining_time": "0:54:19", "throughput": 12748.87, "total_tokens": 7654848} +{"current_steps": 2435, "total_steps": 15621, "loss": 0.6053, "lr": 1.9811162876526498e-06, "epoch": 0.15587990525574547, "percentage": 15.59, "elapsed_time": "0:10:01", "remaining_time": "0:54:15", "throughput": 12761.0, "total_tokens": 7670848} +{"current_steps": 2440, "total_steps": 15621, "loss": 0.6575, "lr": 1.9808995552043515e-06, "epoch": 0.15619998719672237, "percentage": 15.62, "elapsed_time": "0:10:01", "remaining_time": "0:54:10", "throughput": 12772.33, "total_tokens": 7686016} +{"current_steps": 2445, "total_steps": 15621, "loss": 0.5662, "lr": 1.9806815980914413e-06, "epoch": 0.15652006913769925, "percentage": 15.65, "elapsed_time": "0:10:02", "remaining_time": "0:54:06", "throughput": 12784.26, "total_tokens": 7701760} +{"current_steps": 2450, "total_steps": 15621, "loss": 0.5736, "lr": 1.9804624165860417e-06, "epoch": 0.15684015107867613, "percentage": 15.68, "elapsed_time": "0:10:03", "remaining_time": "0:54:02", "throughput": 12796.33, "total_tokens": 7717760} +{"current_steps": 2455, "total_steps": 15621, "loss": 0.3894, "lr": 1.9802420109618028e-06, "epoch": 0.15716023301965304, "percentage": 15.72, "elapsed_time": "0:10:03", "remaining_time": "0:53:58", "throughput": 12807.94, "total_tokens": 7733376} +{"current_steps": 2460, "total_steps": 15621, "loss": 0.503, "lr": 1.980020381493904e-06, "epoch": 0.15748031496062992, "percentage": 15.75, "elapsed_time": "0:10:04", "remaining_time": "0:53:54", "throughput": 12821.07, "total_tokens": 7750464} +{"current_steps": 2465, "total_steps": 15621, "loss": 0.5024, "lr": 1.979797528459052e-06, "epoch": 0.1578003969016068, "percentage": 15.78, "elapsed_time": "0:10:05", "remaining_time": "0:53:50", "throughput": 12835.31, "total_tokens": 7768576} +{"current_steps": 2470, "total_steps": 15621, "loss": 0.5285, "lr": 1.979573452135482e-06, "epoch": 0.1581204788425837, "percentage": 15.81, "elapsed_time": "0:10:05", "remaining_time": "0:53:46", "throughput": 12847.05, "total_tokens": 7784256} +{"current_steps": 2475, "total_steps": 15621, "loss": 0.3218, "lr": 1.979348152802955e-06, "epoch": 0.15844056078356059, "percentage": 15.84, "elapsed_time": "0:10:06", "remaining_time": "0:53:41", "throughput": 12858.04, "total_tokens": 7799232} +{"current_steps": 2480, "total_steps": 15621, "loss": 0.5854, "lr": 1.979121630742761e-06, "epoch": 0.1587606427245375, "percentage": 15.88, "elapsed_time": "0:10:07", "remaining_time": "0:53:37", "throughput": 12869.86, "total_tokens": 7815040} +{"current_steps": 2485, "total_steps": 15621, "loss": 0.4547, "lr": 1.9788938862377146e-06, "epoch": 0.15908072466551437, "percentage": 15.91, "elapsed_time": "0:10:07", "remaining_time": "0:53:33", "throughput": 12881.12, "total_tokens": 7830400} +{"current_steps": 2490, "total_steps": 15621, "loss": 0.4803, "lr": 1.9786649195721577e-06, "epoch": 0.15940080660649125, "percentage": 15.94, "elapsed_time": "0:10:08", "remaining_time": "0:53:29", "throughput": 12892.83, "total_tokens": 7846336} +{"current_steps": 2495, "total_steps": 15621, "loss": 0.6471, "lr": 1.978434731031958e-06, "epoch": 0.15972088854746816, "percentage": 15.97, "elapsed_time": "0:10:09", "remaining_time": "0:53:25", "throughput": 12904.96, "total_tokens": 7862528} +{"current_steps": 2500, "total_steps": 15621, "loss": 0.4554, "lr": 1.9782033209045085e-06, "epoch": 0.16004097048844504, "percentage": 16.0, "elapsed_time": "0:10:09", "remaining_time": "0:53:21", "throughput": 12918.18, "total_tokens": 7880000} +{"current_steps": 2505, "total_steps": 15621, "loss": 0.4114, "lr": 1.977970689478727e-06, "epoch": 0.16036105242942192, "percentage": 16.04, "elapsed_time": "0:10:10", "remaining_time": "0:53:17", "throughput": 12929.27, "total_tokens": 7895296} +{"current_steps": 2510, "total_steps": 15621, "loss": 0.5963, "lr": 1.9777368370450577e-06, "epoch": 0.16068113437039883, "percentage": 16.07, "elapsed_time": "0:10:11", "remaining_time": "0:53:13", "throughput": 12940.8, "total_tokens": 7911104} +{"current_steps": 2515, "total_steps": 15621, "loss": 0.5129, "lr": 1.9775017638954674e-06, "epoch": 0.1610012163113757, "percentage": 16.1, "elapsed_time": "0:10:11", "remaining_time": "0:53:09", "throughput": 12951.38, "total_tokens": 7925952} +{"current_steps": 2520, "total_steps": 15621, "loss": 0.6004, "lr": 1.9772654703234476e-06, "epoch": 0.1613212982523526, "percentage": 16.13, "elapsed_time": "0:10:12", "remaining_time": "0:53:04", "throughput": 12962.01, "total_tokens": 7940928} +{"current_steps": 2525, "total_steps": 15621, "loss": 0.5638, "lr": 1.977027956624014e-06, "epoch": 0.1616413801933295, "percentage": 16.16, "elapsed_time": "0:10:13", "remaining_time": "0:53:00", "throughput": 12971.96, "total_tokens": 7955200} +{"current_steps": 2530, "total_steps": 15621, "loss": 0.5759, "lr": 1.9767892230937046e-06, "epoch": 0.16196146213430637, "percentage": 16.2, "elapsed_time": "0:10:13", "remaining_time": "0:52:56", "throughput": 12983.27, "total_tokens": 7970944} +{"current_steps": 2535, "total_steps": 15621, "loss": 0.4305, "lr": 1.976549270030581e-06, "epoch": 0.16228154407528328, "percentage": 16.23, "elapsed_time": "0:10:14", "remaining_time": "0:52:52", "throughput": 12993.89, "total_tokens": 7985856} +{"current_steps": 2540, "total_steps": 15621, "loss": 0.4789, "lr": 1.9763080977342286e-06, "epoch": 0.16260162601626016, "percentage": 16.26, "elapsed_time": "0:10:15", "remaining_time": "0:52:48", "throughput": 13004.53, "total_tokens": 8001088} +{"current_steps": 2545, "total_steps": 15621, "loss": 0.4995, "lr": 1.9760657065057527e-06, "epoch": 0.16292170795723707, "percentage": 16.29, "elapsed_time": "0:10:15", "remaining_time": "0:52:44", "throughput": 13016.69, "total_tokens": 8017856} +{"current_steps": 2550, "total_steps": 15621, "loss": 0.4597, "lr": 1.975822096647782e-06, "epoch": 0.16324178989821395, "percentage": 16.32, "elapsed_time": "0:10:16", "remaining_time": "0:52:40", "throughput": 13028.18, "total_tokens": 8033792} +{"current_steps": 2555, "total_steps": 15621, "loss": 0.4952, "lr": 1.975577268464466e-06, "epoch": 0.16356187183919083, "percentage": 16.36, "elapsed_time": "0:10:17", "remaining_time": "0:52:36", "throughput": 13038.05, "total_tokens": 8048256} +{"current_steps": 2560, "total_steps": 15621, "loss": 0.5653, "lr": 1.9753312222614765e-06, "epoch": 0.16388195378016773, "percentage": 16.39, "elapsed_time": "0:10:17", "remaining_time": "0:52:32", "throughput": 13049.01, "total_tokens": 8063680} +{"current_steps": 2565, "total_steps": 15621, "loss": 0.4827, "lr": 1.9750839583460036e-06, "epoch": 0.1642020357211446, "percentage": 16.42, "elapsed_time": "0:10:18", "remaining_time": "0:52:28", "throughput": 13060.53, "total_tokens": 8079744} +{"current_steps": 2570, "total_steps": 15621, "loss": 0.5034, "lr": 1.9748354770267603e-06, "epoch": 0.1645221176621215, "percentage": 16.45, "elapsed_time": "0:10:19", "remaining_time": "0:52:24", "throughput": 13070.96, "total_tokens": 8094656} +{"current_steps": 2575, "total_steps": 15621, "loss": 0.5117, "lr": 1.9745857786139777e-06, "epoch": 0.1648421996030984, "percentage": 16.48, "elapsed_time": "0:10:19", "remaining_time": "0:52:21", "throughput": 13082.21, "total_tokens": 8110528} +{"current_steps": 2580, "total_steps": 15621, "loss": 0.6109, "lr": 1.974334863419408e-06, "epoch": 0.16516228154407528, "percentage": 16.52, "elapsed_time": "0:10:20", "remaining_time": "0:52:17", "throughput": 13093.67, "total_tokens": 8126720} +{"current_steps": 2585, "total_steps": 15621, "loss": 0.5038, "lr": 1.9740827317563212e-06, "epoch": 0.1654823634850522, "percentage": 16.55, "elapsed_time": "0:10:21", "remaining_time": "0:52:13", "throughput": 13103.65, "total_tokens": 8141312} +{"current_steps": 2590, "total_steps": 15621, "loss": 0.485, "lr": 1.973829383939507e-06, "epoch": 0.16580244542602907, "percentage": 16.58, "elapsed_time": "0:10:21", "remaining_time": "0:52:09", "throughput": 13114.32, "total_tokens": 8156736} +{"current_steps": 2595, "total_steps": 15621, "loss": 0.4978, "lr": 1.973574820285273e-06, "epoch": 0.16612252736700595, "percentage": 16.61, "elapsed_time": "0:10:22", "remaining_time": "0:52:05", "throughput": 13125.4, "total_tokens": 8172480} +{"current_steps": 2600, "total_steps": 15621, "loss": 0.581, "lr": 1.9733190411114443e-06, "epoch": 0.16644260930798285, "percentage": 16.64, "elapsed_time": "0:10:23", "remaining_time": "0:52:01", "throughput": 13136.31, "total_tokens": 8188224} +{"current_steps": 2605, "total_steps": 15621, "loss": 0.4388, "lr": 1.9730620467373654e-06, "epoch": 0.16676269124895973, "percentage": 16.68, "elapsed_time": "0:10:24", "remaining_time": "0:51:57", "throughput": 13147.79, "total_tokens": 8204352} +{"current_steps": 2610, "total_steps": 15621, "loss": 0.5835, "lr": 1.9728038374838958e-06, "epoch": 0.1670827731899366, "percentage": 16.71, "elapsed_time": "0:10:24", "remaining_time": "0:51:53", "throughput": 13158.11, "total_tokens": 8219328} +{"current_steps": 2615, "total_steps": 15621, "loss": 0.392, "lr": 1.972544413673413e-06, "epoch": 0.16740285513091352, "percentage": 16.74, "elapsed_time": "0:10:25", "remaining_time": "0:51:50", "throughput": 13168.48, "total_tokens": 8234560} +{"current_steps": 2620, "total_steps": 15621, "loss": 0.5766, "lr": 1.9722837756298108e-06, "epoch": 0.1677229370718904, "percentage": 16.77, "elapsed_time": "0:10:25", "remaining_time": "0:51:46", "throughput": 13178.44, "total_tokens": 8249344} +{"current_steps": 2625, "total_steps": 15621, "loss": 0.551, "lr": 1.972021923678499e-06, "epoch": 0.1680430190128673, "percentage": 16.8, "elapsed_time": "0:10:26", "remaining_time": "0:51:42", "throughput": 13189.73, "total_tokens": 8265600} +{"current_steps": 2630, "total_steps": 15621, "loss": 0.4822, "lr": 1.971758858146403e-06, "epoch": 0.16836310095384419, "percentage": 16.84, "elapsed_time": "0:10:27", "remaining_time": "0:51:38", "throughput": 13199.68, "total_tokens": 8280384} +{"current_steps": 2635, "total_steps": 15621, "loss": 0.4916, "lr": 1.9714945793619626e-06, "epoch": 0.16868318289482107, "percentage": 16.87, "elapsed_time": "0:10:27", "remaining_time": "0:51:34", "throughput": 13209.98, "total_tokens": 8295744} +{"current_steps": 2640, "total_steps": 15621, "loss": 0.52, "lr": 1.971229087655133e-06, "epoch": 0.16900326483579797, "percentage": 16.9, "elapsed_time": "0:10:28", "remaining_time": "0:51:31", "throughput": 13220.89, "total_tokens": 8311680} +{"current_steps": 2645, "total_steps": 15621, "loss": 0.4659, "lr": 1.9709623833573842e-06, "epoch": 0.16932334677677485, "percentage": 16.93, "elapsed_time": "0:10:29", "remaining_time": "0:51:27", "throughput": 13230.88, "total_tokens": 8326592} +{"current_steps": 2650, "total_steps": 15621, "loss": 0.4454, "lr": 1.9706944668016994e-06, "epoch": 0.16964342871775173, "percentage": 16.96, "elapsed_time": "0:10:29", "remaining_time": "0:51:23", "throughput": 13240.96, "total_tokens": 8341632} +{"current_steps": 2655, "total_steps": 15621, "loss": 0.4643, "lr": 1.9704253383225756e-06, "epoch": 0.16996351065872864, "percentage": 17.0, "elapsed_time": "0:10:30", "remaining_time": "0:51:20", "throughput": 13252.6, "total_tokens": 8358400} +{"current_steps": 2660, "total_steps": 15621, "loss": 0.4813, "lr": 1.970154998256023e-06, "epoch": 0.17028359259970552, "percentage": 17.03, "elapsed_time": "0:10:31", "remaining_time": "0:51:16", "throughput": 13263.39, "total_tokens": 8374144} +{"current_steps": 2665, "total_steps": 15621, "loss": 0.4266, "lr": 1.9698834469395644e-06, "epoch": 0.17060367454068243, "percentage": 17.06, "elapsed_time": "0:10:32", "remaining_time": "0:51:12", "throughput": 13273.71, "total_tokens": 8389440} +{"current_steps": 2670, "total_steps": 15621, "loss": 0.5565, "lr": 1.969610684712234e-06, "epoch": 0.1709237564816593, "percentage": 17.09, "elapsed_time": "0:10:32", "remaining_time": "0:51:08", "throughput": 13283.97, "total_tokens": 8404672} +{"current_steps": 2675, "total_steps": 15621, "loss": 0.5696, "lr": 1.9693367119145794e-06, "epoch": 0.17124383842263619, "percentage": 17.12, "elapsed_time": "0:10:33", "remaining_time": "0:51:05", "throughput": 13294.33, "total_tokens": 8420096} +{"current_steps": 2680, "total_steps": 15621, "loss": 0.6647, "lr": 1.969061528888659e-06, "epoch": 0.1715639203636131, "percentage": 17.16, "elapsed_time": "0:10:34", "remaining_time": "0:51:01", "throughput": 13305.35, "total_tokens": 8436288} +{"current_steps": 2685, "total_steps": 15621, "loss": 0.549, "lr": 1.9687851359780415e-06, "epoch": 0.17188400230458997, "percentage": 17.19, "elapsed_time": "0:10:34", "remaining_time": "0:50:58", "throughput": 13316.55, "total_tokens": 8452672} +{"current_steps": 2690, "total_steps": 15621, "loss": 0.4875, "lr": 1.968507533527807e-06, "epoch": 0.17220408424556685, "percentage": 17.22, "elapsed_time": "0:10:35", "remaining_time": "0:50:54", "throughput": 13327.8, "total_tokens": 8469120} +{"current_steps": 2695, "total_steps": 15621, "loss": 0.4694, "lr": 1.9682287218845455e-06, "epoch": 0.17252416618654376, "percentage": 17.25, "elapsed_time": "0:10:36", "remaining_time": "0:50:51", "throughput": 13338.19, "total_tokens": 8484736} +{"current_steps": 2700, "total_steps": 15621, "loss": 0.7448, "lr": 1.967948701396356e-06, "epoch": 0.17284424812752064, "percentage": 17.28, "elapsed_time": "0:10:36", "remaining_time": "0:50:47", "throughput": 13348.83, "total_tokens": 8500480} +{"current_steps": 2705, "total_steps": 15621, "loss": 0.3988, "lr": 1.9676674724128485e-06, "epoch": 0.17316433006849755, "percentage": 17.32, "elapsed_time": "0:10:37", "remaining_time": "0:50:43", "throughput": 13357.9, "total_tokens": 8514624} +{"current_steps": 2710, "total_steps": 15621, "loss": 0.4666, "lr": 1.9673850352851397e-06, "epoch": 0.17348441200947443, "percentage": 17.35, "elapsed_time": "0:10:38", "remaining_time": "0:50:39", "throughput": 13367.67, "total_tokens": 8529664} +{"current_steps": 2715, "total_steps": 15621, "loss": 0.5852, "lr": 1.967101390365856e-06, "epoch": 0.1738044939504513, "percentage": 17.38, "elapsed_time": "0:10:38", "remaining_time": "0:50:36", "throughput": 13378.22, "total_tokens": 8545280} +{"current_steps": 2720, "total_steps": 15621, "loss": 0.4975, "lr": 1.966816538009131e-06, "epoch": 0.1741245758914282, "percentage": 17.41, "elapsed_time": "0:10:39", "remaining_time": "0:50:32", "throughput": 13388.25, "total_tokens": 8560384} +{"current_steps": 2725, "total_steps": 15621, "loss": 0.538, "lr": 1.966530478570607e-06, "epoch": 0.1744446578324051, "percentage": 17.44, "elapsed_time": "0:10:40", "remaining_time": "0:50:29", "throughput": 13399.6, "total_tokens": 8576960} +{"current_steps": 2730, "total_steps": 15621, "loss": 0.4686, "lr": 1.9662432124074325e-06, "epoch": 0.174764739773382, "percentage": 17.48, "elapsed_time": "0:10:40", "remaining_time": "0:50:25", "throughput": 13409.69, "total_tokens": 8592384} +{"current_steps": 2735, "total_steps": 15621, "loss": 0.4889, "lr": 1.965954739878262e-06, "epoch": 0.17508482171435888, "percentage": 17.51, "elapsed_time": "0:10:41", "remaining_time": "0:50:22", "throughput": 13421.0, "total_tokens": 8609024} +{"current_steps": 2740, "total_steps": 15621, "loss": 0.4298, "lr": 1.965665061343257e-06, "epoch": 0.17540490365533576, "percentage": 17.54, "elapsed_time": "0:10:42", "remaining_time": "0:50:18", "throughput": 13431.53, "total_tokens": 8624768} +{"current_steps": 2745, "total_steps": 15621, "loss": 0.4643, "lr": 1.965374177164085e-06, "epoch": 0.17572498559631267, "percentage": 17.57, "elapsed_time": "0:10:42", "remaining_time": "0:50:15", "throughput": 13441.81, "total_tokens": 8640448} +{"current_steps": 2750, "total_steps": 15621, "loss": 0.5569, "lr": 1.9650820877039182e-06, "epoch": 0.17604506753728955, "percentage": 17.6, "elapsed_time": "0:10:43", "remaining_time": "0:50:11", "throughput": 13451.43, "total_tokens": 8655296} +{"current_steps": 2755, "total_steps": 15621, "loss": 0.4903, "lr": 1.9647887933274334e-06, "epoch": 0.17636514947826643, "percentage": 17.64, "elapsed_time": "0:10:44", "remaining_time": "0:50:08", "throughput": 13462.49, "total_tokens": 8671872} +{"current_steps": 2760, "total_steps": 15621, "loss": 0.4835, "lr": 1.9644942944008124e-06, "epoch": 0.17668523141924333, "percentage": 17.67, "elapsed_time": "0:10:44", "remaining_time": "0:50:04", "throughput": 13473.05, "total_tokens": 8687680} +{"current_steps": 2765, "total_steps": 15621, "loss": 0.6033, "lr": 1.96419859129174e-06, "epoch": 0.1770053133602202, "percentage": 17.7, "elapsed_time": "0:10:45", "remaining_time": "0:50:01", "throughput": 13482.81, "total_tokens": 8702912} +{"current_steps": 2770, "total_steps": 15621, "loss": 0.467, "lr": 1.963901684369406e-06, "epoch": 0.17732539530119712, "percentage": 17.73, "elapsed_time": "0:10:46", "remaining_time": "0:49:57", "throughput": 13492.75, "total_tokens": 8718144} +{"current_steps": 2775, "total_steps": 15621, "loss": 0.5107, "lr": 1.9636035740045013e-06, "epoch": 0.177645477242174, "percentage": 17.76, "elapsed_time": "0:10:46", "remaining_time": "0:49:54", "throughput": 13502.36, "total_tokens": 8732992} +{"current_steps": 2780, "total_steps": 15621, "loss": 0.6129, "lr": 1.9633042605692207e-06, "epoch": 0.17796555918315088, "percentage": 17.8, "elapsed_time": "0:10:47", "remaining_time": "0:49:50", "throughput": 13512.91, "total_tokens": 8749056} +{"current_steps": 2785, "total_steps": 15621, "loss": 0.4943, "lr": 1.9630037444372597e-06, "epoch": 0.17828564112412779, "percentage": 17.83, "elapsed_time": "0:10:48", "remaining_time": "0:49:47", "throughput": 13523.51, "total_tokens": 8765184} +{"current_steps": 2790, "total_steps": 15621, "loss": 0.4163, "lr": 1.9627020259838177e-06, "epoch": 0.17860572306510467, "percentage": 17.86, "elapsed_time": "0:10:48", "remaining_time": "0:49:43", "throughput": 13533.39, "total_tokens": 8780480} +{"current_steps": 2795, "total_steps": 15621, "loss": 0.5605, "lr": 1.9623991055855925e-06, "epoch": 0.17892580500608155, "percentage": 17.89, "elapsed_time": "0:10:49", "remaining_time": "0:49:40", "throughput": 13543.75, "total_tokens": 8796352} +{"current_steps": 2800, "total_steps": 15621, "loss": 0.4507, "lr": 1.962094983620784e-06, "epoch": 0.17924588694705845, "percentage": 17.92, "elapsed_time": "0:10:50", "remaining_time": "0:49:36", "throughput": 13552.55, "total_tokens": 8810688} +{"current_steps": 2805, "total_steps": 15621, "loss": 0.4204, "lr": 1.9617896604690925e-06, "epoch": 0.17956596888803533, "percentage": 17.96, "elapsed_time": "0:10:50", "remaining_time": "0:49:33", "throughput": 13562.59, "total_tokens": 8826304} +{"current_steps": 2810, "total_steps": 15621, "loss": 0.4545, "lr": 1.961483136511717e-06, "epoch": 0.17988605082901224, "percentage": 17.99, "elapsed_time": "0:10:51", "remaining_time": "0:49:29", "throughput": 13572.04, "total_tokens": 8841344} +{"current_steps": 2815, "total_steps": 15621, "loss": 0.6135, "lr": 1.9611754121313567e-06, "epoch": 0.18020613276998912, "percentage": 18.02, "elapsed_time": "0:10:52", "remaining_time": "0:49:26", "throughput": 13582.81, "total_tokens": 8857664} +{"current_steps": 2820, "total_steps": 15621, "loss": 0.5854, "lr": 1.960866487712209e-06, "epoch": 0.180526214710966, "percentage": 18.05, "elapsed_time": "0:10:52", "remaining_time": "0:49:23", "throughput": 13592.87, "total_tokens": 8873408} +{"current_steps": 2825, "total_steps": 15621, "loss": 0.4328, "lr": 1.9605563636399695e-06, "epoch": 0.1808462966519429, "percentage": 18.08, "elapsed_time": "0:10:53", "remaining_time": "0:49:19", "throughput": 13603.32, "total_tokens": 8889472} +{"current_steps": 2830, "total_steps": 15621, "loss": 0.6013, "lr": 1.9602450403018315e-06, "epoch": 0.18116637859291979, "percentage": 18.12, "elapsed_time": "0:10:54", "remaining_time": "0:49:16", "throughput": 13612.87, "total_tokens": 8904640} +{"current_steps": 2835, "total_steps": 15621, "loss": 0.4548, "lr": 1.9599325180864864e-06, "epoch": 0.18148646053389667, "percentage": 18.15, "elapsed_time": "0:10:54", "remaining_time": "0:49:13", "throughput": 13622.29, "total_tokens": 8919680} +{"current_steps": 2840, "total_steps": 15621, "loss": 0.446, "lr": 1.9596187973841216e-06, "epoch": 0.18180654247487357, "percentage": 18.18, "elapsed_time": "0:10:55", "remaining_time": "0:49:09", "throughput": 13632.3, "total_tokens": 8935360} +{"current_steps": 2845, "total_steps": 15621, "loss": 0.4871, "lr": 1.959303878586421e-06, "epoch": 0.18212662441585045, "percentage": 18.21, "elapsed_time": "0:10:56", "remaining_time": "0:49:06", "throughput": 13642.79, "total_tokens": 8951552} +{"current_steps": 2850, "total_steps": 15621, "loss": 0.585, "lr": 1.9589877620865647e-06, "epoch": 0.18244670635682736, "percentage": 18.24, "elapsed_time": "0:10:56", "remaining_time": "0:49:03", "throughput": 13653.96, "total_tokens": 8968576} +{"current_steps": 2855, "total_steps": 15621, "loss": 0.4598, "lr": 1.9586704482792277e-06, "epoch": 0.18276678829780424, "percentage": 18.28, "elapsed_time": "0:10:57", "remaining_time": "0:48:59", "throughput": 13663.49, "total_tokens": 8983744} +{"current_steps": 2860, "total_steps": 15621, "loss": 0.4344, "lr": 1.95835193756058e-06, "epoch": 0.18308687023878112, "percentage": 18.31, "elapsed_time": "0:10:58", "remaining_time": "0:48:56", "throughput": 13672.89, "total_tokens": 8999040} +{"current_steps": 2865, "total_steps": 15621, "loss": 0.4269, "lr": 1.9580322303282858e-06, "epoch": 0.18340695217975803, "percentage": 18.34, "elapsed_time": "0:10:58", "remaining_time": "0:48:53", "throughput": 13683.84, "total_tokens": 9015872} +{"current_steps": 2870, "total_steps": 15621, "loss": 0.4106, "lr": 1.9577113269815038e-06, "epoch": 0.1837270341207349, "percentage": 18.37, "elapsed_time": "0:10:59", "remaining_time": "0:48:50", "throughput": 13693.93, "total_tokens": 9031744} +{"current_steps": 2875, "total_steps": 15621, "loss": 0.5936, "lr": 1.957389227920885e-06, "epoch": 0.18404711606171179, "percentage": 18.4, "elapsed_time": "0:11:00", "remaining_time": "0:48:47", "throughput": 13704.04, "total_tokens": 9047872} +{"current_steps": 2880, "total_steps": 15621, "loss": 0.5225, "lr": 1.957065933548574e-06, "epoch": 0.1843671980026887, "percentage": 18.44, "elapsed_time": "0:11:00", "remaining_time": "0:48:43", "throughput": 13713.32, "total_tokens": 9062976} +{"current_steps": 2885, "total_steps": 15621, "loss": 0.5881, "lr": 1.956741444268208e-06, "epoch": 0.18468727994366557, "percentage": 18.47, "elapsed_time": "0:11:01", "remaining_time": "0:48:40", "throughput": 13722.8, "total_tokens": 9078208} +{"current_steps": 2890, "total_steps": 15621, "loss": 0.4778, "lr": 1.9564157604849154e-06, "epoch": 0.18500736188464248, "percentage": 18.5, "elapsed_time": "0:11:02", "remaining_time": "0:48:37", "throughput": 13733.13, "total_tokens": 9094720} +{"current_steps": 2895, "total_steps": 15621, "loss": 0.529, "lr": 1.9560888826053163e-06, "epoch": 0.18532744382561936, "percentage": 18.53, "elapsed_time": "0:11:02", "remaining_time": "0:48:34", "throughput": 13742.84, "total_tokens": 9110336} +{"current_steps": 2900, "total_steps": 15621, "loss": 0.5617, "lr": 1.9557608110375212e-06, "epoch": 0.18564752576659624, "percentage": 18.56, "elapsed_time": "0:11:03", "remaining_time": "0:48:30", "throughput": 13753.34, "total_tokens": 9126912} +{"current_steps": 2905, "total_steps": 15621, "loss": 0.5447, "lr": 1.955431546191132e-06, "epoch": 0.18596760770757315, "percentage": 18.6, "elapsed_time": "0:11:04", "remaining_time": "0:48:27", "throughput": 13762.96, "total_tokens": 9142400} +{"current_steps": 2910, "total_steps": 15621, "loss": 0.5254, "lr": 1.95510108847724e-06, "epoch": 0.18628768964855003, "percentage": 18.63, "elapsed_time": "0:11:04", "remaining_time": "0:48:24", "throughput": 13771.86, "total_tokens": 9157184} +{"current_steps": 2915, "total_steps": 15621, "loss": 0.526, "lr": 1.954769438308424e-06, "epoch": 0.1866077715895269, "percentage": 18.66, "elapsed_time": "0:11:05", "remaining_time": "0:48:21", "throughput": 13782.17, "total_tokens": 9173696} +{"current_steps": 2920, "total_steps": 15621, "loss": 0.5085, "lr": 1.954436596098754e-06, "epoch": 0.1869278535305038, "percentage": 18.69, "elapsed_time": "0:11:06", "remaining_time": "0:48:18", "throughput": 13792.35, "total_tokens": 9190080} +{"current_steps": 2925, "total_steps": 15621, "loss": 0.5828, "lr": 1.9541025622637875e-06, "epoch": 0.1872479354714807, "percentage": 18.72, "elapsed_time": "0:11:06", "remaining_time": "0:48:14", "throughput": 13800.62, "total_tokens": 9204352} +{"current_steps": 2930, "total_steps": 15621, "loss": 0.6086, "lr": 1.95376733722057e-06, "epoch": 0.1875680174124576, "percentage": 18.76, "elapsed_time": "0:11:07", "remaining_time": "0:48:11", "throughput": 13809.54, "total_tokens": 9219200} +{"current_steps": 2935, "total_steps": 15621, "loss": 0.4778, "lr": 1.9534309213876337e-06, "epoch": 0.18788809935343448, "percentage": 18.79, "elapsed_time": "0:11:08", "remaining_time": "0:48:08", "throughput": 13818.06, "total_tokens": 9233600} +{"current_steps": 2940, "total_steps": 15621, "loss": 0.4369, "lr": 1.953093315184997e-06, "epoch": 0.18820818129441136, "percentage": 18.82, "elapsed_time": "0:11:08", "remaining_time": "0:48:05", "throughput": 13827.91, "total_tokens": 9249536} +{"current_steps": 2945, "total_steps": 15621, "loss": 0.6525, "lr": 1.952754519034166e-06, "epoch": 0.18852826323538827, "percentage": 18.85, "elapsed_time": "0:11:09", "remaining_time": "0:48:01", "throughput": 13836.66, "total_tokens": 9264256} +{"current_steps": 2950, "total_steps": 15621, "loss": 0.4542, "lr": 1.9524145333581313e-06, "epoch": 0.18884834517636515, "percentage": 18.88, "elapsed_time": "0:11:10", "remaining_time": "0:47:58", "throughput": 13845.81, "total_tokens": 9279488} +{"current_steps": 2955, "total_steps": 15621, "loss": 0.5187, "lr": 1.952073358581369e-06, "epoch": 0.18916842711734205, "percentage": 18.92, "elapsed_time": "0:11:10", "remaining_time": "0:47:55", "throughput": 13854.6, "total_tokens": 9294336} +{"current_steps": 2960, "total_steps": 15621, "loss": 0.5615, "lr": 1.95173099512984e-06, "epoch": 0.18948850905831893, "percentage": 18.95, "elapsed_time": "0:11:11", "remaining_time": "0:47:52", "throughput": 13863.6, "total_tokens": 9309376} +{"current_steps": 2965, "total_steps": 15621, "loss": 0.4698, "lr": 1.9513874434309894e-06, "epoch": 0.1898085909992958, "percentage": 18.98, "elapsed_time": "0:11:12", "remaining_time": "0:47:49", "throughput": 13872.32, "total_tokens": 9324224} +{"current_steps": 2970, "total_steps": 15621, "loss": 0.4491, "lr": 1.951042703913745e-06, "epoch": 0.19012867294027272, "percentage": 19.01, "elapsed_time": "0:11:12", "remaining_time": "0:47:45", "throughput": 13881.12, "total_tokens": 9339136} +{"current_steps": 2975, "total_steps": 15621, "loss": 0.4492, "lr": 1.950696777008518e-06, "epoch": 0.1904487548812496, "percentage": 19.04, "elapsed_time": "0:11:13", "remaining_time": "0:47:42", "throughput": 13890.45, "total_tokens": 9354688} +{"current_steps": 2980, "total_steps": 15621, "loss": 0.4948, "lr": 1.9503496631472025e-06, "epoch": 0.19076883682222648, "percentage": 19.08, "elapsed_time": "0:11:14", "remaining_time": "0:47:39", "throughput": 13899.22, "total_tokens": 9369664} +{"current_steps": 2985, "total_steps": 15621, "loss": 0.6353, "lr": 1.9500013627631746e-06, "epoch": 0.19108891876320339, "percentage": 19.11, "elapsed_time": "0:11:14", "remaining_time": "0:47:36", "throughput": 13908.12, "total_tokens": 9384768} +{"current_steps": 2990, "total_steps": 15621, "loss": 0.3771, "lr": 1.949651876291291e-06, "epoch": 0.19140900070418027, "percentage": 19.14, "elapsed_time": "0:11:15", "remaining_time": "0:47:33", "throughput": 13917.44, "total_tokens": 9400320} +{"current_steps": 2995, "total_steps": 15621, "loss": 0.4872, "lr": 1.9493012041678894e-06, "epoch": 0.19172908264515717, "percentage": 19.17, "elapsed_time": "0:11:16", "remaining_time": "0:47:30", "throughput": 13926.55, "total_tokens": 9415872} +{"current_steps": 3000, "total_steps": 15621, "loss": 0.5988, "lr": 1.9489493468307883e-06, "epoch": 0.19204916458613405, "percentage": 19.2, "elapsed_time": "0:11:16", "remaining_time": "0:47:27", "throughput": 13936.94, "total_tokens": 9432704} +{"current_steps": 3005, "total_steps": 15621, "loss": 0.5456, "lr": 1.948596304719286e-06, "epoch": 0.19236924652711093, "percentage": 19.24, "elapsed_time": "0:11:17", "remaining_time": "0:47:24", "throughput": 13946.03, "total_tokens": 9448192} +{"current_steps": 3010, "total_steps": 15621, "loss": 0.4447, "lr": 1.9482420782741594e-06, "epoch": 0.19268932846808784, "percentage": 19.27, "elapsed_time": "0:11:18", "remaining_time": "0:47:21", "throughput": 13955.92, "total_tokens": 9464576} +{"current_steps": 3015, "total_steps": 15621, "loss": 0.5591, "lr": 1.9478866679376647e-06, "epoch": 0.19300941040906472, "percentage": 19.3, "elapsed_time": "0:11:18", "remaining_time": "0:47:18", "throughput": 13964.98, "total_tokens": 9479936} +{"current_steps": 3020, "total_steps": 15621, "loss": 0.5564, "lr": 1.9475300741535353e-06, "epoch": 0.1933294923500416, "percentage": 19.33, "elapsed_time": "0:11:19", "remaining_time": "0:47:15", "throughput": 13975.72, "total_tokens": 9497280} +{"current_steps": 3025, "total_steps": 15621, "loss": 0.4714, "lr": 1.9471722973669833e-06, "epoch": 0.1936495742910185, "percentage": 19.36, "elapsed_time": "0:11:20", "remaining_time": "0:47:12", "throughput": 13986.42, "total_tokens": 9514496} +{"current_steps": 3030, "total_steps": 15621, "loss": 0.3979, "lr": 1.946813338024697e-06, "epoch": 0.19396965623199539, "percentage": 19.4, "elapsed_time": "0:11:20", "remaining_time": "0:47:09", "throughput": 13995.15, "total_tokens": 9529536} +{"current_steps": 3035, "total_steps": 15621, "loss": 0.5342, "lr": 1.9464531965748414e-06, "epoch": 0.1942897381729723, "percentage": 19.43, "elapsed_time": "0:11:21", "remaining_time": "0:47:06", "throughput": 14004.64, "total_tokens": 9545472} +{"current_steps": 3040, "total_steps": 15621, "loss": 0.5827, "lr": 1.9460918734670573e-06, "epoch": 0.19460982011394917, "percentage": 19.46, "elapsed_time": "0:11:22", "remaining_time": "0:47:03", "throughput": 14013.59, "total_tokens": 9560960} +{"current_steps": 3045, "total_steps": 15621, "loss": 0.543, "lr": 1.945729369152461e-06, "epoch": 0.19492990205492605, "percentage": 19.49, "elapsed_time": "0:11:22", "remaining_time": "0:47:00", "throughput": 14022.34, "total_tokens": 9576320} +{"current_steps": 3050, "total_steps": 15621, "loss": 0.5533, "lr": 1.945365684083643e-06, "epoch": 0.19524998399590296, "percentage": 19.52, "elapsed_time": "0:11:23", "remaining_time": "0:46:57", "throughput": 14031.48, "total_tokens": 9592192} +{"current_steps": 3055, "total_steps": 15621, "loss": 0.615, "lr": 1.945000818714668e-06, "epoch": 0.19557006593687984, "percentage": 19.56, "elapsed_time": "0:11:24", "remaining_time": "0:46:54", "throughput": 14040.85, "total_tokens": 9608128} +{"current_steps": 3060, "total_steps": 15621, "loss": 0.546, "lr": 1.944634773501076e-06, "epoch": 0.19589014787785672, "percentage": 19.59, "elapsed_time": "0:11:24", "remaining_time": "0:46:51", "throughput": 14050.11, "total_tokens": 9623872} +{"current_steps": 3065, "total_steps": 15621, "loss": 0.5662, "lr": 1.9442675488998783e-06, "epoch": 0.19621022981883363, "percentage": 19.62, "elapsed_time": "0:11:25", "remaining_time": "0:46:48", "throughput": 14058.96, "total_tokens": 9639488} +{"current_steps": 3070, "total_steps": 15621, "loss": 0.5017, "lr": 1.9438991453695587e-06, "epoch": 0.1965303117598105, "percentage": 19.65, "elapsed_time": "0:11:26", "remaining_time": "0:46:45", "throughput": 14068.37, "total_tokens": 9655680} +{"current_steps": 3075, "total_steps": 15621, "loss": 0.5648, "lr": 1.943529563370073e-06, "epoch": 0.1968503937007874, "percentage": 19.69, "elapsed_time": "0:11:26", "remaining_time": "0:46:42", "throughput": 14076.7, "total_tokens": 9670400} +{"current_steps": 3080, "total_steps": 15621, "loss": 0.3815, "lr": 1.9431588033628495e-06, "epoch": 0.1971704756417643, "percentage": 19.72, "elapsed_time": "0:11:27", "remaining_time": "0:46:39", "throughput": 14085.2, "total_tokens": 9685504} +{"current_steps": 3085, "total_steps": 15621, "loss": 0.6302, "lr": 1.9427868658107862e-06, "epoch": 0.19749055758274117, "percentage": 19.75, "elapsed_time": "0:11:28", "remaining_time": "0:46:37", "throughput": 14094.83, "total_tokens": 9701952} +{"current_steps": 3090, "total_steps": 15621, "loss": 0.449, "lr": 1.942413751178251e-06, "epoch": 0.19781063952371808, "percentage": 19.78, "elapsed_time": "0:11:28", "remaining_time": "0:46:34", "throughput": 14103.31, "total_tokens": 9716928} +{"current_steps": 3095, "total_steps": 15621, "loss": 0.6552, "lr": 1.9420394599310826e-06, "epoch": 0.19813072146469496, "percentage": 19.81, "elapsed_time": "0:11:29", "remaining_time": "0:46:31", "throughput": 14111.86, "total_tokens": 9732096} +{"current_steps": 3100, "total_steps": 15621, "loss": 0.5247, "lr": 1.941663992536588e-06, "epoch": 0.19845080340567184, "percentage": 19.85, "elapsed_time": "0:11:30", "remaining_time": "0:46:28", "throughput": 14120.64, "total_tokens": 9747648} +{"current_steps": 3105, "total_steps": 15621, "loss": 0.4467, "lr": 1.941287349463542e-06, "epoch": 0.19877088534664875, "percentage": 19.88, "elapsed_time": "0:11:30", "remaining_time": "0:46:25", "throughput": 14129.29, "total_tokens": 9763072} +{"current_steps": 3110, "total_steps": 15621, "loss": 0.4856, "lr": 1.940909531182188e-06, "epoch": 0.19909096728762563, "percentage": 19.91, "elapsed_time": "0:11:31", "remaining_time": "0:46:22", "throughput": 14137.75, "total_tokens": 9778176} +{"current_steps": 3115, "total_steps": 15621, "loss": 0.6168, "lr": 1.9405305381642375e-06, "epoch": 0.19941104922860253, "percentage": 19.94, "elapsed_time": "0:11:32", "remaining_time": "0:46:19", "throughput": 14146.52, "total_tokens": 9793536} +{"current_steps": 3120, "total_steps": 15621, "loss": 0.5055, "lr": 1.9401503708828665e-06, "epoch": 0.1997311311695794, "percentage": 19.97, "elapsed_time": "0:11:32", "remaining_time": "0:46:16", "throughput": 14154.66, "total_tokens": 9808192} +{"current_steps": 3125, "total_steps": 15621, "loss": 0.5889, "lr": 1.939769029812719e-06, "epoch": 0.2000512131105563, "percentage": 20.01, "elapsed_time": "0:11:33", "remaining_time": "0:46:13", "throughput": 14163.01, "total_tokens": 9823232} +{"current_steps": 3128, "total_steps": 15621, "eval_loss": 0.4917045831680298, "epoch": 0.20024326227514244, "percentage": 20.02, "elapsed_time": "0:12:24", "remaining_time": "0:49:33", "throughput": 13205.54, "total_tokens": 9832064} +{"current_steps": 3130, "total_steps": 15621, "loss": 0.5998, "lr": 1.939386515429904e-06, "epoch": 0.2003712950515332, "percentage": 20.04, "elapsed_time": "0:13:19", "remaining_time": "0:53:08", "throughput": 12314.28, "total_tokens": 9839488} +{"current_steps": 3135, "total_steps": 15621, "loss": 0.4234, "lr": 1.9390028282119942e-06, "epoch": 0.20069137699251008, "percentage": 20.07, "elapsed_time": "0:13:19", "remaining_time": "0:53:05", "throughput": 12324.44, "total_tokens": 9856192} +{"current_steps": 3140, "total_steps": 15621, "loss": 0.5139, "lr": 1.938617968638029e-06, "epoch": 0.201011458933487, "percentage": 20.1, "elapsed_time": "0:13:20", "remaining_time": "0:53:01", "throughput": 12333.29, "total_tokens": 9871552} +{"current_steps": 3145, "total_steps": 15621, "loss": 0.5103, "lr": 1.938231937188509e-06, "epoch": 0.20133154087446387, "percentage": 20.13, "elapsed_time": "0:13:21", "remaining_time": "0:52:57", "throughput": 12341.56, "total_tokens": 9886016} +{"current_steps": 3150, "total_steps": 15621, "loss": 0.6257, "lr": 1.9378447343453995e-06, "epoch": 0.20165162281544075, "percentage": 20.17, "elapsed_time": "0:13:21", "remaining_time": "0:52:54", "throughput": 12352.25, "total_tokens": 9903552} +{"current_steps": 3155, "total_steps": 15621, "loss": 0.3501, "lr": 1.9374563605921275e-06, "epoch": 0.20197170475641765, "percentage": 20.2, "elapsed_time": "0:13:22", "remaining_time": "0:52:50", "throughput": 12362.31, "total_tokens": 9920320} +{"current_steps": 3160, "total_steps": 15621, "loss": 0.5844, "lr": 1.937066816413582e-06, "epoch": 0.20229178669739453, "percentage": 20.23, "elapsed_time": "0:13:23", "remaining_time": "0:52:47", "throughput": 12371.51, "total_tokens": 9935936} +{"current_steps": 3165, "total_steps": 15621, "loss": 0.4866, "lr": 1.9366761022961146e-06, "epoch": 0.2026118686383714, "percentage": 20.26, "elapsed_time": "0:13:23", "remaining_time": "0:52:43", "throughput": 12380.03, "total_tokens": 9950912} +{"current_steps": 3170, "total_steps": 15621, "loss": 0.5726, "lr": 1.9362842187275354e-06, "epoch": 0.20293195057934832, "percentage": 20.29, "elapsed_time": "0:13:24", "remaining_time": "0:52:39", "throughput": 12388.74, "total_tokens": 9966080} +{"current_steps": 3175, "total_steps": 15621, "loss": 0.4769, "lr": 1.9358911661971155e-06, "epoch": 0.2032520325203252, "percentage": 20.33, "elapsed_time": "0:13:25", "remaining_time": "0:52:36", "throughput": 12396.19, "total_tokens": 9982080} +{"current_steps": 3180, "total_steps": 15621, "loss": 0.4818, "lr": 1.9354969451955864e-06, "epoch": 0.2035721144613021, "percentage": 20.36, "elapsed_time": "0:13:25", "remaining_time": "0:52:32", "throughput": 12404.36, "total_tokens": 9996544} +{"current_steps": 3185, "total_steps": 15621, "loss": 0.5595, "lr": 1.9351015562151375e-06, "epoch": 0.20389219640227899, "percentage": 20.39, "elapsed_time": "0:13:26", "remaining_time": "0:52:29", "throughput": 12413.18, "total_tokens": 10011776} +{"current_steps": 3190, "total_steps": 15621, "loss": 0.4337, "lr": 1.934704999749416e-06, "epoch": 0.20421227834325587, "percentage": 20.42, "elapsed_time": "0:13:27", "remaining_time": "0:52:25", "throughput": 12422.1, "total_tokens": 10027264} +{"current_steps": 3195, "total_steps": 15621, "loss": 0.4251, "lr": 1.9343072762935274e-06, "epoch": 0.20453236028423277, "percentage": 20.45, "elapsed_time": "0:13:27", "remaining_time": "0:52:21", "throughput": 12430.84, "total_tokens": 10042432} +{"current_steps": 3200, "total_steps": 15621, "loss": 0.4122, "lr": 1.933908386344035e-06, "epoch": 0.20485244222520965, "percentage": 20.49, "elapsed_time": "0:13:28", "remaining_time": "0:52:18", "throughput": 12439.55, "total_tokens": 10057792} +{"current_steps": 3205, "total_steps": 15621, "loss": 0.528, "lr": 1.9335083303989565e-06, "epoch": 0.20517252416618653, "percentage": 20.52, "elapsed_time": "0:13:29", "remaining_time": "0:52:14", "throughput": 12449.78, "total_tokens": 10074752} +{"current_steps": 3210, "total_steps": 15621, "loss": 0.5767, "lr": 1.9331071089577674e-06, "epoch": 0.20549260610716344, "percentage": 20.55, "elapsed_time": "0:13:29", "remaining_time": "0:52:11", "throughput": 12459.1, "total_tokens": 10090752} +{"current_steps": 3215, "total_steps": 15621, "loss": 0.5028, "lr": 1.9327047225213963e-06, "epoch": 0.20581268804814032, "percentage": 20.58, "elapsed_time": "0:13:30", "remaining_time": "0:52:07", "throughput": 12467.96, "total_tokens": 10106240} +{"current_steps": 3220, "total_steps": 15621, "loss": 0.4154, "lr": 1.9323011715922283e-06, "epoch": 0.20613276998911723, "percentage": 20.61, "elapsed_time": "0:13:31", "remaining_time": "0:52:04", "throughput": 12476.89, "total_tokens": 10121856} +{"current_steps": 3225, "total_steps": 15621, "loss": 0.4682, "lr": 1.931896456674101e-06, "epoch": 0.2064528519300941, "percentage": 20.65, "elapsed_time": "0:13:31", "remaining_time": "0:52:00", "throughput": 12485.81, "total_tokens": 10137408} +{"current_steps": 3230, "total_steps": 15621, "loss": 0.4611, "lr": 1.931490578272306e-06, "epoch": 0.20677293387107099, "percentage": 20.68, "elapsed_time": "0:13:32", "remaining_time": "0:51:57", "throughput": 12494.41, "total_tokens": 10152640} +{"current_steps": 3235, "total_steps": 15621, "loss": 0.3551, "lr": 1.9310835368935867e-06, "epoch": 0.2070930158120479, "percentage": 20.71, "elapsed_time": "0:13:33", "remaining_time": "0:51:53", "throughput": 12503.03, "total_tokens": 10167936} +{"current_steps": 3240, "total_steps": 15621, "loss": 0.4241, "lr": 1.93067533304614e-06, "epoch": 0.20741309775302477, "percentage": 20.74, "elapsed_time": "0:13:33", "remaining_time": "0:51:50", "throughput": 12511.83, "total_tokens": 10183360} +{"current_steps": 3245, "total_steps": 15621, "loss": 0.5624, "lr": 1.9302659672396128e-06, "epoch": 0.20773317969400165, "percentage": 20.77, "elapsed_time": "0:13:34", "remaining_time": "0:51:46", "throughput": 12520.23, "total_tokens": 10198208} +{"current_steps": 3250, "total_steps": 15621, "loss": 0.4975, "lr": 1.9298554399851025e-06, "epoch": 0.20805326163497856, "percentage": 20.81, "elapsed_time": "0:13:35", "remaining_time": "0:51:43", "throughput": 12528.91, "total_tokens": 10213568} +{"current_steps": 3255, "total_steps": 15621, "loss": 0.4755, "lr": 1.929443751795158e-06, "epoch": 0.20837334357595544, "percentage": 20.84, "elapsed_time": "0:13:35", "remaining_time": "0:51:39", "throughput": 12538.5, "total_tokens": 10230080} +{"current_steps": 3260, "total_steps": 15621, "loss": 0.4792, "lr": 1.929030903183776e-06, "epoch": 0.20869342551693235, "percentage": 20.87, "elapsed_time": "0:13:36", "remaining_time": "0:51:36", "throughput": 12548.23, "total_tokens": 10246912} +{"current_steps": 3265, "total_steps": 15621, "loss": 0.5231, "lr": 1.9286168946664033e-06, "epoch": 0.20901350745790923, "percentage": 20.9, "elapsed_time": "0:13:37", "remaining_time": "0:51:32", "throughput": 12556.97, "total_tokens": 10262464} +{"current_steps": 3270, "total_steps": 15621, "loss": 0.6606, "lr": 1.9282017267599352e-06, "epoch": 0.2093335893988861, "percentage": 20.93, "elapsed_time": "0:13:37", "remaining_time": "0:51:29", "throughput": 12565.69, "total_tokens": 10278016} +{"current_steps": 3275, "total_steps": 15621, "loss": 0.5055, "lr": 1.9277853999827125e-06, "epoch": 0.209653671339863, "percentage": 20.97, "elapsed_time": "0:13:38", "remaining_time": "0:51:26", "throughput": 12574.62, "total_tokens": 10293824} +{"current_steps": 3280, "total_steps": 15621, "loss": 0.5263, "lr": 1.9273679148545244e-06, "epoch": 0.2099737532808399, "percentage": 21.0, "elapsed_time": "0:13:39", "remaining_time": "0:51:22", "throughput": 12583.57, "total_tokens": 10309568} +{"current_steps": 3285, "total_steps": 15621, "loss": 0.4181, "lr": 1.9269492718966062e-06, "epoch": 0.21029383522181677, "percentage": 21.03, "elapsed_time": "0:13:39", "remaining_time": "0:51:19", "throughput": 12592.74, "total_tokens": 10325696} +{"current_steps": 3290, "total_steps": 15621, "loss": 0.5398, "lr": 1.9265294716316384e-06, "epoch": 0.21061391716279368, "percentage": 21.06, "elapsed_time": "0:13:40", "remaining_time": "0:51:15", "throughput": 12602.01, "total_tokens": 10342016} +{"current_steps": 3295, "total_steps": 15621, "loss": 0.468, "lr": 1.926108514583747e-06, "epoch": 0.21093399910377056, "percentage": 21.09, "elapsed_time": "0:13:41", "remaining_time": "0:51:12", "throughput": 12610.64, "total_tokens": 10357632} +{"current_steps": 3300, "total_steps": 15621, "loss": 0.4805, "lr": 1.925686401278501e-06, "epoch": 0.21125408104474747, "percentage": 21.13, "elapsed_time": "0:13:42", "remaining_time": "0:51:09", "throughput": 12619.2, "total_tokens": 10373056} +{"current_steps": 3305, "total_steps": 15621, "loss": 0.6377, "lr": 1.9252631322429143e-06, "epoch": 0.21157416298572435, "percentage": 21.16, "elapsed_time": "0:13:42", "remaining_time": "0:51:05", "throughput": 12628.35, "total_tokens": 10389248} +{"current_steps": 3310, "total_steps": 15621, "loss": 0.4445, "lr": 1.9248387080054435e-06, "epoch": 0.21189424492670123, "percentage": 21.19, "elapsed_time": "0:13:43", "remaining_time": "0:51:02", "throughput": 12637.08, "total_tokens": 10404864} +{"current_steps": 3315, "total_steps": 15621, "loss": 0.4925, "lr": 1.9244131290959864e-06, "epoch": 0.21221432686767813, "percentage": 21.22, "elapsed_time": "0:13:44", "remaining_time": "0:50:58", "throughput": 12645.63, "total_tokens": 10420416} +{"current_steps": 3320, "total_steps": 15621, "loss": 0.4311, "lr": 1.9239863960458845e-06, "epoch": 0.212534408808655, "percentage": 21.25, "elapsed_time": "0:13:44", "remaining_time": "0:50:55", "throughput": 12653.77, "total_tokens": 10435456} +{"current_steps": 3325, "total_steps": 15621, "loss": 0.4857, "lr": 1.923558509387918e-06, "epoch": 0.21285449074963192, "percentage": 21.29, "elapsed_time": "0:13:45", "remaining_time": "0:50:52", "throughput": 12662.8, "total_tokens": 10451584} +{"current_steps": 3330, "total_steps": 15621, "loss": 0.3719, "lr": 1.9231294696563086e-06, "epoch": 0.2131745726906088, "percentage": 21.32, "elapsed_time": "0:13:46", "remaining_time": "0:50:48", "throughput": 12671.78, "total_tokens": 10467584} +{"current_steps": 3335, "total_steps": 15621, "loss": 0.432, "lr": 1.922699277386718e-06, "epoch": 0.21349465463158568, "percentage": 21.35, "elapsed_time": "0:13:46", "remaining_time": "0:50:45", "throughput": 12680.41, "total_tokens": 10483264} +{"current_steps": 3340, "total_steps": 15621, "loss": 0.5869, "lr": 1.9222679331162454e-06, "epoch": 0.21381473657256259, "percentage": 21.38, "elapsed_time": "0:13:47", "remaining_time": "0:50:42", "throughput": 12688.77, "total_tokens": 10498560} +{"current_steps": 3345, "total_steps": 15621, "loss": 0.4431, "lr": 1.92183543738343e-06, "epoch": 0.21413481851353947, "percentage": 21.41, "elapsed_time": "0:13:48", "remaining_time": "0:50:38", "throughput": 12697.31, "total_tokens": 10514176} +{"current_steps": 3350, "total_steps": 15621, "loss": 0.4427, "lr": 1.9214017907282475e-06, "epoch": 0.21445490045451635, "percentage": 21.45, "elapsed_time": "0:13:48", "remaining_time": "0:50:35", "throughput": 12705.96, "total_tokens": 10529792} +{"current_steps": 3355, "total_steps": 15621, "loss": 0.499, "lr": 1.9209669936921105e-06, "epoch": 0.21477498239549325, "percentage": 21.48, "elapsed_time": "0:13:49", "remaining_time": "0:50:32", "throughput": 12714.84, "total_tokens": 10545856} +{"current_steps": 3360, "total_steps": 15621, "loss": 0.4092, "lr": 1.920531046817869e-06, "epoch": 0.21509506433647013, "percentage": 21.51, "elapsed_time": "0:13:50", "remaining_time": "0:50:29", "throughput": 12724.0, "total_tokens": 10562368} +{"current_steps": 3365, "total_steps": 15621, "loss": 0.6207, "lr": 1.9200939506498067e-06, "epoch": 0.21541514627744704, "percentage": 21.54, "elapsed_time": "0:13:50", "remaining_time": "0:50:25", "throughput": 12732.02, "total_tokens": 10577280} +{"current_steps": 3370, "total_steps": 15621, "loss": 0.5719, "lr": 1.9196557057336446e-06, "epoch": 0.21573522821842392, "percentage": 21.57, "elapsed_time": "0:13:51", "remaining_time": "0:50:22", "throughput": 12740.23, "total_tokens": 10592384} +{"current_steps": 3375, "total_steps": 15621, "loss": 0.4577, "lr": 1.9192163126165354e-06, "epoch": 0.2160553101594008, "percentage": 21.61, "elapsed_time": "0:13:52", "remaining_time": "0:50:19", "throughput": 12749.13, "total_tokens": 10608704} +{"current_steps": 3380, "total_steps": 15621, "loss": 0.4011, "lr": 1.9187757718470673e-06, "epoch": 0.2163753921003777, "percentage": 21.64, "elapsed_time": "0:13:52", "remaining_time": "0:50:16", "throughput": 12758.27, "total_tokens": 10625280} +{"current_steps": 3385, "total_steps": 15621, "loss": 0.5336, "lr": 1.9183340839752606e-06, "epoch": 0.21669547404135459, "percentage": 21.67, "elapsed_time": "0:13:53", "remaining_time": "0:50:12", "throughput": 12767.01, "total_tokens": 10641152} +{"current_steps": 3390, "total_steps": 15621, "loss": 0.4227, "lr": 1.9178912495525672e-06, "epoch": 0.21701555598233147, "percentage": 21.7, "elapsed_time": "0:13:54", "remaining_time": "0:50:09", "throughput": 12775.87, "total_tokens": 10657472} +{"current_steps": 3395, "total_steps": 15621, "loss": 0.4942, "lr": 1.917447269131872e-06, "epoch": 0.21733563792330837, "percentage": 21.73, "elapsed_time": "0:13:54", "remaining_time": "0:50:06", "throughput": 12784.75, "total_tokens": 10673600} +{"current_steps": 3400, "total_steps": 15621, "loss": 0.5717, "lr": 1.917002143267489e-06, "epoch": 0.21765571986428525, "percentage": 21.77, "elapsed_time": "0:13:55", "remaining_time": "0:50:03", "throughput": 12793.21, "total_tokens": 10689344} +{"current_steps": 3405, "total_steps": 15621, "loss": 0.4315, "lr": 1.9165558725151633e-06, "epoch": 0.21797580180526216, "percentage": 21.8, "elapsed_time": "0:13:56", "remaining_time": "0:50:00", "throughput": 12801.15, "total_tokens": 10704384} +{"current_steps": 3410, "total_steps": 15621, "loss": 0.4937, "lr": 1.9161084574320692e-06, "epoch": 0.21829588374623904, "percentage": 21.83, "elapsed_time": "0:13:56", "remaining_time": "0:49:56", "throughput": 12809.98, "total_tokens": 10720512} +{"current_steps": 3415, "total_steps": 15621, "loss": 0.485, "lr": 1.91565989857681e-06, "epoch": 0.21861596568721592, "percentage": 21.86, "elapsed_time": "0:13:57", "remaining_time": "0:49:53", "throughput": 12817.99, "total_tokens": 10735744} +{"current_steps": 3420, "total_steps": 15621, "loss": 0.4665, "lr": 1.9152101965094162e-06, "epoch": 0.21893604762819283, "percentage": 21.89, "elapsed_time": "0:13:58", "remaining_time": "0:49:50", "throughput": 12825.93, "total_tokens": 10750848} +{"current_steps": 3425, "total_steps": 15621, "loss": 0.4939, "lr": 1.9147593517913464e-06, "epoch": 0.2192561295691697, "percentage": 21.93, "elapsed_time": "0:13:58", "remaining_time": "0:49:47", "throughput": 12833.54, "total_tokens": 10765632} +{"current_steps": 3430, "total_steps": 15621, "loss": 0.3868, "lr": 1.914307364985485e-06, "epoch": 0.21957621151014659, "percentage": 21.96, "elapsed_time": "0:13:59", "remaining_time": "0:49:43", "throughput": 12841.51, "total_tokens": 10780928} +{"current_steps": 3435, "total_steps": 15621, "loss": 0.4273, "lr": 1.913854236656144e-06, "epoch": 0.2198962934511235, "percentage": 21.99, "elapsed_time": "0:14:00", "remaining_time": "0:49:40", "throughput": 12849.98, "total_tokens": 10796864} +{"current_steps": 3440, "total_steps": 15621, "loss": 0.4643, "lr": 1.9133999673690584e-06, "epoch": 0.22021637539210037, "percentage": 22.02, "elapsed_time": "0:14:00", "remaining_time": "0:49:37", "throughput": 12858.5, "total_tokens": 10812672} +{"current_steps": 3445, "total_steps": 15621, "loss": 0.4782, "lr": 1.9129445576913886e-06, "epoch": 0.22053645733307728, "percentage": 22.05, "elapsed_time": "0:14:01", "remaining_time": "0:49:34", "throughput": 12867.01, "total_tokens": 10828544} +{"current_steps": 3450, "total_steps": 15621, "loss": 0.5318, "lr": 1.91248800819172e-06, "epoch": 0.22085653927405416, "percentage": 22.09, "elapsed_time": "0:14:02", "remaining_time": "0:49:31", "throughput": 12875.41, "total_tokens": 10844288} +{"current_steps": 3455, "total_steps": 15621, "loss": 0.5306, "lr": 1.912030319440059e-06, "epoch": 0.22117662121503104, "percentage": 22.12, "elapsed_time": "0:14:02", "remaining_time": "0:49:28", "throughput": 12883.83, "total_tokens": 10860160} +{"current_steps": 3460, "total_steps": 15621, "loss": 0.6076, "lr": 1.9115714920078354e-06, "epoch": 0.22149670315600795, "percentage": 22.15, "elapsed_time": "0:14:03", "remaining_time": "0:49:25", "throughput": 12892.14, "total_tokens": 10875968} +{"current_steps": 3465, "total_steps": 15621, "loss": 0.3367, "lr": 1.9111115264679017e-06, "epoch": 0.22181678509698483, "percentage": 22.18, "elapsed_time": "0:14:04", "remaining_time": "0:49:21", "throughput": 12900.75, "total_tokens": 10892096} +{"current_steps": 3470, "total_steps": 15621, "loss": 0.4611, "lr": 1.910650423394529e-06, "epoch": 0.2221368670379617, "percentage": 22.21, "elapsed_time": "0:14:04", "remaining_time": "0:49:18", "throughput": 12909.53, "total_tokens": 10908544} +{"current_steps": 3475, "total_steps": 15621, "loss": 0.4804, "lr": 1.910188183363411e-06, "epoch": 0.2224569489789386, "percentage": 22.25, "elapsed_time": "0:14:05", "remaining_time": "0:49:15", "throughput": 12918.05, "total_tokens": 10924544} +{"current_steps": 3480, "total_steps": 15621, "loss": 0.4408, "lr": 1.909724806951659e-06, "epoch": 0.2227770309199155, "percentage": 22.28, "elapsed_time": "0:14:06", "remaining_time": "0:49:12", "throughput": 12927.49, "total_tokens": 10941888} +{"current_steps": 3485, "total_steps": 15621, "loss": 0.4689, "lr": 1.909260294737804e-06, "epoch": 0.2230971128608924, "percentage": 22.31, "elapsed_time": "0:14:07", "remaining_time": "0:49:09", "throughput": 12936.51, "total_tokens": 10958592} +{"current_steps": 3490, "total_steps": 15621, "loss": 0.5319, "lr": 1.9087946473017953e-06, "epoch": 0.22341719480186928, "percentage": 22.34, "elapsed_time": "0:14:07", "remaining_time": "0:49:06", "throughput": 12944.61, "total_tokens": 10974208} +{"current_steps": 3495, "total_steps": 15621, "loss": 0.4363, "lr": 1.9083278652249992e-06, "epoch": 0.22373727674284616, "percentage": 22.37, "elapsed_time": "0:14:08", "remaining_time": "0:49:03", "throughput": 12952.05, "total_tokens": 10988928} +{"current_steps": 3500, "total_steps": 15621, "loss": 0.4327, "lr": 1.9078599490901983e-06, "epoch": 0.22405735868382307, "percentage": 22.41, "elapsed_time": "0:14:09", "remaining_time": "0:49:00", "throughput": 12961.29, "total_tokens": 11005952} +{"current_steps": 3505, "total_steps": 15621, "loss": 0.4012, "lr": 1.9073908994815914e-06, "epoch": 0.22437744062479995, "percentage": 22.44, "elapsed_time": "0:14:09", "remaining_time": "0:48:57", "throughput": 12968.66, "total_tokens": 11020608} +{"current_steps": 3510, "total_steps": 15621, "loss": 0.4999, "lr": 1.9069207169847928e-06, "epoch": 0.22469752256577685, "percentage": 22.47, "elapsed_time": "0:14:10", "remaining_time": "0:48:54", "throughput": 12977.17, "total_tokens": 11036736} +{"current_steps": 3515, "total_steps": 15621, "loss": 0.3645, "lr": 1.9064494021868302e-06, "epoch": 0.22501760450675373, "percentage": 22.5, "elapsed_time": "0:14:11", "remaining_time": "0:48:51", "throughput": 12985.48, "total_tokens": 11052480} +{"current_steps": 3520, "total_steps": 15621, "loss": 0.4816, "lr": 1.9059769556761464e-06, "epoch": 0.2253376864477306, "percentage": 22.53, "elapsed_time": "0:14:11", "remaining_time": "0:48:48", "throughput": 12993.85, "total_tokens": 11068416} +{"current_steps": 3525, "total_steps": 15621, "loss": 0.4443, "lr": 1.9055033780425962e-06, "epoch": 0.22565776838870752, "percentage": 22.57, "elapsed_time": "0:14:12", "remaining_time": "0:48:45", "throughput": 13003.71, "total_tokens": 11086400} +{"current_steps": 3530, "total_steps": 15621, "loss": 0.5674, "lr": 1.9050286698774464e-06, "epoch": 0.2259778503296844, "percentage": 22.6, "elapsed_time": "0:14:13", "remaining_time": "0:48:42", "throughput": 13012.28, "total_tokens": 11102848} +{"current_steps": 3535, "total_steps": 15621, "loss": 0.5366, "lr": 1.904552831773376e-06, "epoch": 0.22629793227066128, "percentage": 22.63, "elapsed_time": "0:14:13", "remaining_time": "0:48:39", "throughput": 13020.02, "total_tokens": 11118080} +{"current_steps": 3540, "total_steps": 15621, "loss": 0.5045, "lr": 1.9040758643244748e-06, "epoch": 0.22661801421163819, "percentage": 22.66, "elapsed_time": "0:14:14", "remaining_time": "0:48:36", "throughput": 13027.61, "total_tokens": 11133120} +{"current_steps": 3545, "total_steps": 15621, "loss": 0.4452, "lr": 1.903597768126242e-06, "epoch": 0.22693809615261507, "percentage": 22.69, "elapsed_time": "0:14:15", "remaining_time": "0:48:33", "throughput": 13036.54, "total_tokens": 11150144} +{"current_steps": 3550, "total_steps": 15621, "loss": 0.4862, "lr": 1.9031185437755862e-06, "epoch": 0.22725817809359197, "percentage": 22.73, "elapsed_time": "0:14:15", "remaining_time": "0:48:30", "throughput": 13044.44, "total_tokens": 11165760} +{"current_steps": 3555, "total_steps": 15621, "loss": 0.4948, "lr": 1.9026381918708246e-06, "epoch": 0.22757826003456885, "percentage": 22.76, "elapsed_time": "0:14:16", "remaining_time": "0:48:27", "throughput": 13051.52, "total_tokens": 11180096} +{"current_steps": 3560, "total_steps": 15621, "loss": 0.3775, "lr": 1.9021567130116822e-06, "epoch": 0.22789834197554573, "percentage": 22.79, "elapsed_time": "0:14:17", "remaining_time": "0:48:24", "throughput": 13059.17, "total_tokens": 11195584} +{"current_steps": 3565, "total_steps": 15621, "loss": 0.389, "lr": 1.9016741077992916e-06, "epoch": 0.22821842391652264, "percentage": 22.82, "elapsed_time": "0:14:17", "remaining_time": "0:48:21", "throughput": 13066.93, "total_tokens": 11210944} +{"current_steps": 3570, "total_steps": 15621, "loss": 0.4008, "lr": 1.90119037683619e-06, "epoch": 0.22853850585749952, "percentage": 22.85, "elapsed_time": "0:14:18", "remaining_time": "0:48:18", "throughput": 13075.47, "total_tokens": 11227392} +{"current_steps": 3575, "total_steps": 15621, "loss": 0.6598, "lr": 1.9007055207263223e-06, "epoch": 0.2288585877984764, "percentage": 22.89, "elapsed_time": "0:14:19", "remaining_time": "0:48:15", "throughput": 13084.36, "total_tokens": 11244416} +{"current_steps": 3580, "total_steps": 15621, "loss": 0.3584, "lr": 1.900219540075036e-06, "epoch": 0.2291786697394533, "percentage": 22.92, "elapsed_time": "0:14:20", "remaining_time": "0:48:12", "throughput": 13092.75, "total_tokens": 11260672} +{"current_steps": 3585, "total_steps": 15621, "loss": 0.4823, "lr": 1.8997324354890845e-06, "epoch": 0.22949875168043019, "percentage": 22.95, "elapsed_time": "0:14:20", "remaining_time": "0:48:09", "throughput": 13101.57, "total_tokens": 11277504} +{"current_steps": 3590, "total_steps": 15621, "loss": 0.5325, "lr": 1.8992442075766233e-06, "epoch": 0.2298188336214071, "percentage": 22.98, "elapsed_time": "0:14:21", "remaining_time": "0:48:06", "throughput": 13109.46, "total_tokens": 11293184} +{"current_steps": 3595, "total_steps": 15621, "loss": 0.3273, "lr": 1.8987548569472105e-06, "epoch": 0.23013891556238397, "percentage": 23.01, "elapsed_time": "0:14:22", "remaining_time": "0:48:03", "throughput": 13117.24, "total_tokens": 11308480} +{"current_steps": 3600, "total_steps": 15621, "loss": 0.3958, "lr": 1.8982643842118064e-06, "epoch": 0.23045899750336085, "percentage": 23.05, "elapsed_time": "0:14:22", "remaining_time": "0:48:00", "throughput": 13124.96, "total_tokens": 11323840} +{"current_steps": 3605, "total_steps": 15621, "loss": 0.5822, "lr": 1.8977727899827716e-06, "epoch": 0.23077907944433776, "percentage": 23.08, "elapsed_time": "0:14:23", "remaining_time": "0:47:57", "throughput": 13132.92, "total_tokens": 11339456} +{"current_steps": 3610, "total_steps": 15621, "loss": 0.6628, "lr": 1.8972800748738678e-06, "epoch": 0.23109916138531464, "percentage": 23.11, "elapsed_time": "0:14:24", "remaining_time": "0:47:55", "throughput": 13140.57, "total_tokens": 11354880} +{"current_steps": 3615, "total_steps": 15621, "loss": 0.5365, "lr": 1.896786239500255e-06, "epoch": 0.23141924332629152, "percentage": 23.14, "elapsed_time": "0:14:24", "remaining_time": "0:47:52", "throughput": 13148.15, "total_tokens": 11369984} +{"current_steps": 3620, "total_steps": 15621, "loss": 0.4328, "lr": 1.8962912844784928e-06, "epoch": 0.23173932526726843, "percentage": 23.17, "elapsed_time": "0:14:25", "remaining_time": "0:47:48", "throughput": 13155.41, "total_tokens": 11384640} +{"current_steps": 3625, "total_steps": 15621, "loss": 0.5017, "lr": 1.8957952104265384e-06, "epoch": 0.2320594072082453, "percentage": 23.21, "elapsed_time": "0:14:26", "remaining_time": "0:47:46", "throughput": 13163.92, "total_tokens": 11401152} +{"current_steps": 3630, "total_steps": 15621, "loss": 0.4551, "lr": 1.8952980179637458e-06, "epoch": 0.2323794891492222, "percentage": 23.24, "elapsed_time": "0:14:26", "remaining_time": "0:47:43", "throughput": 13171.87, "total_tokens": 11416896} +{"current_steps": 3635, "total_steps": 15621, "loss": 0.5002, "lr": 1.8947997077108662e-06, "epoch": 0.2326995710901991, "percentage": 23.27, "elapsed_time": "0:14:27", "remaining_time": "0:47:40", "throughput": 13179.98, "total_tokens": 11432832} +{"current_steps": 3640, "total_steps": 15621, "loss": 0.5022, "lr": 1.894300280290045e-06, "epoch": 0.23301965303117597, "percentage": 23.3, "elapsed_time": "0:14:28", "remaining_time": "0:47:37", "throughput": 13187.69, "total_tokens": 11448320} +{"current_steps": 3645, "total_steps": 15621, "loss": 0.5691, "lr": 1.8937997363248237e-06, "epoch": 0.23333973497215288, "percentage": 23.33, "elapsed_time": "0:14:28", "remaining_time": "0:47:34", "throughput": 13195.13, "total_tokens": 11463488} +{"current_steps": 3650, "total_steps": 15621, "loss": 0.4616, "lr": 1.8932980764401373e-06, "epoch": 0.23365981691312976, "percentage": 23.37, "elapsed_time": "0:14:29", "remaining_time": "0:47:31", "throughput": 13202.63, "total_tokens": 11478592} +{"current_steps": 3655, "total_steps": 15621, "loss": 0.367, "lr": 1.8927953012623141e-06, "epoch": 0.23397989885410664, "percentage": 23.4, "elapsed_time": "0:14:30", "remaining_time": "0:47:28", "throughput": 13210.62, "total_tokens": 11494720} +{"current_steps": 3660, "total_steps": 15621, "loss": 0.4884, "lr": 1.8922914114190744e-06, "epoch": 0.23429998079508355, "percentage": 23.43, "elapsed_time": "0:14:30", "remaining_time": "0:47:25", "throughput": 13218.92, "total_tokens": 11511232} +{"current_steps": 3665, "total_steps": 15621, "loss": 0.5212, "lr": 1.8917864075395312e-06, "epoch": 0.23462006273606043, "percentage": 23.46, "elapsed_time": "0:14:31", "remaining_time": "0:47:22", "throughput": 13226.79, "total_tokens": 11527040} +{"current_steps": 3670, "total_steps": 15621, "loss": 0.4641, "lr": 1.8912802902541873e-06, "epoch": 0.23494014467703733, "percentage": 23.49, "elapsed_time": "0:14:32", "remaining_time": "0:47:20", "throughput": 13234.48, "total_tokens": 11542528} +{"current_steps": 3675, "total_steps": 15621, "loss": 0.503, "lr": 1.8907730601949362e-06, "epoch": 0.2352602266180142, "percentage": 23.53, "elapsed_time": "0:14:32", "remaining_time": "0:47:17", "throughput": 13241.9, "total_tokens": 11557696} +{"current_steps": 3680, "total_steps": 15621, "loss": 0.4703, "lr": 1.8902647179950608e-06, "epoch": 0.2355803085589911, "percentage": 23.56, "elapsed_time": "0:14:33", "remaining_time": "0:47:14", "throughput": 13250.74, "total_tokens": 11574848} +{"current_steps": 3685, "total_steps": 15621, "loss": 0.5074, "lr": 1.889755264289232e-06, "epoch": 0.235900390499968, "percentage": 23.59, "elapsed_time": "0:14:34", "remaining_time": "0:47:11", "throughput": 13257.94, "total_tokens": 11589696} +{"current_steps": 3690, "total_steps": 15621, "loss": 0.3915, "lr": 1.8892446997135087e-06, "epoch": 0.23622047244094488, "percentage": 23.62, "elapsed_time": "0:14:34", "remaining_time": "0:47:08", "throughput": 13266.71, "total_tokens": 11606848} +{"current_steps": 3695, "total_steps": 15621, "loss": 0.6641, "lr": 1.888733024905337e-06, "epoch": 0.23654055438192176, "percentage": 23.65, "elapsed_time": "0:14:35", "remaining_time": "0:47:06", "throughput": 13275.42, "total_tokens": 11623744} +{"current_steps": 3700, "total_steps": 15621, "loss": 0.4874, "lr": 1.888220240503549e-06, "epoch": 0.23686063632289867, "percentage": 23.69, "elapsed_time": "0:14:36", "remaining_time": "0:47:03", "throughput": 13283.7, "total_tokens": 11640256} +{"current_steps": 3705, "total_steps": 15621, "loss": 0.4096, "lr": 1.8877063471483618e-06, "epoch": 0.23718071826387555, "percentage": 23.72, "elapsed_time": "0:14:36", "remaining_time": "0:47:00", "throughput": 13291.27, "total_tokens": 11655744} +{"current_steps": 3710, "total_steps": 15621, "loss": 0.2877, "lr": 1.8871913454813772e-06, "epoch": 0.23750080020485245, "percentage": 23.75, "elapsed_time": "0:14:37", "remaining_time": "0:46:57", "throughput": 13298.86, "total_tokens": 11671104} +{"current_steps": 3715, "total_steps": 15621, "loss": 0.3741, "lr": 1.886675236145581e-06, "epoch": 0.23782088214582933, "percentage": 23.78, "elapsed_time": "0:14:38", "remaining_time": "0:46:54", "throughput": 13306.7, "total_tokens": 11686848} +{"current_steps": 3720, "total_steps": 15621, "loss": 0.5053, "lr": 1.8861580197853422e-06, "epoch": 0.2381409640868062, "percentage": 23.81, "elapsed_time": "0:14:38", "remaining_time": "0:46:51", "throughput": 13314.03, "total_tokens": 11701952} +{"current_steps": 3725, "total_steps": 15621, "loss": 0.4637, "lr": 1.8856396970464105e-06, "epoch": 0.23846104602778312, "percentage": 23.85, "elapsed_time": "0:14:39", "remaining_time": "0:46:49", "throughput": 13322.39, "total_tokens": 11718592} +{"current_steps": 3730, "total_steps": 15621, "loss": 0.5129, "lr": 1.8851202685759189e-06, "epoch": 0.23878112796876, "percentage": 23.88, "elapsed_time": "0:14:40", "remaining_time": "0:46:46", "throughput": 13330.02, "total_tokens": 11734208} +{"current_steps": 3735, "total_steps": 15621, "loss": 0.4186, "lr": 1.8845997350223792e-06, "epoch": 0.2391012099097369, "percentage": 23.91, "elapsed_time": "0:14:40", "remaining_time": "0:46:43", "throughput": 13337.11, "total_tokens": 11748992} +{"current_steps": 3740, "total_steps": 15621, "loss": 0.4258, "lr": 1.8840780970356842e-06, "epoch": 0.23942129185071379, "percentage": 23.94, "elapsed_time": "0:14:41", "remaining_time": "0:46:40", "throughput": 13344.71, "total_tokens": 11764608} +{"current_steps": 3745, "total_steps": 15621, "loss": 0.3919, "lr": 1.8835553552671048e-06, "epoch": 0.23974137379169067, "percentage": 23.97, "elapsed_time": "0:14:42", "remaining_time": "0:46:37", "throughput": 13352.71, "total_tokens": 11780800} +{"current_steps": 3750, "total_steps": 15621, "loss": 0.4467, "lr": 1.8830315103692902e-06, "epoch": 0.24006145573266757, "percentage": 24.01, "elapsed_time": "0:14:42", "remaining_time": "0:46:34", "throughput": 13359.86, "total_tokens": 11795776} +{"current_steps": 3755, "total_steps": 15621, "loss": 0.52, "lr": 1.8825065629962669e-06, "epoch": 0.24038153767364445, "percentage": 24.04, "elapsed_time": "0:14:43", "remaining_time": "0:46:32", "throughput": 13367.68, "total_tokens": 11811776} +{"current_steps": 3760, "total_steps": 15621, "loss": 0.4902, "lr": 1.881980513803438e-06, "epoch": 0.24070161961462133, "percentage": 24.07, "elapsed_time": "0:14:44", "remaining_time": "0:46:29", "throughput": 13375.74, "total_tokens": 11828224} +{"current_steps": 3765, "total_steps": 15621, "loss": 0.5093, "lr": 1.881453363447582e-06, "epoch": 0.24102170155559824, "percentage": 24.1, "elapsed_time": "0:14:44", "remaining_time": "0:46:26", "throughput": 13383.16, "total_tokens": 11843904} +{"current_steps": 3770, "total_steps": 15621, "loss": 0.5653, "lr": 1.880925112586852e-06, "epoch": 0.24134178349657512, "percentage": 24.13, "elapsed_time": "0:14:45", "remaining_time": "0:46:24", "throughput": 13390.58, "total_tokens": 11859392} +{"current_steps": 3775, "total_steps": 15621, "loss": 0.4347, "lr": 1.8803957618807762e-06, "epoch": 0.24166186543755203, "percentage": 24.17, "elapsed_time": "0:14:46", "remaining_time": "0:46:21", "throughput": 13398.76, "total_tokens": 11875968} +{"current_steps": 3780, "total_steps": 15621, "loss": 0.4527, "lr": 1.8798653119902548e-06, "epoch": 0.2419819473785289, "percentage": 24.2, "elapsed_time": "0:14:47", "remaining_time": "0:46:18", "throughput": 13406.15, "total_tokens": 11891584} +{"current_steps": 3785, "total_steps": 15621, "loss": 0.4997, "lr": 1.8793337635775603e-06, "epoch": 0.24230202931950579, "percentage": 24.23, "elapsed_time": "0:14:47", "remaining_time": "0:46:15", "throughput": 13413.45, "total_tokens": 11906944} +{"current_steps": 3790, "total_steps": 15621, "loss": 0.4883, "lr": 1.8788011173063376e-06, "epoch": 0.2426221112604827, "percentage": 24.26, "elapsed_time": "0:14:48", "remaining_time": "0:46:13", "throughput": 13420.81, "total_tokens": 11922368} +{"current_steps": 3795, "total_steps": 15621, "loss": 0.5193, "lr": 1.8782673738416018e-06, "epoch": 0.24294219320145957, "percentage": 24.29, "elapsed_time": "0:14:49", "remaining_time": "0:46:10", "throughput": 13428.62, "total_tokens": 11938432} +{"current_steps": 3800, "total_steps": 15621, "loss": 0.5232, "lr": 1.877732533849737e-06, "epoch": 0.24326227514243645, "percentage": 24.33, "elapsed_time": "0:14:49", "remaining_time": "0:46:07", "throughput": 13437.85, "total_tokens": 11956608} +{"current_steps": 3805, "total_steps": 15621, "loss": 0.4473, "lr": 1.8771965979984988e-06, "epoch": 0.24358235708341336, "percentage": 24.36, "elapsed_time": "0:14:50", "remaining_time": "0:46:05", "throughput": 13445.32, "total_tokens": 11972480} +{"current_steps": 3810, "total_steps": 15621, "loss": 0.3903, "lr": 1.8766595669570084e-06, "epoch": 0.24390243902439024, "percentage": 24.39, "elapsed_time": "0:14:51", "remaining_time": "0:46:02", "throughput": 13452.04, "total_tokens": 11987072} +{"current_steps": 3815, "total_steps": 15621, "loss": 0.4257, "lr": 1.8761214413957553e-06, "epoch": 0.24422252096536715, "percentage": 24.42, "elapsed_time": "0:14:51", "remaining_time": "0:45:59", "throughput": 13459.06, "total_tokens": 12002112} +{"current_steps": 3820, "total_steps": 15621, "loss": 0.3607, "lr": 1.8755822219865963e-06, "epoch": 0.24454260290634403, "percentage": 24.45, "elapsed_time": "0:14:52", "remaining_time": "0:45:56", "throughput": 13465.89, "total_tokens": 12016960} +{"current_steps": 3825, "total_steps": 15621, "loss": 0.4264, "lr": 1.875041909402752e-06, "epoch": 0.2448626848473209, "percentage": 24.49, "elapsed_time": "0:14:53", "remaining_time": "0:45:54", "throughput": 13473.32, "total_tokens": 12032576} +{"current_steps": 3830, "total_steps": 15621, "loss": 0.3694, "lr": 1.8745005043188102e-06, "epoch": 0.2451827667882978, "percentage": 24.52, "elapsed_time": "0:14:53", "remaining_time": "0:45:51", "throughput": 13481.07, "total_tokens": 12048768} +{"current_steps": 3835, "total_steps": 15621, "loss": 0.3828, "lr": 1.8739580074107208e-06, "epoch": 0.2455028487292747, "percentage": 24.55, "elapsed_time": "0:14:54", "remaining_time": "0:45:48", "throughput": 13489.03, "total_tokens": 12065088} +{"current_steps": 3840, "total_steps": 15621, "loss": 0.7066, "lr": 1.873414419355798e-06, "epoch": 0.24582293067025157, "percentage": 24.58, "elapsed_time": "0:14:55", "remaining_time": "0:45:46", "throughput": 13496.41, "total_tokens": 12080704} +{"current_steps": 3845, "total_steps": 15621, "loss": 0.4319, "lr": 1.872869740832717e-06, "epoch": 0.24614301261122848, "percentage": 24.61, "elapsed_time": "0:14:55", "remaining_time": "0:45:43", "throughput": 13503.98, "total_tokens": 12096704} +{"current_steps": 3850, "total_steps": 15621, "loss": 0.5962, "lr": 1.8723239725215165e-06, "epoch": 0.24646309455220536, "percentage": 24.65, "elapsed_time": "0:14:56", "remaining_time": "0:45:40", "throughput": 13510.71, "total_tokens": 12111488} +{"current_steps": 3855, "total_steps": 15621, "loss": 0.4195, "lr": 1.871777115103594e-06, "epoch": 0.24678317649318227, "percentage": 24.68, "elapsed_time": "0:14:57", "remaining_time": "0:45:38", "throughput": 13518.87, "total_tokens": 12128192} +{"current_steps": 3860, "total_steps": 15621, "loss": 0.4734, "lr": 1.8712291692617074e-06, "epoch": 0.24710325843415915, "percentage": 24.71, "elapsed_time": "0:14:57", "remaining_time": "0:45:35", "throughput": 13526.15, "total_tokens": 12143808} +{"current_steps": 3865, "total_steps": 15621, "loss": 0.496, "lr": 1.8706801356799735e-06, "epoch": 0.24742334037513602, "percentage": 24.74, "elapsed_time": "0:14:58", "remaining_time": "0:45:32", "throughput": 13533.28, "total_tokens": 12159232} +{"current_steps": 3870, "total_steps": 15621, "loss": 0.4515, "lr": 1.8701300150438674e-06, "epoch": 0.24774342231611293, "percentage": 24.77, "elapsed_time": "0:14:59", "remaining_time": "0:45:30", "throughput": 13540.98, "total_tokens": 12175360} +{"current_steps": 3875, "total_steps": 15621, "loss": 0.4208, "lr": 1.869578808040221e-06, "epoch": 0.2480635042570898, "percentage": 24.81, "elapsed_time": "0:14:59", "remaining_time": "0:45:27", "throughput": 13547.92, "total_tokens": 12190272} +{"current_steps": 3880, "total_steps": 15621, "loss": 0.5226, "lr": 1.869026515357223e-06, "epoch": 0.2483835861980667, "percentage": 24.84, "elapsed_time": "0:15:00", "remaining_time": "0:45:25", "throughput": 13556.85, "total_tokens": 12208448} +{"current_steps": 3885, "total_steps": 15621, "loss": 0.6458, "lr": 1.8684731376844169e-06, "epoch": 0.2487036681390436, "percentage": 24.87, "elapsed_time": "0:15:01", "remaining_time": "0:45:22", "throughput": 13565.31, "total_tokens": 12225984} +{"current_steps": 3890, "total_steps": 15621, "loss": 0.5022, "lr": 1.8679186757127014e-06, "epoch": 0.24902375008002048, "percentage": 24.9, "elapsed_time": "0:15:01", "remaining_time": "0:45:19", "throughput": 13572.38, "total_tokens": 12241408} +{"current_steps": 3895, "total_steps": 15621, "loss": 0.4355, "lr": 1.8673631301343288e-06, "epoch": 0.24934383202099739, "percentage": 24.93, "elapsed_time": "0:15:02", "remaining_time": "0:45:17", "throughput": 13578.97, "total_tokens": 12256064} +{"current_steps": 3900, "total_steps": 15621, "loss": 0.4515, "lr": 1.8668065016429044e-06, "epoch": 0.24966391396197427, "percentage": 24.97, "elapsed_time": "0:15:03", "remaining_time": "0:45:14", "throughput": 13586.97, "total_tokens": 12272832} +{"current_steps": 3905, "total_steps": 15621, "loss": 0.5368, "lr": 1.866248790933385e-06, "epoch": 0.24998399590295114, "percentage": 25.0, "elapsed_time": "0:15:03", "remaining_time": "0:45:12", "throughput": 13594.64, "total_tokens": 12289024} +{"current_steps": 3910, "total_steps": 15621, "loss": 0.4277, "lr": 1.8656899987020795e-06, "epoch": 0.25030407784392805, "percentage": 25.03, "elapsed_time": "0:15:04", "remaining_time": "0:45:09", "throughput": 13601.55, "total_tokens": 12304064} +{"current_steps": 3910, "total_steps": 15621, "eval_loss": 0.46774157881736755, "epoch": 0.25030407784392805, "percentage": 25.03, "elapsed_time": "0:15:55", "remaining_time": "0:47:41", "throughput": 12879.41, "total_tokens": 12304064} +{"current_steps": 3915, "total_steps": 15621, "loss": 0.4665, "lr": 1.865130125646646e-06, "epoch": 0.25062415978490493, "percentage": 25.06, "elapsed_time": "0:16:45", "remaining_time": "0:50:06", "throughput": 12252.97, "total_tokens": 12320256} +{"current_steps": 3920, "total_steps": 15621, "loss": 0.4426, "lr": 1.8645691724660933e-06, "epoch": 0.2509442417258818, "percentage": 25.09, "elapsed_time": "0:16:46", "remaining_time": "0:50:03", "throughput": 12259.95, "total_tokens": 12335360} +{"current_steps": 3925, "total_steps": 15621, "loss": 0.4718, "lr": 1.8640071398607774e-06, "epoch": 0.2512643236668587, "percentage": 25.13, "elapsed_time": "0:16:46", "remaining_time": "0:50:00", "throughput": 12267.64, "total_tokens": 12351488} +{"current_steps": 3930, "total_steps": 15621, "loss": 0.6284, "lr": 1.8634440285324024e-06, "epoch": 0.2515844056078356, "percentage": 25.16, "elapsed_time": "0:16:47", "remaining_time": "0:49:57", "throughput": 12274.2, "total_tokens": 12365952} +{"current_steps": 3935, "total_steps": 15621, "loss": 0.4716, "lr": 1.8628798391840205e-06, "epoch": 0.2519044875488125, "percentage": 25.19, "elapsed_time": "0:16:48", "remaining_time": "0:49:53", "throughput": 12281.54, "total_tokens": 12381376} +{"current_steps": 3940, "total_steps": 15621, "loss": 0.4596, "lr": 1.8623145725200277e-06, "epoch": 0.2522245694897894, "percentage": 25.22, "elapsed_time": "0:16:48", "remaining_time": "0:49:50", "throughput": 12288.37, "total_tokens": 12396160} +{"current_steps": 3945, "total_steps": 15621, "loss": 0.4591, "lr": 1.8617482292461664e-06, "epoch": 0.25254465143076626, "percentage": 25.25, "elapsed_time": "0:16:49", "remaining_time": "0:49:47", "throughput": 12295.13, "total_tokens": 12410944} +{"current_steps": 3950, "total_steps": 15621, "loss": 0.4216, "lr": 1.861180810069523e-06, "epoch": 0.25286473337174314, "percentage": 25.29, "elapsed_time": "0:16:50", "remaining_time": "0:49:44", "throughput": 12302.25, "total_tokens": 12426304} +{"current_steps": 3955, "total_steps": 15621, "loss": 0.4785, "lr": 1.8606123156985268e-06, "epoch": 0.2531848153127201, "percentage": 25.32, "elapsed_time": "0:16:50", "remaining_time": "0:49:41", "throughput": 12309.87, "total_tokens": 12442432} +{"current_steps": 3960, "total_steps": 15621, "loss": 0.4666, "lr": 1.8600427468429496e-06, "epoch": 0.25350489725369696, "percentage": 25.35, "elapsed_time": "0:16:51", "remaining_time": "0:49:38", "throughput": 12317.27, "total_tokens": 12458368} +{"current_steps": 3965, "total_steps": 15621, "loss": 0.433, "lr": 1.8594721042139052e-06, "epoch": 0.25382497919467384, "percentage": 25.38, "elapsed_time": "0:16:52", "remaining_time": "0:49:35", "throughput": 12324.74, "total_tokens": 12474368} +{"current_steps": 3970, "total_steps": 15621, "loss": 0.423, "lr": 1.858900388523847e-06, "epoch": 0.2541450611356507, "percentage": 25.41, "elapsed_time": "0:16:52", "remaining_time": "0:49:32", "throughput": 12332.1, "total_tokens": 12490176} +{"current_steps": 3975, "total_steps": 15621, "loss": 0.4523, "lr": 1.8583276004865694e-06, "epoch": 0.2544651430766276, "percentage": 25.45, "elapsed_time": "0:16:53", "remaining_time": "0:49:29", "throughput": 12340.59, "total_tokens": 12507840} +{"current_steps": 3980, "total_steps": 15621, "loss": 0.352, "lr": 1.8577537408172046e-06, "epoch": 0.25478522501760453, "percentage": 25.48, "elapsed_time": "0:16:54", "remaining_time": "0:49:26", "throughput": 12347.79, "total_tokens": 12523520} +{"current_steps": 3985, "total_steps": 15621, "loss": 0.5448, "lr": 1.8571788102322234e-06, "epoch": 0.2551053069585814, "percentage": 25.51, "elapsed_time": "0:16:54", "remaining_time": "0:49:23", "throughput": 12356.03, "total_tokens": 12540736} +{"current_steps": 3990, "total_steps": 15621, "loss": 0.4698, "lr": 1.8566028094494332e-06, "epoch": 0.2554253888995583, "percentage": 25.54, "elapsed_time": "0:16:55", "remaining_time": "0:49:20", "throughput": 12363.32, "total_tokens": 12556352} +{"current_steps": 3995, "total_steps": 15621, "loss": 0.3732, "lr": 1.8560257391879778e-06, "epoch": 0.25574547084053517, "percentage": 25.57, "elapsed_time": "0:16:56", "remaining_time": "0:49:17", "throughput": 12369.72, "total_tokens": 12570688} +{"current_steps": 4000, "total_steps": 15621, "loss": 0.4079, "lr": 1.855447600168336e-06, "epoch": 0.25606555278151205, "percentage": 25.61, "elapsed_time": "0:16:56", "remaining_time": "0:49:14", "throughput": 12376.77, "total_tokens": 12585984} +{"current_steps": 4005, "total_steps": 15621, "loss": 0.4732, "lr": 1.8548683931123215e-06, "epoch": 0.25638563472248893, "percentage": 25.64, "elapsed_time": "0:16:57", "remaining_time": "0:49:11", "throughput": 12383.78, "total_tokens": 12601216} +{"current_steps": 4010, "total_steps": 15621, "loss": 0.4471, "lr": 1.8542881187430807e-06, "epoch": 0.25670571666346587, "percentage": 25.67, "elapsed_time": "0:16:58", "remaining_time": "0:49:08", "throughput": 12392.11, "total_tokens": 12618624} +{"current_steps": 4015, "total_steps": 15621, "loss": 0.5899, "lr": 1.8537067777850935e-06, "epoch": 0.25702579860444275, "percentage": 25.7, "elapsed_time": "0:16:58", "remaining_time": "0:49:05", "throughput": 12400.25, "total_tokens": 12635840} +{"current_steps": 4020, "total_steps": 15621, "loss": 0.359, "lr": 1.8531243709641704e-06, "epoch": 0.2573458805454196, "percentage": 25.73, "elapsed_time": "0:16:59", "remaining_time": "0:49:02", "throughput": 12407.65, "total_tokens": 12651904} +{"current_steps": 4025, "total_steps": 15621, "loss": 0.4977, "lr": 1.8525408990074533e-06, "epoch": 0.2576659624863965, "percentage": 25.77, "elapsed_time": "0:17:00", "remaining_time": "0:48:59", "throughput": 12414.49, "total_tokens": 12666944} +{"current_steps": 4030, "total_steps": 15621, "loss": 0.4184, "lr": 1.851956362643414e-06, "epoch": 0.2579860444273734, "percentage": 25.8, "elapsed_time": "0:17:01", "remaining_time": "0:48:56", "throughput": 12421.68, "total_tokens": 12682688} +{"current_steps": 4035, "total_steps": 15621, "loss": 0.5578, "lr": 1.851370762601853e-06, "epoch": 0.2583061263683503, "percentage": 25.83, "elapsed_time": "0:17:01", "remaining_time": "0:48:53", "throughput": 12428.86, "total_tokens": 12698304} +{"current_steps": 4040, "total_steps": 15621, "loss": 0.5083, "lr": 1.8507840996138983e-06, "epoch": 0.2586262083093272, "percentage": 25.86, "elapsed_time": "0:17:02", "remaining_time": "0:48:50", "throughput": 12435.32, "total_tokens": 12712896} +{"current_steps": 4045, "total_steps": 15621, "loss": 0.3908, "lr": 1.8501963744120062e-06, "epoch": 0.2589462902503041, "percentage": 25.89, "elapsed_time": "0:17:02", "remaining_time": "0:48:47", "throughput": 12441.76, "total_tokens": 12727488} +{"current_steps": 4050, "total_steps": 15621, "loss": 0.408, "lr": 1.849607587729958e-06, "epoch": 0.25926637219128096, "percentage": 25.93, "elapsed_time": "0:17:03", "remaining_time": "0:48:44", "throughput": 12448.63, "total_tokens": 12742720} +{"current_steps": 4055, "total_steps": 15621, "loss": 0.3966, "lr": 1.8490177403028615e-06, "epoch": 0.25958645413225784, "percentage": 25.96, "elapsed_time": "0:17:04", "remaining_time": "0:48:41", "throughput": 12455.5, "total_tokens": 12757760} +{"current_steps": 4060, "total_steps": 15621, "loss": 0.4966, "lr": 1.8484268328671475e-06, "epoch": 0.2599065360732348, "percentage": 25.99, "elapsed_time": "0:17:04", "remaining_time": "0:48:38", "throughput": 12462.59, "total_tokens": 12773312} +{"current_steps": 4065, "total_steps": 15621, "loss": 0.5448, "lr": 1.847834866160571e-06, "epoch": 0.26022661801421165, "percentage": 26.02, "elapsed_time": "0:17:05", "remaining_time": "0:48:35", "throughput": 12470.6, "total_tokens": 12790336} +{"current_steps": 4070, "total_steps": 15621, "loss": 0.4919, "lr": 1.847241840922209e-06, "epoch": 0.26054669995518853, "percentage": 26.05, "elapsed_time": "0:17:06", "remaining_time": "0:48:32", "throughput": 12477.36, "total_tokens": 12805632} +{"current_steps": 4075, "total_steps": 15621, "loss": 0.4875, "lr": 1.8466477578924616e-06, "epoch": 0.2608667818961654, "percentage": 26.09, "elapsed_time": "0:17:06", "remaining_time": "0:48:29", "throughput": 12484.4, "total_tokens": 12821184} +{"current_steps": 4080, "total_steps": 15621, "loss": 0.5004, "lr": 1.8460526178130472e-06, "epoch": 0.2611868638371423, "percentage": 26.12, "elapsed_time": "0:17:07", "remaining_time": "0:48:26", "throughput": 12491.24, "total_tokens": 12836544} +{"current_steps": 4085, "total_steps": 15621, "loss": 0.436, "lr": 1.8454564214270056e-06, "epoch": 0.26150694577811917, "percentage": 26.15, "elapsed_time": "0:17:08", "remaining_time": "0:48:23", "throughput": 12498.17, "total_tokens": 12852032} +{"current_steps": 4090, "total_steps": 15621, "loss": 0.4469, "lr": 1.8448591694786955e-06, "epoch": 0.2618270277190961, "percentage": 26.18, "elapsed_time": "0:17:08", "remaining_time": "0:48:21", "throughput": 12505.06, "total_tokens": 12867456} +{"current_steps": 4095, "total_steps": 15621, "loss": 0.341, "lr": 1.8442608627137925e-06, "epoch": 0.262147109660073, "percentage": 26.21, "elapsed_time": "0:17:09", "remaining_time": "0:48:18", "throughput": 12513.49, "total_tokens": 12885184} +{"current_steps": 4100, "total_steps": 15621, "loss": 0.3896, "lr": 1.8436615018792897e-06, "epoch": 0.26246719160104987, "percentage": 26.25, "elapsed_time": "0:17:10", "remaining_time": "0:48:15", "throughput": 12520.37, "total_tokens": 12900416} +{"current_steps": 4105, "total_steps": 15621, "loss": 0.5792, "lr": 1.8430610877234957e-06, "epoch": 0.26278727354202674, "percentage": 26.28, "elapsed_time": "0:17:11", "remaining_time": "0:48:12", "throughput": 12527.19, "total_tokens": 12915648} +{"current_steps": 4110, "total_steps": 15621, "loss": 0.4624, "lr": 1.8424596209960356e-06, "epoch": 0.2631073554830036, "percentage": 26.31, "elapsed_time": "0:17:11", "remaining_time": "0:48:09", "throughput": 12533.71, "total_tokens": 12930368} +{"current_steps": 4115, "total_steps": 15621, "loss": 0.5265, "lr": 1.8418571024478466e-06, "epoch": 0.26342743742398056, "percentage": 26.34, "elapsed_time": "0:17:12", "remaining_time": "0:48:06", "throughput": 12540.48, "total_tokens": 12945472} +{"current_steps": 4120, "total_steps": 15621, "loss": 0.491, "lr": 1.8412535328311812e-06, "epoch": 0.26374751936495744, "percentage": 26.37, "elapsed_time": "0:17:12", "remaining_time": "0:48:03", "throughput": 12547.59, "total_tokens": 12961472} +{"current_steps": 4125, "total_steps": 15621, "loss": 0.5816, "lr": 1.8406489128996023e-06, "epoch": 0.2640676013059343, "percentage": 26.41, "elapsed_time": "0:17:13", "remaining_time": "0:48:00", "throughput": 12553.86, "total_tokens": 12975872} +{"current_steps": 4130, "total_steps": 15621, "loss": 0.529, "lr": 1.8400432434079853e-06, "epoch": 0.2643876832469112, "percentage": 26.44, "elapsed_time": "0:17:14", "remaining_time": "0:47:57", "throughput": 12561.22, "total_tokens": 12992128} +{"current_steps": 4135, "total_steps": 15621, "loss": 0.4095, "lr": 1.8394365251125162e-06, "epoch": 0.2647077651878881, "percentage": 26.47, "elapsed_time": "0:17:15", "remaining_time": "0:47:56", "throughput": 12575.01, "total_tokens": 13021184} +{"current_steps": 4140, "total_steps": 15621, "loss": 0.4425, "lr": 1.8388287587706888e-06, "epoch": 0.265027847128865, "percentage": 26.5, "elapsed_time": "0:17:16", "remaining_time": "0:47:53", "throughput": 12582.41, "total_tokens": 13037568} +{"current_steps": 4145, "total_steps": 15621, "loss": 0.4682, "lr": 1.8382199451413074e-06, "epoch": 0.2653479290698419, "percentage": 26.53, "elapsed_time": "0:17:16", "remaining_time": "0:47:50", "throughput": 12589.44, "total_tokens": 13053440} +{"current_steps": 4150, "total_steps": 15621, "loss": 0.5178, "lr": 1.837610084984483e-06, "epoch": 0.26566801101081877, "percentage": 26.57, "elapsed_time": "0:17:17", "remaining_time": "0:47:47", "throughput": 12596.57, "total_tokens": 13069440} +{"current_steps": 4155, "total_steps": 15621, "loss": 0.5487, "lr": 1.8369991790616327e-06, "epoch": 0.26598809295179565, "percentage": 26.6, "elapsed_time": "0:17:18", "remaining_time": "0:47:44", "throughput": 12602.95, "total_tokens": 13084224} +{"current_steps": 4160, "total_steps": 15621, "loss": 0.6725, "lr": 1.8363872281354795e-06, "epoch": 0.26630817489277253, "percentage": 26.63, "elapsed_time": "0:17:18", "remaining_time": "0:47:42", "throughput": 12609.18, "total_tokens": 13098688} +{"current_steps": 4165, "total_steps": 15621, "loss": 0.4162, "lr": 1.835774232970052e-06, "epoch": 0.26662825683374947, "percentage": 26.66, "elapsed_time": "0:17:19", "remaining_time": "0:47:39", "throughput": 12615.95, "total_tokens": 13114112} +{"current_steps": 4170, "total_steps": 15621, "loss": 0.454, "lr": 1.8351601943306815e-06, "epoch": 0.26694833877472635, "percentage": 26.69, "elapsed_time": "0:17:20", "remaining_time": "0:47:36", "throughput": 12623.18, "total_tokens": 13130240} +{"current_steps": 4175, "total_steps": 15621, "loss": 0.3972, "lr": 1.8345451129840025e-06, "epoch": 0.2672684207157032, "percentage": 26.73, "elapsed_time": "0:17:20", "remaining_time": "0:47:33", "throughput": 12629.85, "total_tokens": 13145536} +{"current_steps": 4180, "total_steps": 15621, "loss": 0.5506, "lr": 1.8339289896979515e-06, "epoch": 0.2675885026566801, "percentage": 26.76, "elapsed_time": "0:17:21", "remaining_time": "0:47:30", "throughput": 12636.21, "total_tokens": 13160256} +{"current_steps": 4185, "total_steps": 15621, "loss": 0.5525, "lr": 1.8333118252417651e-06, "epoch": 0.267908584597657, "percentage": 26.79, "elapsed_time": "0:17:22", "remaining_time": "0:47:27", "throughput": 12643.77, "total_tokens": 13177088} +{"current_steps": 4190, "total_steps": 15621, "loss": 0.5154, "lr": 1.832693620385981e-06, "epoch": 0.26822866653863386, "percentage": 26.82, "elapsed_time": "0:17:22", "remaining_time": "0:47:25", "throughput": 12650.69, "total_tokens": 13192768} +{"current_steps": 4195, "total_steps": 15621, "loss": 0.5089, "lr": 1.8320743759024352e-06, "epoch": 0.2685487484796108, "percentage": 26.85, "elapsed_time": "0:17:23", "remaining_time": "0:47:22", "throughput": 12657.37, "total_tokens": 13208192} +{"current_steps": 4200, "total_steps": 15621, "loss": 0.5486, "lr": 1.831454092564261e-06, "epoch": 0.2688688304205877, "percentage": 26.89, "elapsed_time": "0:17:24", "remaining_time": "0:47:19", "throughput": 12664.24, "total_tokens": 13223872} +{"current_steps": 4205, "total_steps": 15621, "loss": 0.4597, "lr": 1.8308327711458899e-06, "epoch": 0.26918891236156456, "percentage": 26.92, "elapsed_time": "0:17:24", "remaining_time": "0:47:16", "throughput": 12670.76, "total_tokens": 13239104} +{"current_steps": 4210, "total_steps": 15621, "loss": 0.3925, "lr": 1.830210412423049e-06, "epoch": 0.26950899430254144, "percentage": 26.95, "elapsed_time": "0:17:25", "remaining_time": "0:47:13", "throughput": 12677.48, "total_tokens": 13254464} +{"current_steps": 4215, "total_steps": 15621, "loss": 0.3617, "lr": 1.8295870171727605e-06, "epoch": 0.2698290762435183, "percentage": 26.98, "elapsed_time": "0:17:26", "remaining_time": "0:47:10", "throughput": 12684.15, "total_tokens": 13269824} +{"current_steps": 4220, "total_steps": 15621, "loss": 0.4149, "lr": 1.8289625861733408e-06, "epoch": 0.27014915818449525, "percentage": 27.01, "elapsed_time": "0:17:26", "remaining_time": "0:47:08", "throughput": 12692.68, "total_tokens": 13288448} +{"current_steps": 4225, "total_steps": 15621, "loss": 0.5178, "lr": 1.8283371202043991e-06, "epoch": 0.27046924012547213, "percentage": 27.05, "elapsed_time": "0:17:27", "remaining_time": "0:47:05", "throughput": 12699.56, "total_tokens": 13304320} +{"current_steps": 4230, "total_steps": 15621, "loss": 0.5533, "lr": 1.827710620046837e-06, "epoch": 0.270789322066449, "percentage": 27.08, "elapsed_time": "0:17:28", "remaining_time": "0:47:03", "throughput": 12707.54, "total_tokens": 13321920} +{"current_steps": 4235, "total_steps": 15621, "loss": 0.4571, "lr": 1.8270830864828474e-06, "epoch": 0.2711094040074259, "percentage": 27.11, "elapsed_time": "0:17:29", "remaining_time": "0:47:00", "throughput": 12714.14, "total_tokens": 13337280} +{"current_steps": 4240, "total_steps": 15621, "loss": 0.434, "lr": 1.8264545202959133e-06, "epoch": 0.27142948594840277, "percentage": 27.14, "elapsed_time": "0:17:29", "remaining_time": "0:46:57", "throughput": 12721.6, "total_tokens": 13354112} +{"current_steps": 4245, "total_steps": 15621, "loss": 0.4362, "lr": 1.8258249222708067e-06, "epoch": 0.2717495678893797, "percentage": 27.17, "elapsed_time": "0:17:30", "remaining_time": "0:46:54", "throughput": 12728.23, "total_tokens": 13369600} +{"current_steps": 4250, "total_steps": 15621, "loss": 0.4558, "lr": 1.8251942931935886e-06, "epoch": 0.2720696498303566, "percentage": 27.21, "elapsed_time": "0:17:31", "remaining_time": "0:46:52", "throughput": 12735.04, "total_tokens": 13385536} +{"current_steps": 4255, "total_steps": 15621, "loss": 0.3748, "lr": 1.8245626338516069e-06, "epoch": 0.27238973177133347, "percentage": 27.24, "elapsed_time": "0:17:31", "remaining_time": "0:46:49", "throughput": 12741.59, "total_tokens": 13400832} +{"current_steps": 4260, "total_steps": 15621, "loss": 0.338, "lr": 1.823929945033495e-06, "epoch": 0.27270981371231034, "percentage": 27.27, "elapsed_time": "0:17:32", "remaining_time": "0:46:46", "throughput": 12748.07, "total_tokens": 13416000} +{"current_steps": 4265, "total_steps": 15621, "loss": 0.5038, "lr": 1.8232962275291728e-06, "epoch": 0.2730298956532872, "percentage": 27.3, "elapsed_time": "0:17:33", "remaining_time": "0:46:43", "throughput": 12754.71, "total_tokens": 13431360} +{"current_steps": 4270, "total_steps": 15621, "loss": 0.4415, "lr": 1.822661482129844e-06, "epoch": 0.2733499775942641, "percentage": 27.33, "elapsed_time": "0:17:33", "remaining_time": "0:46:41", "throughput": 12761.39, "total_tokens": 13446976} +{"current_steps": 4275, "total_steps": 15621, "loss": 0.3688, "lr": 1.8220257096279956e-06, "epoch": 0.27367005953524104, "percentage": 27.37, "elapsed_time": "0:17:34", "remaining_time": "0:46:38", "throughput": 12768.24, "total_tokens": 13463040} +{"current_steps": 4280, "total_steps": 15621, "loss": 0.6843, "lr": 1.8213889108173972e-06, "epoch": 0.2739901414762179, "percentage": 27.4, "elapsed_time": "0:17:35", "remaining_time": "0:46:35", "throughput": 12774.89, "total_tokens": 13478656} +{"current_steps": 4285, "total_steps": 15621, "loss": 0.4995, "lr": 1.8207510864930992e-06, "epoch": 0.2743102234171948, "percentage": 27.43, "elapsed_time": "0:17:35", "remaining_time": "0:46:33", "throughput": 12782.01, "total_tokens": 13495296} +{"current_steps": 4290, "total_steps": 15621, "loss": 0.5081, "lr": 1.8201122374514336e-06, "epoch": 0.2746303053581717, "percentage": 27.46, "elapsed_time": "0:17:36", "remaining_time": "0:46:30", "throughput": 12788.63, "total_tokens": 13510912} +{"current_steps": 4295, "total_steps": 15621, "loss": 0.4362, "lr": 1.8194723644900099e-06, "epoch": 0.27495038729914856, "percentage": 27.5, "elapsed_time": "0:17:37", "remaining_time": "0:46:27", "throughput": 12794.92, "total_tokens": 13525952} +{"current_steps": 4300, "total_steps": 15621, "loss": 0.5305, "lr": 1.8188314684077173e-06, "epoch": 0.2752704692401255, "percentage": 27.53, "elapsed_time": "0:17:37", "remaining_time": "0:46:25", "throughput": 12804.49, "total_tokens": 13546752} +{"current_steps": 4305, "total_steps": 15621, "loss": 0.5643, "lr": 1.8181895500047226e-06, "epoch": 0.2755905511811024, "percentage": 27.56, "elapsed_time": "0:17:38", "remaining_time": "0:46:22", "throughput": 12810.73, "total_tokens": 13561728} +{"current_steps": 4310, "total_steps": 15621, "loss": 0.4562, "lr": 1.817546610082468e-06, "epoch": 0.27591063312207925, "percentage": 27.59, "elapsed_time": "0:17:39", "remaining_time": "0:46:19", "throughput": 12817.28, "total_tokens": 13577344} +{"current_steps": 4315, "total_steps": 15621, "loss": 0.4862, "lr": 1.816902649443672e-06, "epoch": 0.27623071506305613, "percentage": 27.62, "elapsed_time": "0:17:39", "remaining_time": "0:46:17", "throughput": 12823.4, "total_tokens": 13592256} +{"current_steps": 4320, "total_steps": 15621, "loss": 0.5403, "lr": 1.8162576688923262e-06, "epoch": 0.276550797004033, "percentage": 27.66, "elapsed_time": "0:17:40", "remaining_time": "0:46:14", "throughput": 12830.61, "total_tokens": 13608832} +{"current_steps": 4325, "total_steps": 15621, "loss": 0.5508, "lr": 1.815611669233697e-06, "epoch": 0.27687087894500995, "percentage": 27.69, "elapsed_time": "0:17:41", "remaining_time": "0:46:11", "throughput": 12836.99, "total_tokens": 13624128} +{"current_steps": 4330, "total_steps": 15621, "loss": 0.5369, "lr": 1.8149646512743222e-06, "epoch": 0.2771909608859868, "percentage": 27.72, "elapsed_time": "0:17:42", "remaining_time": "0:46:09", "throughput": 12844.05, "total_tokens": 13640576} +{"current_steps": 4335, "total_steps": 15621, "loss": 0.4653, "lr": 1.8143166158220118e-06, "epoch": 0.2775110428269637, "percentage": 27.75, "elapsed_time": "0:17:42", "remaining_time": "0:46:06", "throughput": 12850.45, "total_tokens": 13655872} +{"current_steps": 4340, "total_steps": 15621, "loss": 0.6598, "lr": 1.8136675636858454e-06, "epoch": 0.2778311247679406, "percentage": 27.78, "elapsed_time": "0:17:43", "remaining_time": "0:46:04", "throughput": 12857.53, "total_tokens": 13672384} +{"current_steps": 4345, "total_steps": 15621, "loss": 0.408, "lr": 1.8130174956761723e-06, "epoch": 0.27815120670891746, "percentage": 27.82, "elapsed_time": "0:17:44", "remaining_time": "0:46:01", "throughput": 12863.74, "total_tokens": 13687296} +{"current_steps": 4350, "total_steps": 15621, "loss": 0.5366, "lr": 1.81236641260461e-06, "epoch": 0.2784712886498944, "percentage": 27.85, "elapsed_time": "0:17:44", "remaining_time": "0:45:58", "throughput": 12870.1, "total_tokens": 13702528} +{"current_steps": 4355, "total_steps": 15621, "loss": 0.498, "lr": 1.811714315284043e-06, "epoch": 0.2787913705908713, "percentage": 27.88, "elapsed_time": "0:17:45", "remaining_time": "0:45:55", "throughput": 12876.3, "total_tokens": 13717568} +{"current_steps": 4360, "total_steps": 15621, "loss": 0.4252, "lr": 1.8110612045286229e-06, "epoch": 0.27911145253184816, "percentage": 27.91, "elapsed_time": "0:17:46", "remaining_time": "0:45:53", "throughput": 12883.06, "total_tokens": 13733568} +{"current_steps": 4365, "total_steps": 15621, "loss": 0.3778, "lr": 1.8104070811537661e-06, "epoch": 0.27943153447282504, "percentage": 27.94, "elapsed_time": "0:17:46", "remaining_time": "0:45:50", "throughput": 12889.72, "total_tokens": 13749312} +{"current_steps": 4370, "total_steps": 15621, "loss": 0.4405, "lr": 1.8097519459761533e-06, "epoch": 0.2797516164138019, "percentage": 27.98, "elapsed_time": "0:17:47", "remaining_time": "0:45:48", "throughput": 12896.77, "total_tokens": 13765952} +{"current_steps": 4375, "total_steps": 15621, "loss": 0.5056, "lr": 1.8090957998137283e-06, "epoch": 0.2800716983547788, "percentage": 28.01, "elapsed_time": "0:17:48", "remaining_time": "0:45:45", "throughput": 12903.16, "total_tokens": 13781440} +{"current_steps": 4380, "total_steps": 15621, "loss": 0.4528, "lr": 1.8084386434856978e-06, "epoch": 0.28039178029575573, "percentage": 28.04, "elapsed_time": "0:17:48", "remaining_time": "0:45:42", "throughput": 12909.56, "total_tokens": 13796864} +{"current_steps": 4385, "total_steps": 15621, "loss": 0.4966, "lr": 1.8077804778125283e-06, "epoch": 0.2807118622367326, "percentage": 28.07, "elapsed_time": "0:17:49", "remaining_time": "0:45:40", "throughput": 12916.26, "total_tokens": 13812736} +{"current_steps": 4390, "total_steps": 15621, "loss": 0.489, "lr": 1.807121303615948e-06, "epoch": 0.2810319441777095, "percentage": 28.1, "elapsed_time": "0:17:50", "remaining_time": "0:45:37", "throughput": 12922.74, "total_tokens": 13828288} +{"current_steps": 4395, "total_steps": 15621, "loss": 0.4168, "lr": 1.8064611217189434e-06, "epoch": 0.28135202611868637, "percentage": 28.14, "elapsed_time": "0:17:50", "remaining_time": "0:45:35", "throughput": 12930.24, "total_tokens": 13845568} +{"current_steps": 4400, "total_steps": 15621, "loss": 0.3902, "lr": 1.8057999329457596e-06, "epoch": 0.28167210805966325, "percentage": 28.17, "elapsed_time": "0:17:51", "remaining_time": "0:45:32", "throughput": 12936.49, "total_tokens": 13860608} +{"current_steps": 4405, "total_steps": 15621, "loss": 0.5584, "lr": 1.8051377381218984e-06, "epoch": 0.2819921900006402, "percentage": 28.2, "elapsed_time": "0:17:52", "remaining_time": "0:45:29", "throughput": 12943.2, "total_tokens": 13876608} +{"current_steps": 4410, "total_steps": 15621, "loss": 0.5613, "lr": 1.8044745380741177e-06, "epoch": 0.28231227194161707, "percentage": 28.23, "elapsed_time": "0:17:52", "remaining_time": "0:45:27", "throughput": 12950.44, "total_tokens": 13893632} +{"current_steps": 4415, "total_steps": 15621, "loss": 0.3872, "lr": 1.8038103336304306e-06, "epoch": 0.28263235388259395, "percentage": 28.26, "elapsed_time": "0:17:53", "remaining_time": "0:45:24", "throughput": 12956.89, "total_tokens": 13909312} +{"current_steps": 4420, "total_steps": 15621, "loss": 0.5718, "lr": 1.8031451256201042e-06, "epoch": 0.2829524358235708, "percentage": 28.3, "elapsed_time": "0:17:54", "remaining_time": "0:45:22", "throughput": 12963.83, "total_tokens": 13925824} +{"current_steps": 4425, "total_steps": 15621, "loss": 0.5314, "lr": 1.8024789148736589e-06, "epoch": 0.2832725177645477, "percentage": 28.33, "elapsed_time": "0:17:54", "remaining_time": "0:45:19", "throughput": 12970.86, "total_tokens": 13942336} +{"current_steps": 4430, "total_steps": 15621, "loss": 0.4017, "lr": 1.8018117022228655e-06, "epoch": 0.28359259970552464, "percentage": 28.36, "elapsed_time": "0:17:55", "remaining_time": "0:45:17", "throughput": 12977.19, "total_tokens": 13957760} +{"current_steps": 4435, "total_steps": 15621, "loss": 0.5044, "lr": 1.8011434885007479e-06, "epoch": 0.2839126816465015, "percentage": 28.39, "elapsed_time": "0:17:56", "remaining_time": "0:45:14", "throughput": 12983.48, "total_tokens": 13972992} +{"current_steps": 4440, "total_steps": 15621, "loss": 0.4184, "lr": 1.8004742745415787e-06, "epoch": 0.2842327635874784, "percentage": 28.42, "elapsed_time": "0:17:56", "remaining_time": "0:45:11", "throughput": 12989.99, "total_tokens": 13988736} +{"current_steps": 4445, "total_steps": 15621, "loss": 0.5398, "lr": 1.799804061180879e-06, "epoch": 0.2845528455284553, "percentage": 28.46, "elapsed_time": "0:17:57", "remaining_time": "0:45:09", "throughput": 12995.86, "total_tokens": 14003520} +{"current_steps": 4450, "total_steps": 15621, "loss": 0.5016, "lr": 1.799132849255418e-06, "epoch": 0.28487292746943216, "percentage": 28.49, "elapsed_time": "0:17:58", "remaining_time": "0:45:06", "throughput": 13003.1, "total_tokens": 14020608} +{"current_steps": 4455, "total_steps": 15621, "loss": 0.4168, "lr": 1.798460639603212e-06, "epoch": 0.28519300941040904, "percentage": 28.52, "elapsed_time": "0:17:58", "remaining_time": "0:45:04", "throughput": 13008.96, "total_tokens": 14035328} +{"current_steps": 4460, "total_steps": 15621, "loss": 0.4799, "lr": 1.7977874330635224e-06, "epoch": 0.285513091351386, "percentage": 28.55, "elapsed_time": "0:17:59", "remaining_time": "0:45:01", "throughput": 13015.22, "total_tokens": 14050816} +{"current_steps": 4465, "total_steps": 15621, "loss": 0.3319, "lr": 1.7971132304768555e-06, "epoch": 0.28583317329236285, "percentage": 28.58, "elapsed_time": "0:18:00", "remaining_time": "0:44:59", "throughput": 13021.83, "total_tokens": 14066880} +{"current_steps": 4470, "total_steps": 15621, "loss": 0.5081, "lr": 1.7964380326849612e-06, "epoch": 0.28615325523333973, "percentage": 28.62, "elapsed_time": "0:18:00", "remaining_time": "0:44:56", "throughput": 13027.79, "total_tokens": 14081728} +{"current_steps": 4475, "total_steps": 15621, "loss": 0.4885, "lr": 1.795761840530832e-06, "epoch": 0.2864733371743166, "percentage": 28.65, "elapsed_time": "0:18:01", "remaining_time": "0:44:53", "throughput": 13034.55, "total_tokens": 14097984} +{"current_steps": 4480, "total_steps": 15621, "loss": 0.4186, "lr": 1.7950846548587015e-06, "epoch": 0.2867934191152935, "percentage": 28.68, "elapsed_time": "0:18:02", "remaining_time": "0:44:51", "throughput": 13041.92, "total_tokens": 14115264} +{"current_steps": 4485, "total_steps": 15621, "loss": 0.2815, "lr": 1.7944064765140445e-06, "epoch": 0.2871135010562704, "percentage": 28.71, "elapsed_time": "0:18:02", "remaining_time": "0:44:48", "throughput": 13047.47, "total_tokens": 14129472} +{"current_steps": 4490, "total_steps": 15621, "loss": 0.5567, "lr": 1.7937273063435735e-06, "epoch": 0.2874335829972473, "percentage": 28.74, "elapsed_time": "0:18:03", "remaining_time": "0:44:46", "throughput": 13053.7, "total_tokens": 14144896} +{"current_steps": 4495, "total_steps": 15621, "loss": 0.3559, "lr": 1.7930471451952416e-06, "epoch": 0.2877536649382242, "percentage": 28.78, "elapsed_time": "0:18:04", "remaining_time": "0:44:43", "throughput": 13059.6, "total_tokens": 14159744} +{"current_steps": 4500, "total_steps": 15621, "loss": 0.4935, "lr": 1.7923659939182377e-06, "epoch": 0.28807374687920106, "percentage": 28.81, "elapsed_time": "0:18:04", "remaining_time": "0:44:41", "throughput": 13066.55, "total_tokens": 14176384} +{"current_steps": 4505, "total_steps": 15621, "loss": 0.5441, "lr": 1.7916838533629866e-06, "epoch": 0.28839382882017794, "percentage": 28.84, "elapsed_time": "0:18:05", "remaining_time": "0:44:38", "throughput": 13073.08, "total_tokens": 14192320} +{"current_steps": 4510, "total_steps": 15621, "loss": 0.3929, "lr": 1.7910007243811493e-06, "epoch": 0.2887139107611549, "percentage": 28.87, "elapsed_time": "0:18:06", "remaining_time": "0:44:36", "throughput": 13079.5, "total_tokens": 14208192} +{"current_steps": 4515, "total_steps": 15621, "loss": 0.5737, "lr": 1.7903166078256202e-06, "epoch": 0.28903399270213176, "percentage": 28.9, "elapsed_time": "0:18:06", "remaining_time": "0:44:33", "throughput": 13085.41, "total_tokens": 14223104} +{"current_steps": 4520, "total_steps": 15621, "loss": 0.4223, "lr": 1.789631504550527e-06, "epoch": 0.28935407464310864, "percentage": 28.94, "elapsed_time": "0:18:07", "remaining_time": "0:44:31", "throughput": 13091.6, "total_tokens": 14238464} +{"current_steps": 4525, "total_steps": 15621, "loss": 0.3912, "lr": 1.7889454154112288e-06, "epoch": 0.2896741565840855, "percentage": 28.97, "elapsed_time": "0:18:08", "remaining_time": "0:44:28", "throughput": 13098.15, "total_tokens": 14254656} +{"current_steps": 4530, "total_steps": 15621, "loss": 0.3903, "lr": 1.7882583412643167e-06, "epoch": 0.2899942385250624, "percentage": 29.0, "elapsed_time": "0:18:08", "remaining_time": "0:44:26", "throughput": 13103.7, "total_tokens": 14268928} +{"current_steps": 4535, "total_steps": 15621, "loss": 0.4489, "lr": 1.78757028296761e-06, "epoch": 0.29031432046603933, "percentage": 29.03, "elapsed_time": "0:18:09", "remaining_time": "0:44:23", "throughput": 13110.73, "total_tokens": 14285952} +{"current_steps": 4540, "total_steps": 15621, "loss": 0.3513, "lr": 1.7868812413801582e-06, "epoch": 0.2906344024070162, "percentage": 29.06, "elapsed_time": "0:18:10", "remaining_time": "0:44:21", "throughput": 13117.19, "total_tokens": 14301760} +{"current_steps": 4545, "total_steps": 15621, "loss": 0.4985, "lr": 1.7861912173622372e-06, "epoch": 0.2909544843479931, "percentage": 29.1, "elapsed_time": "0:18:11", "remaining_time": "0:44:18", "throughput": 13123.89, "total_tokens": 14318208} +{"current_steps": 4550, "total_steps": 15621, "loss": 0.4537, "lr": 1.7855002117753504e-06, "epoch": 0.29127456628896997, "percentage": 29.13, "elapsed_time": "0:18:11", "remaining_time": "0:44:16", "throughput": 13130.35, "total_tokens": 14334144} +{"current_steps": 4555, "total_steps": 15621, "loss": 0.5489, "lr": 1.7848082254822266e-06, "epoch": 0.29159464822994685, "percentage": 29.16, "elapsed_time": "0:18:12", "remaining_time": "0:44:13", "throughput": 13136.2, "total_tokens": 14349120} +{"current_steps": 4560, "total_steps": 15621, "loss": 0.4957, "lr": 1.7841152593468185e-06, "epoch": 0.29191473017092373, "percentage": 29.19, "elapsed_time": "0:18:13", "remaining_time": "0:44:11", "throughput": 13142.84, "total_tokens": 14365376} +{"current_steps": 4565, "total_steps": 15621, "loss": 0.4636, "lr": 1.7834213142343026e-06, "epoch": 0.29223481211190067, "percentage": 29.22, "elapsed_time": "0:18:13", "remaining_time": "0:44:08", "throughput": 13149.39, "total_tokens": 14381568} +{"current_steps": 4570, "total_steps": 15621, "loss": 0.4752, "lr": 1.7827263910110777e-06, "epoch": 0.29255489405287755, "percentage": 29.26, "elapsed_time": "0:18:14", "remaining_time": "0:44:06", "throughput": 13155.66, "total_tokens": 14397312} +{"current_steps": 4575, "total_steps": 15621, "loss": 0.4631, "lr": 1.7820304905447632e-06, "epoch": 0.2928749759938544, "percentage": 29.29, "elapsed_time": "0:18:15", "remaining_time": "0:44:03", "throughput": 13161.89, "total_tokens": 14412928} +{"current_steps": 4580, "total_steps": 15621, "loss": 0.4515, "lr": 1.7813336137041991e-06, "epoch": 0.2931950579348313, "percentage": 29.32, "elapsed_time": "0:18:15", "remaining_time": "0:44:01", "throughput": 13167.73, "total_tokens": 14427968} +{"current_steps": 4585, "total_steps": 15621, "loss": 0.3591, "lr": 1.7806357613594447e-06, "epoch": 0.2935151398758082, "percentage": 29.35, "elapsed_time": "0:18:16", "remaining_time": "0:43:58", "throughput": 13173.59, "total_tokens": 14442944} +{"current_steps": 4590, "total_steps": 15621, "loss": 0.452, "lr": 1.7799369343817764e-06, "epoch": 0.2938352218167851, "percentage": 29.38, "elapsed_time": "0:18:17", "remaining_time": "0:43:56", "throughput": 13179.68, "total_tokens": 14458176} +{"current_steps": 4595, "total_steps": 15621, "loss": 0.3618, "lr": 1.7792371336436883e-06, "epoch": 0.294155303757762, "percentage": 29.42, "elapsed_time": "0:18:17", "remaining_time": "0:43:53", "throughput": 13185.74, "total_tokens": 14473600} +{"current_steps": 4600, "total_steps": 15621, "loss": 0.6561, "lr": 1.7785363600188892e-06, "epoch": 0.2944753856987389, "percentage": 29.45, "elapsed_time": "0:18:18", "remaining_time": "0:43:51", "throughput": 13191.72, "total_tokens": 14488896} +{"current_steps": 4605, "total_steps": 15621, "loss": 0.5982, "lr": 1.7778346143823038e-06, "epoch": 0.29479546763971576, "percentage": 29.48, "elapsed_time": "0:18:18", "remaining_time": "0:43:48", "throughput": 13196.9, "total_tokens": 14502784} +{"current_steps": 4610, "total_steps": 15621, "loss": 0.4353, "lr": 1.7771318976100696e-06, "epoch": 0.29511554958069264, "percentage": 29.51, "elapsed_time": "0:18:19", "remaining_time": "0:43:46", "throughput": 13204.0, "total_tokens": 14520000} +{"current_steps": 4615, "total_steps": 15621, "loss": 0.3531, "lr": 1.7764282105795364e-06, "epoch": 0.2954356315216696, "percentage": 29.54, "elapsed_time": "0:18:20", "remaining_time": "0:43:44", "throughput": 13210.51, "total_tokens": 14536320} +{"current_steps": 4620, "total_steps": 15621, "loss": 0.4688, "lr": 1.7757235541692663e-06, "epoch": 0.29575571346264645, "percentage": 29.58, "elapsed_time": "0:18:21", "remaining_time": "0:43:41", "throughput": 13216.62, "total_tokens": 14551808} +{"current_steps": 4625, "total_steps": 15621, "loss": 0.3106, "lr": 1.7750179292590306e-06, "epoch": 0.29607579540362333, "percentage": 29.61, "elapsed_time": "0:18:21", "remaining_time": "0:43:39", "throughput": 13222.51, "total_tokens": 14566976} +{"current_steps": 4630, "total_steps": 15621, "loss": 0.3511, "lr": 1.7743113367298107e-06, "epoch": 0.2963958773446002, "percentage": 29.64, "elapsed_time": "0:18:22", "remaining_time": "0:43:36", "throughput": 13228.89, "total_tokens": 14583104} +{"current_steps": 4635, "total_steps": 15621, "loss": 0.4515, "lr": 1.7736037774637955e-06, "epoch": 0.2967159592855771, "percentage": 29.67, "elapsed_time": "0:18:23", "remaining_time": "0:43:34", "throughput": 13234.85, "total_tokens": 14598336} +{"current_steps": 4640, "total_steps": 15621, "loss": 0.5141, "lr": 1.772895252344381e-06, "epoch": 0.29703604122655397, "percentage": 29.7, "elapsed_time": "0:18:23", "remaining_time": "0:43:32", "throughput": 13241.72, "total_tokens": 14615232} +{"current_steps": 4645, "total_steps": 15621, "loss": 0.388, "lr": 1.7721857622561692e-06, "epoch": 0.2973561231675309, "percentage": 29.74, "elapsed_time": "0:18:24", "remaining_time": "0:43:29", "throughput": 13247.85, "total_tokens": 14630848} +{"current_steps": 4650, "total_steps": 15621, "loss": 0.4668, "lr": 1.7714753080849664e-06, "epoch": 0.2976762051085078, "percentage": 29.77, "elapsed_time": "0:18:25", "remaining_time": "0:43:27", "throughput": 13254.3, "total_tokens": 14647040} +{"current_steps": 4655, "total_steps": 15621, "loss": 0.4196, "lr": 1.7707638907177837e-06, "epoch": 0.29799628704948466, "percentage": 29.8, "elapsed_time": "0:18:25", "remaining_time": "0:43:24", "throughput": 13259.98, "total_tokens": 14661888} +{"current_steps": 4660, "total_steps": 15621, "loss": 0.7015, "lr": 1.7700515110428336e-06, "epoch": 0.29831636899046154, "percentage": 29.83, "elapsed_time": "0:18:26", "remaining_time": "0:43:22", "throughput": 13266.19, "total_tokens": 14677696} +{"current_steps": 4665, "total_steps": 15621, "loss": 0.4795, "lr": 1.7693381699495307e-06, "epoch": 0.2986364509314384, "percentage": 29.86, "elapsed_time": "0:18:27", "remaining_time": "0:43:19", "throughput": 13272.28, "total_tokens": 14693184} +{"current_steps": 4670, "total_steps": 15621, "loss": 0.3712, "lr": 1.7686238683284894e-06, "epoch": 0.29895653287241536, "percentage": 29.9, "elapsed_time": "0:18:27", "remaining_time": "0:43:17", "throughput": 13277.93, "total_tokens": 14707904} +{"current_steps": 4675, "total_steps": 15621, "loss": 0.3553, "lr": 1.7679086070715237e-06, "epoch": 0.29927661481339224, "percentage": 29.93, "elapsed_time": "0:18:28", "remaining_time": "0:43:15", "throughput": 13284.33, "total_tokens": 14724096} +{"current_steps": 4680, "total_steps": 15621, "loss": 0.4575, "lr": 1.7671923870716459e-06, "epoch": 0.2995966967543691, "percentage": 29.96, "elapsed_time": "0:18:29", "remaining_time": "0:43:12", "throughput": 13289.83, "total_tokens": 14738752} +{"current_steps": 4685, "total_steps": 15621, "loss": 0.355, "lr": 1.7664752092230652e-06, "epoch": 0.299916778695346, "percentage": 29.99, "elapsed_time": "0:18:29", "remaining_time": "0:43:10", "throughput": 13295.51, "total_tokens": 14753664} +{"current_steps": 4690, "total_steps": 15621, "loss": 0.3708, "lr": 1.7657570744211863e-06, "epoch": 0.3002368606363229, "percentage": 30.02, "elapsed_time": "0:18:30", "remaining_time": "0:43:07", "throughput": 13301.57, "total_tokens": 14769152} +{"current_steps": 4692, "total_steps": 15621, "eval_loss": 0.46517089009284973, "epoch": 0.30036489341271366, "percentage": 30.04, "elapsed_time": "0:19:21", "remaining_time": "0:45:04", "throughput": 12724.3, "total_tokens": 14775488} +{"current_steps": 4695, "total_steps": 15621, "loss": 0.5088, "lr": 1.765037983562609e-06, "epoch": 0.3005569425772998, "percentage": 30.06, "elapsed_time": "0:19:57", "remaining_time": "0:46:27", "throughput": 12344.09, "total_tokens": 14784128} +{"current_steps": 4700, "total_steps": 15621, "loss": 0.4325, "lr": 1.7643179375451264e-06, "epoch": 0.3008770245182767, "percentage": 30.09, "elapsed_time": "0:19:58", "remaining_time": "0:46:24", "throughput": 12350.38, "total_tokens": 14799936} +{"current_steps": 4705, "total_steps": 15621, "loss": 0.6141, "lr": 1.7635969372677252e-06, "epoch": 0.30119710645925357, "percentage": 30.12, "elapsed_time": "0:19:58", "remaining_time": "0:46:21", "throughput": 12355.76, "total_tokens": 14814208} +{"current_steps": 4710, "total_steps": 15621, "loss": 0.4862, "lr": 1.7628749836305818e-06, "epoch": 0.30151718840023045, "percentage": 30.15, "elapsed_time": "0:19:59", "remaining_time": "0:46:19", "throughput": 12361.68, "total_tokens": 14829504} +{"current_steps": 4715, "total_steps": 15621, "loss": 0.4053, "lr": 1.7621520775350645e-06, "epoch": 0.30183727034120733, "percentage": 30.18, "elapsed_time": "0:20:00", "remaining_time": "0:46:16", "throughput": 12367.22, "total_tokens": 14843968} +{"current_steps": 4720, "total_steps": 15621, "loss": 0.4685, "lr": 1.7614282198837293e-06, "epoch": 0.30215735228218427, "percentage": 30.22, "elapsed_time": "0:20:00", "remaining_time": "0:46:13", "throughput": 12373.55, "total_tokens": 14859840} +{"current_steps": 4725, "total_steps": 15621, "loss": 0.4873, "lr": 1.7607034115803219e-06, "epoch": 0.30247743422316115, "percentage": 30.25, "elapsed_time": "0:20:01", "remaining_time": "0:46:10", "throughput": 12379.68, "total_tokens": 14875648} +{"current_steps": 4730, "total_steps": 15621, "loss": 0.4244, "lr": 1.7599776535297734e-06, "epoch": 0.302797516164138, "percentage": 30.28, "elapsed_time": "0:20:02", "remaining_time": "0:46:08", "throughput": 12385.35, "total_tokens": 14890560} +{"current_steps": 4735, "total_steps": 15621, "loss": 0.478, "lr": 1.7592509466382012e-06, "epoch": 0.3031175981051149, "percentage": 30.31, "elapsed_time": "0:20:02", "remaining_time": "0:46:05", "throughput": 12391.72, "total_tokens": 14906688} +{"current_steps": 4740, "total_steps": 15621, "loss": 0.5622, "lr": 1.7585232918129076e-06, "epoch": 0.3034376800460918, "percentage": 30.34, "elapsed_time": "0:20:03", "remaining_time": "0:46:03", "throughput": 12397.89, "total_tokens": 14922496} +{"current_steps": 4745, "total_steps": 15621, "loss": 0.4656, "lr": 1.757794689962378e-06, "epoch": 0.30375776198706866, "percentage": 30.38, "elapsed_time": "0:20:04", "remaining_time": "0:46:00", "throughput": 12404.31, "total_tokens": 14938880} +{"current_steps": 4750, "total_steps": 15621, "loss": 0.5035, "lr": 1.7570651419962807e-06, "epoch": 0.3040778439280456, "percentage": 30.41, "elapsed_time": "0:20:04", "remaining_time": "0:45:57", "throughput": 12410.14, "total_tokens": 14954112} +{"current_steps": 4755, "total_steps": 15621, "loss": 0.4471, "lr": 1.7563346488254647e-06, "epoch": 0.3043979258690225, "percentage": 30.44, "elapsed_time": "0:20:05", "remaining_time": "0:45:55", "throughput": 12416.12, "total_tokens": 14969536} +{"current_steps": 4760, "total_steps": 15621, "loss": 0.351, "lr": 1.755603211361959e-06, "epoch": 0.30471800780999936, "percentage": 30.47, "elapsed_time": "0:20:06", "remaining_time": "0:45:52", "throughput": 12422.47, "total_tokens": 14985728} +{"current_steps": 4765, "total_steps": 15621, "loss": 0.4522, "lr": 1.7548708305189722e-06, "epoch": 0.30503808975097624, "percentage": 30.5, "elapsed_time": "0:20:07", "remaining_time": "0:45:50", "throughput": 12429.89, "total_tokens": 15003904} +{"current_steps": 4770, "total_steps": 15621, "loss": 0.5752, "lr": 1.7541375072108905e-06, "epoch": 0.3053581716919531, "percentage": 30.54, "elapsed_time": "0:20:07", "remaining_time": "0:45:47", "throughput": 12435.83, "total_tokens": 15019328} +{"current_steps": 4775, "total_steps": 15621, "loss": 0.4732, "lr": 1.7534032423532766e-06, "epoch": 0.30567825363293005, "percentage": 30.57, "elapsed_time": "0:20:08", "remaining_time": "0:45:44", "throughput": 12441.26, "total_tokens": 15033856} +{"current_steps": 4780, "total_steps": 15621, "loss": 0.361, "lr": 1.7526680368628685e-06, "epoch": 0.30599833557390693, "percentage": 30.6, "elapsed_time": "0:20:09", "remaining_time": "0:45:42", "throughput": 12448.19, "total_tokens": 15051200} +{"current_steps": 4785, "total_steps": 15621, "loss": 0.4427, "lr": 1.751931891657579e-06, "epoch": 0.3063184175148838, "percentage": 30.63, "elapsed_time": "0:20:09", "remaining_time": "0:45:39", "throughput": 12453.98, "total_tokens": 15066368} +{"current_steps": 4790, "total_steps": 15621, "loss": 0.3568, "lr": 1.7511948076564943e-06, "epoch": 0.3066384994558607, "percentage": 30.66, "elapsed_time": "0:20:10", "remaining_time": "0:45:36", "throughput": 12459.76, "total_tokens": 15081600} +{"current_steps": 4795, "total_steps": 15621, "loss": 0.5404, "lr": 1.7504567857798722e-06, "epoch": 0.30695858139683757, "percentage": 30.7, "elapsed_time": "0:20:11", "remaining_time": "0:45:34", "throughput": 12465.92, "total_tokens": 15097536} +{"current_steps": 4800, "total_steps": 15621, "loss": 0.4943, "lr": 1.7497178269491417e-06, "epoch": 0.3072786633378145, "percentage": 30.73, "elapsed_time": "0:20:11", "remaining_time": "0:45:31", "throughput": 12472.22, "total_tokens": 15113728} +{"current_steps": 4805, "total_steps": 15621, "loss": 0.5532, "lr": 1.7489779320869014e-06, "epoch": 0.3075987452787914, "percentage": 30.76, "elapsed_time": "0:20:12", "remaining_time": "0:45:29", "throughput": 12478.59, "total_tokens": 15130048} +{"current_steps": 4810, "total_steps": 15621, "loss": 0.3715, "lr": 1.7482371021169193e-06, "epoch": 0.30791882721976827, "percentage": 30.79, "elapsed_time": "0:20:13", "remaining_time": "0:45:26", "throughput": 12484.55, "total_tokens": 15145600} +{"current_steps": 4815, "total_steps": 15621, "loss": 0.4077, "lr": 1.7474953379641297e-06, "epoch": 0.30823890916074514, "percentage": 30.82, "elapsed_time": "0:20:13", "remaining_time": "0:45:24", "throughput": 12491.12, "total_tokens": 15162368} +{"current_steps": 4820, "total_steps": 15621, "loss": 0.438, "lr": 1.746752640554634e-06, "epoch": 0.308558991101722, "percentage": 30.86, "elapsed_time": "0:20:14", "remaining_time": "0:45:21", "throughput": 12497.31, "total_tokens": 15178368} +{"current_steps": 4825, "total_steps": 15621, "loss": 0.5348, "lr": 1.7460090108156988e-06, "epoch": 0.3088790730426989, "percentage": 30.89, "elapsed_time": "0:20:15", "remaining_time": "0:45:18", "throughput": 12503.03, "total_tokens": 15193408} +{"current_steps": 4830, "total_steps": 15621, "loss": 0.3155, "lr": 1.7452644496757548e-06, "epoch": 0.30919915498367584, "percentage": 30.92, "elapsed_time": "0:20:15", "remaining_time": "0:45:16", "throughput": 12508.79, "total_tokens": 15208640} +{"current_steps": 4835, "total_steps": 15621, "loss": 0.4557, "lr": 1.7445189580643946e-06, "epoch": 0.3095192369246527, "percentage": 30.95, "elapsed_time": "0:20:16", "remaining_time": "0:45:13", "throughput": 12514.79, "total_tokens": 15224192} +{"current_steps": 4840, "total_steps": 15621, "loss": 0.5187, "lr": 1.7437725369123737e-06, "epoch": 0.3098393188656296, "percentage": 30.98, "elapsed_time": "0:20:17", "remaining_time": "0:45:11", "throughput": 12520.62, "total_tokens": 15239616} +{"current_steps": 4845, "total_steps": 15621, "loss": 0.4925, "lr": 1.7430251871516077e-06, "epoch": 0.3101594008066065, "percentage": 31.02, "elapsed_time": "0:20:17", "remaining_time": "0:45:08", "throughput": 12526.79, "total_tokens": 15255680} +{"current_steps": 4850, "total_steps": 15621, "loss": 0.5256, "lr": 1.7422769097151715e-06, "epoch": 0.31047948274758336, "percentage": 31.05, "elapsed_time": "0:20:18", "remaining_time": "0:45:06", "throughput": 12532.7, "total_tokens": 15271232} +{"current_steps": 4855, "total_steps": 15621, "loss": 0.5038, "lr": 1.7415277055372982e-06, "epoch": 0.3107995646885603, "percentage": 31.08, "elapsed_time": "0:20:19", "remaining_time": "0:45:03", "throughput": 12538.68, "total_tokens": 15287040} +{"current_steps": 4860, "total_steps": 15621, "loss": 0.5181, "lr": 1.7407775755533778e-06, "epoch": 0.31111964662953717, "percentage": 31.11, "elapsed_time": "0:20:19", "remaining_time": "0:45:01", "throughput": 12545.45, "total_tokens": 15304256} +{"current_steps": 4865, "total_steps": 15621, "loss": 0.364, "lr": 1.7400265206999568e-06, "epoch": 0.31143972857051405, "percentage": 31.14, "elapsed_time": "0:20:20", "remaining_time": "0:44:58", "throughput": 12552.51, "total_tokens": 15322112} +{"current_steps": 4870, "total_steps": 15621, "loss": 0.5297, "lr": 1.7392745419147362e-06, "epoch": 0.31175981051149093, "percentage": 31.18, "elapsed_time": "0:20:21", "remaining_time": "0:44:56", "throughput": 12558.18, "total_tokens": 15337216} +{"current_steps": 4875, "total_steps": 15621, "loss": 0.4478, "lr": 1.7385216401365693e-06, "epoch": 0.3120798924524678, "percentage": 31.21, "elapsed_time": "0:20:22", "remaining_time": "0:44:53", "throughput": 12564.67, "total_tokens": 15354048} +{"current_steps": 4880, "total_steps": 15621, "loss": 0.4964, "lr": 1.7377678163054638e-06, "epoch": 0.31239997439344475, "percentage": 31.24, "elapsed_time": "0:20:22", "remaining_time": "0:44:51", "throughput": 12570.34, "total_tokens": 15369344} +{"current_steps": 4885, "total_steps": 15621, "loss": 0.4864, "lr": 1.7370130713625775e-06, "epoch": 0.3127200563344216, "percentage": 31.27, "elapsed_time": "0:20:23", "remaining_time": "0:44:48", "throughput": 12576.74, "total_tokens": 15385920} +{"current_steps": 4890, "total_steps": 15621, "loss": 0.3948, "lr": 1.736257406250218e-06, "epoch": 0.3130401382753985, "percentage": 31.3, "elapsed_time": "0:20:24", "remaining_time": "0:44:46", "throughput": 12582.6, "total_tokens": 15401536} +{"current_steps": 4895, "total_steps": 15621, "loss": 0.4629, "lr": 1.735500821911842e-06, "epoch": 0.3133602202163754, "percentage": 31.34, "elapsed_time": "0:20:24", "remaining_time": "0:44:43", "throughput": 12588.46, "total_tokens": 15417152} +{"current_steps": 4900, "total_steps": 15621, "loss": 0.4961, "lr": 1.7347433192920544e-06, "epoch": 0.31368030215735226, "percentage": 31.37, "elapsed_time": "0:20:25", "remaining_time": "0:44:41", "throughput": 12593.87, "total_tokens": 15431872} +{"current_steps": 4905, "total_steps": 15621, "loss": 0.4021, "lr": 1.7339848993366056e-06, "epoch": 0.3140003840983292, "percentage": 31.4, "elapsed_time": "0:20:26", "remaining_time": "0:44:38", "throughput": 12599.82, "total_tokens": 15447552} +{"current_steps": 4910, "total_steps": 15621, "loss": 0.4667, "lr": 1.7332255629923922e-06, "epoch": 0.3143204660393061, "percentage": 31.43, "elapsed_time": "0:20:26", "remaining_time": "0:44:36", "throughput": 12606.27, "total_tokens": 15464384} +{"current_steps": 4915, "total_steps": 15621, "loss": 0.5038, "lr": 1.732465311207454e-06, "epoch": 0.31464054798028296, "percentage": 31.46, "elapsed_time": "0:20:27", "remaining_time": "0:44:33", "throughput": 12611.94, "total_tokens": 15479808} +{"current_steps": 4920, "total_steps": 15621, "loss": 0.5018, "lr": 1.731704144930975e-06, "epoch": 0.31496062992125984, "percentage": 31.5, "elapsed_time": "0:20:28", "remaining_time": "0:44:31", "throughput": 12618.28, "total_tokens": 15496512} +{"current_steps": 4925, "total_steps": 15621, "loss": 0.4137, "lr": 1.7309420651132797e-06, "epoch": 0.3152807118622367, "percentage": 31.53, "elapsed_time": "0:20:28", "remaining_time": "0:44:28", "throughput": 12624.51, "total_tokens": 15512896} +{"current_steps": 4930, "total_steps": 15621, "loss": 0.3295, "lr": 1.7301790727058343e-06, "epoch": 0.3156007938032136, "percentage": 31.56, "elapsed_time": "0:20:29", "remaining_time": "0:44:26", "throughput": 12630.12, "total_tokens": 15528064} +{"current_steps": 4935, "total_steps": 15621, "loss": 0.3593, "lr": 1.7294151686612431e-06, "epoch": 0.31592087574419053, "percentage": 31.59, "elapsed_time": "0:20:30", "remaining_time": "0:44:23", "throughput": 12635.81, "total_tokens": 15543424} +{"current_steps": 4940, "total_steps": 15621, "loss": 0.5778, "lr": 1.7286503539332495e-06, "epoch": 0.3162409576851674, "percentage": 31.62, "elapsed_time": "0:20:30", "remaining_time": "0:44:21", "throughput": 12642.13, "total_tokens": 15560192} +{"current_steps": 4945, "total_steps": 15621, "loss": 0.3873, "lr": 1.7278846294767337e-06, "epoch": 0.3165610396261443, "percentage": 31.66, "elapsed_time": "0:20:31", "remaining_time": "0:44:18", "throughput": 12648.02, "total_tokens": 15576128} +{"current_steps": 4950, "total_steps": 15621, "loss": 0.6923, "lr": 1.7271179962477118e-06, "epoch": 0.31688112156712117, "percentage": 31.69, "elapsed_time": "0:20:32", "remaining_time": "0:44:16", "throughput": 12654.2, "total_tokens": 15592576} +{"current_steps": 4955, "total_steps": 15621, "loss": 0.4372, "lr": 1.7263504552033341e-06, "epoch": 0.31720120350809805, "percentage": 31.72, "elapsed_time": "0:20:32", "remaining_time": "0:44:13", "throughput": 12659.68, "total_tokens": 15607744} +{"current_steps": 4960, "total_steps": 15621, "loss": 0.481, "lr": 1.725582007301885e-06, "epoch": 0.317521285449075, "percentage": 31.75, "elapsed_time": "0:20:33", "remaining_time": "0:44:11", "throughput": 12665.45, "total_tokens": 15623360} +{"current_steps": 4965, "total_steps": 15621, "loss": 0.4251, "lr": 1.7248126535027806e-06, "epoch": 0.31784136739005187, "percentage": 31.78, "elapsed_time": "0:20:34", "remaining_time": "0:44:08", "throughput": 12671.12, "total_tokens": 15638656} +{"current_steps": 4970, "total_steps": 15621, "loss": 0.4569, "lr": 1.7240423947665678e-06, "epoch": 0.31816144933102875, "percentage": 31.82, "elapsed_time": "0:20:34", "remaining_time": "0:44:06", "throughput": 12676.92, "total_tokens": 15654400} +{"current_steps": 4975, "total_steps": 15621, "loss": 0.3867, "lr": 1.723271232054924e-06, "epoch": 0.3184815312720056, "percentage": 31.85, "elapsed_time": "0:20:35", "remaining_time": "0:44:03", "throughput": 12682.64, "total_tokens": 15670016} +{"current_steps": 4980, "total_steps": 15621, "loss": 0.5265, "lr": 1.722499166330655e-06, "epoch": 0.3188016132129825, "percentage": 31.88, "elapsed_time": "0:20:36", "remaining_time": "0:44:01", "throughput": 12688.71, "total_tokens": 15686208} +{"current_steps": 4985, "total_steps": 15621, "loss": 0.443, "lr": 1.7217261985576936e-06, "epoch": 0.31912169515395944, "percentage": 31.91, "elapsed_time": "0:20:36", "remaining_time": "0:43:59", "throughput": 12694.79, "total_tokens": 15702592} +{"current_steps": 4990, "total_steps": 15621, "loss": 0.5114, "lr": 1.7209523297010992e-06, "epoch": 0.3194417770949363, "percentage": 31.94, "elapsed_time": "0:20:37", "remaining_time": "0:43:56", "throughput": 12700.26, "total_tokens": 15717696} +{"current_steps": 4995, "total_steps": 15621, "loss": 0.4619, "lr": 1.7201775607270564e-06, "epoch": 0.3197618590359132, "percentage": 31.98, "elapsed_time": "0:20:38", "remaining_time": "0:43:54", "throughput": 12705.92, "total_tokens": 15733184} +{"current_steps": 5000, "total_steps": 15621, "loss": 0.5318, "lr": 1.7194018926028733e-06, "epoch": 0.3200819409768901, "percentage": 32.01, "elapsed_time": "0:20:38", "remaining_time": "0:43:51", "throughput": 12712.09, "total_tokens": 15749888} +{"current_steps": 5005, "total_steps": 15621, "loss": 0.3622, "lr": 1.7186253262969803e-06, "epoch": 0.32040202291786696, "percentage": 32.04, "elapsed_time": "0:20:39", "remaining_time": "0:43:49", "throughput": 12719.2, "total_tokens": 15768384} +{"current_steps": 5010, "total_steps": 15621, "loss": 0.3291, "lr": 1.7178478627789299e-06, "epoch": 0.32072210485884384, "percentage": 32.07, "elapsed_time": "0:20:40", "remaining_time": "0:43:47", "throughput": 12725.07, "total_tokens": 15784448} +{"current_steps": 5015, "total_steps": 15621, "loss": 0.4122, "lr": 1.7170695030193944e-06, "epoch": 0.3210421867998208, "percentage": 32.1, "elapsed_time": "0:20:41", "remaining_time": "0:43:44", "throughput": 12730.91, "total_tokens": 15800512} +{"current_steps": 5020, "total_steps": 15621, "loss": 0.4778, "lr": 1.716290247990165e-06, "epoch": 0.32136226874079765, "percentage": 32.14, "elapsed_time": "0:20:41", "remaining_time": "0:43:42", "throughput": 12736.32, "total_tokens": 15815680} +{"current_steps": 5025, "total_steps": 15621, "loss": 0.3896, "lr": 1.715510098664151e-06, "epoch": 0.32168235068177453, "percentage": 32.17, "elapsed_time": "0:20:42", "remaining_time": "0:43:39", "throughput": 12741.58, "total_tokens": 15830528} +{"current_steps": 5030, "total_steps": 15621, "loss": 0.5141, "lr": 1.7147290560153777e-06, "epoch": 0.3220024326227514, "percentage": 32.2, "elapsed_time": "0:20:43", "remaining_time": "0:43:37", "throughput": 12746.91, "total_tokens": 15845568} +{"current_steps": 5035, "total_steps": 15621, "loss": 0.447, "lr": 1.7139471210189862e-06, "epoch": 0.3223225145637283, "percentage": 32.23, "elapsed_time": "0:20:43", "remaining_time": "0:43:35", "throughput": 12752.8, "total_tokens": 15861632} +{"current_steps": 5040, "total_steps": 15621, "loss": 0.543, "lr": 1.7131642946512312e-06, "epoch": 0.3226425965047052, "percentage": 32.26, "elapsed_time": "0:20:44", "remaining_time": "0:43:32", "throughput": 12758.62, "total_tokens": 15877632} +{"current_steps": 5045, "total_steps": 15621, "loss": 0.3918, "lr": 1.712380577889481e-06, "epoch": 0.3229626784456821, "percentage": 32.3, "elapsed_time": "0:20:45", "remaining_time": "0:43:30", "throughput": 12764.24, "total_tokens": 15893184} +{"current_steps": 5050, "total_steps": 15621, "loss": 0.3963, "lr": 1.711595971712215e-06, "epoch": 0.323282760386659, "percentage": 32.33, "elapsed_time": "0:20:45", "remaining_time": "0:43:27", "throughput": 12769.68, "total_tokens": 15908416} +{"current_steps": 5055, "total_steps": 15621, "loss": 0.4042, "lr": 1.7108104770990234e-06, "epoch": 0.32360284232763586, "percentage": 32.36, "elapsed_time": "0:20:46", "remaining_time": "0:43:25", "throughput": 12775.38, "total_tokens": 15924224} +{"current_steps": 5060, "total_steps": 15621, "loss": 0.254, "lr": 1.7100240950306052e-06, "epoch": 0.32392292426861274, "percentage": 32.39, "elapsed_time": "0:20:47", "remaining_time": "0:43:23", "throughput": 12781.1, "total_tokens": 15940032} +{"current_steps": 5065, "total_steps": 15621, "loss": 0.4647, "lr": 1.7092368264887677e-06, "epoch": 0.3242430062095897, "percentage": 32.42, "elapsed_time": "0:20:47", "remaining_time": "0:43:20", "throughput": 12786.38, "total_tokens": 15954944} +{"current_steps": 5070, "total_steps": 15621, "loss": 0.4846, "lr": 1.7084486724564252e-06, "epoch": 0.32456308815056656, "percentage": 32.46, "elapsed_time": "0:20:48", "remaining_time": "0:43:18", "throughput": 12792.07, "total_tokens": 15970624} +{"current_steps": 5075, "total_steps": 15621, "loss": 0.4092, "lr": 1.707659633917597e-06, "epoch": 0.32488317009154344, "percentage": 32.49, "elapsed_time": "0:20:49", "remaining_time": "0:43:15", "throughput": 12797.9, "total_tokens": 15986688} +{"current_steps": 5080, "total_steps": 15621, "loss": 0.4098, "lr": 1.7068697118574064e-06, "epoch": 0.3252032520325203, "percentage": 32.52, "elapsed_time": "0:20:49", "remaining_time": "0:43:13", "throughput": 12803.71, "total_tokens": 16002752} +{"current_steps": 5085, "total_steps": 15621, "loss": 0.4931, "lr": 1.7060789072620816e-06, "epoch": 0.3255233339734972, "percentage": 32.55, "elapsed_time": "0:20:50", "remaining_time": "0:43:11", "throughput": 12809.24, "total_tokens": 16018112} +{"current_steps": 5090, "total_steps": 15621, "loss": 0.4288, "lr": 1.7052872211189509e-06, "epoch": 0.32584341591447413, "percentage": 32.58, "elapsed_time": "0:20:51", "remaining_time": "0:43:08", "throughput": 12814.94, "total_tokens": 16033984} +{"current_steps": 5095, "total_steps": 15621, "loss": 0.3304, "lr": 1.7044946544164431e-06, "epoch": 0.326163497855451, "percentage": 32.62, "elapsed_time": "0:20:51", "remaining_time": "0:43:06", "throughput": 12820.44, "total_tokens": 16049536} +{"current_steps": 5100, "total_steps": 15621, "loss": 0.3713, "lr": 1.703701208144088e-06, "epoch": 0.3264835797964279, "percentage": 32.65, "elapsed_time": "0:20:52", "remaining_time": "0:43:03", "throughput": 12826.58, "total_tokens": 16066304} +{"current_steps": 5105, "total_steps": 15621, "loss": 0.4829, "lr": 1.702906883292512e-06, "epoch": 0.32680366173740477, "percentage": 32.68, "elapsed_time": "0:20:53", "remaining_time": "0:43:01", "throughput": 12831.9, "total_tokens": 16081536} +{"current_steps": 5110, "total_steps": 15621, "loss": 0.5586, "lr": 1.7021116808534393e-06, "epoch": 0.32712374367838165, "percentage": 32.71, "elapsed_time": "0:20:53", "remaining_time": "0:42:59", "throughput": 12837.33, "total_tokens": 16096896} +{"current_steps": 5115, "total_steps": 15621, "loss": 0.443, "lr": 1.7013156018196893e-06, "epoch": 0.32744382561935853, "percentage": 32.74, "elapsed_time": "0:20:54", "remaining_time": "0:42:56", "throughput": 12843.11, "total_tokens": 16112960} +{"current_steps": 5120, "total_steps": 15621, "loss": 0.4038, "lr": 1.7005186471851759e-06, "epoch": 0.32776390756033547, "percentage": 32.78, "elapsed_time": "0:20:55", "remaining_time": "0:42:54", "throughput": 12849.05, "total_tokens": 16129344} +{"current_steps": 5125, "total_steps": 15621, "loss": 0.6052, "lr": 1.6997208179449066e-06, "epoch": 0.32808398950131235, "percentage": 32.81, "elapsed_time": "0:20:56", "remaining_time": "0:42:52", "throughput": 12855.99, "total_tokens": 16147776} +{"current_steps": 5130, "total_steps": 15621, "loss": 0.3508, "lr": 1.6989221150949806e-06, "epoch": 0.3284040714422892, "percentage": 32.84, "elapsed_time": "0:20:56", "remaining_time": "0:42:50", "throughput": 12861.22, "total_tokens": 16162880} +{"current_steps": 5135, "total_steps": 15621, "loss": 0.2676, "lr": 1.6981225396325873e-06, "epoch": 0.3287241533832661, "percentage": 32.87, "elapsed_time": "0:20:57", "remaining_time": "0:42:47", "throughput": 12867.23, "total_tokens": 16179392} +{"current_steps": 5140, "total_steps": 15621, "loss": 0.504, "lr": 1.6973220925560067e-06, "epoch": 0.329044235324243, "percentage": 32.9, "elapsed_time": "0:20:58", "remaining_time": "0:42:45", "throughput": 12872.52, "total_tokens": 16194560} +{"current_steps": 5145, "total_steps": 15621, "loss": 0.4243, "lr": 1.696520774864606e-06, "epoch": 0.3293643172652199, "percentage": 32.94, "elapsed_time": "0:20:58", "remaining_time": "0:42:42", "throughput": 12878.07, "total_tokens": 16210112} +{"current_steps": 5150, "total_steps": 15621, "loss": 0.464, "lr": 1.69571858755884e-06, "epoch": 0.3296843992061968, "percentage": 32.97, "elapsed_time": "0:20:59", "remaining_time": "0:42:40", "throughput": 12883.72, "total_tokens": 16225856} +{"current_steps": 5155, "total_steps": 15621, "loss": 0.4314, "lr": 1.6949155316402487e-06, "epoch": 0.3300044811471737, "percentage": 33.0, "elapsed_time": "0:21:00", "remaining_time": "0:42:38", "throughput": 12889.17, "total_tokens": 16241536} +{"current_steps": 5160, "total_steps": 15621, "loss": 0.3807, "lr": 1.6941116081114566e-06, "epoch": 0.33032456308815056, "percentage": 33.03, "elapsed_time": "0:21:00", "remaining_time": "0:42:35", "throughput": 12894.31, "total_tokens": 16256384} +{"current_steps": 5165, "total_steps": 15621, "loss": 0.398, "lr": 1.6933068179761722e-06, "epoch": 0.33064464502912744, "percentage": 33.06, "elapsed_time": "0:21:01", "remaining_time": "0:42:33", "throughput": 12899.54, "total_tokens": 16271360} +{"current_steps": 5170, "total_steps": 15621, "loss": 0.4122, "lr": 1.6925011622391857e-06, "epoch": 0.3309647269701044, "percentage": 33.1, "elapsed_time": "0:21:02", "remaining_time": "0:42:31", "throughput": 12904.89, "total_tokens": 16286656} +{"current_steps": 5175, "total_steps": 15621, "loss": 0.4255, "lr": 1.6916946419063667e-06, "epoch": 0.33128480891108125, "percentage": 33.13, "elapsed_time": "0:21:02", "remaining_time": "0:42:28", "throughput": 12910.54, "total_tokens": 16302592} +{"current_steps": 5180, "total_steps": 15621, "loss": 0.5442, "lr": 1.690887257984666e-06, "epoch": 0.33160489085205813, "percentage": 33.16, "elapsed_time": "0:21:03", "remaining_time": "0:42:26", "throughput": 12916.22, "total_tokens": 16318656} +{"current_steps": 5185, "total_steps": 15621, "loss": 0.4755, "lr": 1.690079011482112e-06, "epoch": 0.331924972793035, "percentage": 33.19, "elapsed_time": "0:21:04", "remaining_time": "0:42:24", "throughput": 12921.68, "total_tokens": 16334016} +{"current_steps": 5190, "total_steps": 15621, "loss": 0.5287, "lr": 1.6892699034078096e-06, "epoch": 0.3322450547340119, "percentage": 33.22, "elapsed_time": "0:21:04", "remaining_time": "0:42:21", "throughput": 12927.34, "total_tokens": 16349888} +{"current_steps": 5195, "total_steps": 15621, "loss": 0.503, "lr": 1.68845993477194e-06, "epoch": 0.33256513667498877, "percentage": 33.26, "elapsed_time": "0:21:05", "remaining_time": "0:42:19", "throughput": 12932.62, "total_tokens": 16365056} +{"current_steps": 5200, "total_steps": 15621, "loss": 0.3973, "lr": 1.6876491065857584e-06, "epoch": 0.3328852186159657, "percentage": 33.29, "elapsed_time": "0:21:06", "remaining_time": "0:42:17", "throughput": 12937.77, "total_tokens": 16380032} +{"current_steps": 5205, "total_steps": 15621, "loss": 0.6461, "lr": 1.6868374198615928e-06, "epoch": 0.3332053005569426, "percentage": 33.32, "elapsed_time": "0:21:06", "remaining_time": "0:42:14", "throughput": 12942.8, "total_tokens": 16394752} +{"current_steps": 5210, "total_steps": 15621, "loss": 0.4714, "lr": 1.6860248756128448e-06, "epoch": 0.33352538249791946, "percentage": 33.35, "elapsed_time": "0:21:07", "remaining_time": "0:42:12", "throughput": 12948.25, "total_tokens": 16410368} +{"current_steps": 5215, "total_steps": 15621, "loss": 0.4142, "lr": 1.6852114748539844e-06, "epoch": 0.33384546443889634, "percentage": 33.38, "elapsed_time": "0:21:08", "remaining_time": "0:42:10", "throughput": 12953.24, "total_tokens": 16425088} +{"current_steps": 5220, "total_steps": 15621, "loss": 0.3446, "lr": 1.6843972186005525e-06, "epoch": 0.3341655463798732, "percentage": 33.42, "elapsed_time": "0:21:08", "remaining_time": "0:42:07", "throughput": 12958.95, "total_tokens": 16441152} +{"current_steps": 5225, "total_steps": 15621, "loss": 0.4705, "lr": 1.6835821078691577e-06, "epoch": 0.33448562832085016, "percentage": 33.45, "elapsed_time": "0:21:09", "remaining_time": "0:42:05", "throughput": 12965.13, "total_tokens": 16458240} +{"current_steps": 5230, "total_steps": 15621, "loss": 0.4342, "lr": 1.6827661436774746e-06, "epoch": 0.33480571026182704, "percentage": 33.48, "elapsed_time": "0:21:10", "remaining_time": "0:42:03", "throughput": 12970.73, "total_tokens": 16474112} +{"current_steps": 5235, "total_steps": 15621, "loss": 0.3957, "lr": 1.681949327044245e-06, "epoch": 0.3351257922028039, "percentage": 33.51, "elapsed_time": "0:21:10", "remaining_time": "0:42:01", "throughput": 12976.62, "total_tokens": 16490560} +{"current_steps": 5240, "total_steps": 15621, "loss": 0.6821, "lr": 1.6811316589892734e-06, "epoch": 0.3354458741437808, "percentage": 33.54, "elapsed_time": "0:21:11", "remaining_time": "0:41:58", "throughput": 12981.82, "total_tokens": 16505728} +{"current_steps": 5245, "total_steps": 15621, "loss": 0.4364, "lr": 1.6803131405334284e-06, "epoch": 0.3357659560847577, "percentage": 33.58, "elapsed_time": "0:21:12", "remaining_time": "0:41:56", "throughput": 12987.54, "total_tokens": 16521856} +{"current_steps": 5250, "total_steps": 15621, "loss": 0.4436, "lr": 1.6794937726986396e-06, "epoch": 0.3360860380257346, "percentage": 33.61, "elapsed_time": "0:21:12", "remaining_time": "0:41:54", "throughput": 12993.16, "total_tokens": 16537792} +{"current_steps": 5255, "total_steps": 15621, "loss": 0.4347, "lr": 1.6786735565078974e-06, "epoch": 0.3364061199667115, "percentage": 33.64, "elapsed_time": "0:21:13", "remaining_time": "0:41:52", "throughput": 12998.56, "total_tokens": 16553408} +{"current_steps": 5260, "total_steps": 15621, "loss": 0.4233, "lr": 1.677852492985251e-06, "epoch": 0.33672620190768837, "percentage": 33.67, "elapsed_time": "0:21:14", "remaining_time": "0:41:49", "throughput": 13004.51, "total_tokens": 16570112} +{"current_steps": 5265, "total_steps": 15621, "loss": 0.5003, "lr": 1.6770305831558086e-06, "epoch": 0.33704628384866525, "percentage": 33.7, "elapsed_time": "0:21:14", "remaining_time": "0:41:47", "throughput": 13010.15, "total_tokens": 16586304} +{"current_steps": 5270, "total_steps": 15621, "loss": 0.3912, "lr": 1.6762078280457342e-06, "epoch": 0.33736636578964213, "percentage": 33.74, "elapsed_time": "0:21:15", "remaining_time": "0:41:45", "throughput": 13015.48, "total_tokens": 16601920} +{"current_steps": 5275, "total_steps": 15621, "loss": 0.4725, "lr": 1.6753842286822465e-06, "epoch": 0.33768644773061907, "percentage": 33.77, "elapsed_time": "0:21:16", "remaining_time": "0:41:43", "throughput": 13021.26, "total_tokens": 16618240} +{"current_steps": 5280, "total_steps": 15621, "loss": 0.5845, "lr": 1.6745597860936199e-06, "epoch": 0.33800652967159595, "percentage": 33.8, "elapsed_time": "0:21:16", "remaining_time": "0:41:40", "throughput": 13026.45, "total_tokens": 16633408} +{"current_steps": 5285, "total_steps": 15621, "loss": 0.4484, "lr": 1.6737345013091794e-06, "epoch": 0.3383266116125728, "percentage": 33.83, "elapsed_time": "0:21:17", "remaining_time": "0:41:38", "throughput": 13032.13, "total_tokens": 16649664} +{"current_steps": 5290, "total_steps": 15621, "loss": 0.4686, "lr": 1.672908375359304e-06, "epoch": 0.3386466935535497, "percentage": 33.86, "elapsed_time": "0:21:18", "remaining_time": "0:41:36", "throughput": 13037.32, "total_tokens": 16664896} +{"current_steps": 5295, "total_steps": 15621, "loss": 0.5565, "lr": 1.6720814092754209e-06, "epoch": 0.3389667754945266, "percentage": 33.9, "elapsed_time": "0:21:18", "remaining_time": "0:41:34", "throughput": 13042.58, "total_tokens": 16680384} +{"current_steps": 5300, "total_steps": 15621, "loss": 0.3785, "lr": 1.6712536040900075e-06, "epoch": 0.33928685743550346, "percentage": 33.93, "elapsed_time": "0:21:19", "remaining_time": "0:41:31", "throughput": 13048.1, "total_tokens": 16696192} +{"current_steps": 5305, "total_steps": 15621, "loss": 0.4741, "lr": 1.6704249608365878e-06, "epoch": 0.3396069393764804, "percentage": 33.96, "elapsed_time": "0:21:20", "remaining_time": "0:41:30", "throughput": 13059.68, "total_tokens": 16727104} +{"current_steps": 5310, "total_steps": 15621, "loss": 0.4291, "lr": 1.669595480549733e-06, "epoch": 0.3399270213174573, "percentage": 33.99, "elapsed_time": "0:21:21", "remaining_time": "0:41:28", "throughput": 13064.58, "total_tokens": 16741696} +{"current_steps": 5315, "total_steps": 15621, "loss": 0.4384, "lr": 1.6687651642650587e-06, "epoch": 0.34024710325843416, "percentage": 34.02, "elapsed_time": "0:21:22", "remaining_time": "0:41:26", "throughput": 13069.84, "total_tokens": 16757120} +{"current_steps": 5320, "total_steps": 15621, "loss": 0.4572, "lr": 1.6679340130192245e-06, "epoch": 0.34056718519941104, "percentage": 34.06, "elapsed_time": "0:21:22", "remaining_time": "0:41:23", "throughput": 13074.97, "total_tokens": 16772416} +{"current_steps": 5325, "total_steps": 15621, "loss": 0.3287, "lr": 1.667102027849933e-06, "epoch": 0.3408872671403879, "percentage": 34.09, "elapsed_time": "0:21:23", "remaining_time": "0:41:21", "throughput": 13080.51, "total_tokens": 16788352} +{"current_steps": 5330, "total_steps": 15621, "loss": 0.3582, "lr": 1.6662692097959266e-06, "epoch": 0.34120734908136485, "percentage": 34.12, "elapsed_time": "0:21:24", "remaining_time": "0:41:19", "throughput": 13085.68, "total_tokens": 16803648} +{"current_steps": 5335, "total_steps": 15621, "loss": 0.4741, "lr": 1.6654355598969894e-06, "epoch": 0.34152743102234173, "percentage": 34.15, "elapsed_time": "0:21:24", "remaining_time": "0:41:17", "throughput": 13090.86, "total_tokens": 16818944} +{"current_steps": 5340, "total_steps": 15621, "loss": 0.5007, "lr": 1.6646010791939423e-06, "epoch": 0.3418475129633186, "percentage": 34.18, "elapsed_time": "0:21:25", "remaining_time": "0:41:14", "throughput": 13095.91, "total_tokens": 16833984} +{"current_steps": 5345, "total_steps": 15621, "loss": 0.5632, "lr": 1.6637657687286446e-06, "epoch": 0.3421675949042955, "percentage": 34.22, "elapsed_time": "0:21:26", "remaining_time": "0:41:12", "throughput": 13101.16, "total_tokens": 16849280} +{"current_steps": 5350, "total_steps": 15621, "loss": 0.4051, "lr": 1.6629296295439912e-06, "epoch": 0.34248767684527237, "percentage": 34.25, "elapsed_time": "0:21:26", "remaining_time": "0:41:10", "throughput": 13106.78, "total_tokens": 16865664} +{"current_steps": 5355, "total_steps": 15621, "loss": 0.4945, "lr": 1.6620926626839116e-06, "epoch": 0.3428077587862493, "percentage": 34.28, "elapsed_time": "0:21:27", "remaining_time": "0:41:08", "throughput": 13112.12, "total_tokens": 16881536} +{"current_steps": 5360, "total_steps": 15621, "loss": 0.4456, "lr": 1.661254869193369e-06, "epoch": 0.3431278407272262, "percentage": 34.31, "elapsed_time": "0:21:28", "remaining_time": "0:41:06", "throughput": 13118.21, "total_tokens": 16898816} +{"current_steps": 5365, "total_steps": 15621, "loss": 0.5174, "lr": 1.6604162501183581e-06, "epoch": 0.34344792266820307, "percentage": 34.34, "elapsed_time": "0:21:28", "remaining_time": "0:41:03", "throughput": 13123.85, "total_tokens": 16915136} +{"current_steps": 5370, "total_steps": 15621, "loss": 0.4742, "lr": 1.6595768065059045e-06, "epoch": 0.34376800460917994, "percentage": 34.38, "elapsed_time": "0:21:29", "remaining_time": "0:41:01", "throughput": 13129.3, "total_tokens": 16931200} +{"current_steps": 5375, "total_steps": 15621, "loss": 0.4691, "lr": 1.6587365394040641e-06, "epoch": 0.3440880865501568, "percentage": 34.41, "elapsed_time": "0:21:30", "remaining_time": "0:40:59", "throughput": 13134.52, "total_tokens": 16946816} +{"current_steps": 5380, "total_steps": 15621, "loss": 0.3826, "lr": 1.6578954498619195e-06, "epoch": 0.3444081684911337, "percentage": 34.44, "elapsed_time": "0:21:30", "remaining_time": "0:40:57", "throughput": 13139.98, "total_tokens": 16962880} +{"current_steps": 5385, "total_steps": 15621, "loss": 0.4712, "lr": 1.6570535389295814e-06, "epoch": 0.34472825043211064, "percentage": 34.47, "elapsed_time": "0:21:31", "remaining_time": "0:40:55", "throughput": 13145.16, "total_tokens": 16978240} +{"current_steps": 5390, "total_steps": 15621, "loss": 0.3684, "lr": 1.6562108076581853e-06, "epoch": 0.3450483323730875, "percentage": 34.5, "elapsed_time": "0:21:32", "remaining_time": "0:40:52", "throughput": 13150.39, "total_tokens": 16993728} +{"current_steps": 5395, "total_steps": 15621, "loss": 0.5846, "lr": 1.6553672570998912e-06, "epoch": 0.3453684143140644, "percentage": 34.54, "elapsed_time": "0:21:32", "remaining_time": "0:40:50", "throughput": 13155.78, "total_tokens": 17009728} +{"current_steps": 5400, "total_steps": 15621, "loss": 0.414, "lr": 1.6545228883078815e-06, "epoch": 0.3456884962550413, "percentage": 34.57, "elapsed_time": "0:21:33", "remaining_time": "0:40:48", "throughput": 13160.71, "total_tokens": 17024640} +{"current_steps": 5405, "total_steps": 15621, "loss": 0.36, "lr": 1.653677702336361e-06, "epoch": 0.34600857819601816, "percentage": 34.6, "elapsed_time": "0:21:34", "remaining_time": "0:40:46", "throughput": 13166.05, "total_tokens": 17040512} +{"current_steps": 5410, "total_steps": 15621, "loss": 0.4801, "lr": 1.6528317002405538e-06, "epoch": 0.3463286601369951, "percentage": 34.63, "elapsed_time": "0:21:34", "remaining_time": "0:40:44", "throughput": 13171.28, "total_tokens": 17056064} +{"current_steps": 5415, "total_steps": 15621, "loss": 0.3685, "lr": 1.6519848830767043e-06, "epoch": 0.34664874207797197, "percentage": 34.66, "elapsed_time": "0:21:35", "remaining_time": "0:40:41", "throughput": 13176.92, "total_tokens": 17072448} +{"current_steps": 5420, "total_steps": 15621, "loss": 0.6228, "lr": 1.6511372519020726e-06, "epoch": 0.34696882401894885, "percentage": 34.7, "elapsed_time": "0:21:36", "remaining_time": "0:40:39", "throughput": 13182.24, "total_tokens": 17088320} +{"current_steps": 5425, "total_steps": 15621, "loss": 0.4376, "lr": 1.650288807774937e-06, "epoch": 0.34728890595992573, "percentage": 34.73, "elapsed_time": "0:21:36", "remaining_time": "0:40:37", "throughput": 13187.73, "total_tokens": 17104448} +{"current_steps": 5430, "total_steps": 15621, "loss": 0.3981, "lr": 1.6494395517545893e-06, "epoch": 0.3476089879009026, "percentage": 34.76, "elapsed_time": "0:21:37", "remaining_time": "0:40:35", "throughput": 13193.85, "total_tokens": 17121856} +{"current_steps": 5435, "total_steps": 15621, "loss": 0.5135, "lr": 1.6485894849013362e-06, "epoch": 0.34792906984187955, "percentage": 34.79, "elapsed_time": "0:21:38", "remaining_time": "0:40:33", "throughput": 13198.65, "total_tokens": 17136512} +{"current_steps": 5440, "total_steps": 15621, "loss": 0.4487, "lr": 1.6477386082764961e-06, "epoch": 0.3482491517828564, "percentage": 34.82, "elapsed_time": "0:21:39", "remaining_time": "0:40:31", "throughput": 13204.15, "total_tokens": 17152640} +{"current_steps": 5445, "total_steps": 15621, "loss": 0.3645, "lr": 1.6468869229423983e-06, "epoch": 0.3485692337238333, "percentage": 34.86, "elapsed_time": "0:21:39", "remaining_time": "0:40:28", "throughput": 13209.06, "total_tokens": 17167680} +{"current_steps": 5450, "total_steps": 15621, "loss": 0.6431, "lr": 1.6460344299623813e-06, "epoch": 0.3488893156648102, "percentage": 34.89, "elapsed_time": "0:21:40", "remaining_time": "0:40:26", "throughput": 13214.22, "total_tokens": 17183296} +{"current_steps": 5455, "total_steps": 15621, "loss": 0.5412, "lr": 1.6451811304007939e-06, "epoch": 0.34920939760578706, "percentage": 34.92, "elapsed_time": "0:21:41", "remaining_time": "0:40:24", "throughput": 13219.14, "total_tokens": 17198272} +{"current_steps": 5460, "total_steps": 15621, "loss": 0.5194, "lr": 1.6443270253229895e-06, "epoch": 0.349529479546764, "percentage": 34.95, "elapsed_time": "0:21:41", "remaining_time": "0:40:22", "throughput": 13224.16, "total_tokens": 17213376} +{"current_steps": 5465, "total_steps": 15621, "loss": 0.4614, "lr": 1.6434721157953288e-06, "epoch": 0.3498495614877409, "percentage": 34.98, "elapsed_time": "0:21:42", "remaining_time": "0:40:20", "throughput": 13229.71, "total_tokens": 17229632} +{"current_steps": 5470, "total_steps": 15621, "loss": 0.5873, "lr": 1.6426164028851765e-06, "epoch": 0.35016964342871776, "percentage": 35.02, "elapsed_time": "0:21:43", "remaining_time": "0:40:18", "throughput": 13235.12, "total_tokens": 17245696} +{"current_steps": 5474, "total_steps": 15621, "eval_loss": 0.44318872690200806, "epoch": 0.3504257089814993, "percentage": 35.04, "elapsed_time": "0:22:34", "remaining_time": "0:41:50", "throughput": 12745.62, "total_tokens": 17259840} +{"current_steps": 5475, "total_steps": 15621, "loss": 0.3797, "lr": 1.6417598876609002e-06, "epoch": 0.35048972536969464, "percentage": 35.05, "elapsed_time": "0:23:13", "remaining_time": "0:43:02", "throughput": 12387.84, "total_tokens": 17262976} +{"current_steps": 5480, "total_steps": 15621, "loss": 0.4144, "lr": 1.640902571191869e-06, "epoch": 0.3508098073106715, "percentage": 35.08, "elapsed_time": "0:23:14", "remaining_time": "0:43:00", "throughput": 12392.98, "total_tokens": 17278336} +{"current_steps": 5485, "total_steps": 15621, "loss": 0.3617, "lr": 1.6400444545484524e-06, "epoch": 0.3511298892516484, "percentage": 35.11, "elapsed_time": "0:23:14", "remaining_time": "0:42:57", "throughput": 12397.92, "total_tokens": 17293248} +{"current_steps": 5490, "total_steps": 15621, "loss": 0.428, "lr": 1.6391855388020193e-06, "epoch": 0.35144997119262533, "percentage": 35.14, "elapsed_time": "0:23:15", "remaining_time": "0:42:55", "throughput": 12403.27, "total_tokens": 17309184} +{"current_steps": 5495, "total_steps": 15621, "loss": 0.4654, "lr": 1.6383258250249363e-06, "epoch": 0.3517700531336022, "percentage": 35.18, "elapsed_time": "0:23:16", "remaining_time": "0:42:52", "throughput": 12408.68, "total_tokens": 17325248} +{"current_steps": 5500, "total_steps": 15621, "loss": 0.4297, "lr": 1.6374653142905661e-06, "epoch": 0.3520901350745791, "percentage": 35.21, "elapsed_time": "0:23:16", "remaining_time": "0:42:50", "throughput": 12413.86, "total_tokens": 17340736} +{"current_steps": 5505, "total_steps": 15621, "loss": 0.4224, "lr": 1.6366040076732662e-06, "epoch": 0.35241021701555597, "percentage": 35.24, "elapsed_time": "0:23:17", "remaining_time": "0:42:48", "throughput": 12418.93, "total_tokens": 17355904} +{"current_steps": 5510, "total_steps": 15621, "loss": 0.4675, "lr": 1.6357419062483882e-06, "epoch": 0.35273029895653285, "percentage": 35.27, "elapsed_time": "0:23:18", "remaining_time": "0:42:45", "throughput": 12424.03, "total_tokens": 17371264} +{"current_steps": 5515, "total_steps": 15621, "loss": 0.4268, "lr": 1.6348790110922758e-06, "epoch": 0.3530503808975098, "percentage": 35.31, "elapsed_time": "0:23:18", "remaining_time": "0:42:43", "throughput": 12430.02, "total_tokens": 17388608} +{"current_steps": 5520, "total_steps": 15621, "loss": 0.4558, "lr": 1.6340153232822635e-06, "epoch": 0.35337046283848667, "percentage": 35.34, "elapsed_time": "0:23:19", "remaining_time": "0:42:41", "throughput": 12435.06, "total_tokens": 17403712} +{"current_steps": 5525, "total_steps": 15621, "loss": 0.5137, "lr": 1.633150843896676e-06, "epoch": 0.35369054477946354, "percentage": 35.37, "elapsed_time": "0:23:20", "remaining_time": "0:42:38", "throughput": 12441.1, "total_tokens": 17421056} +{"current_steps": 5530, "total_steps": 15621, "loss": 0.5658, "lr": 1.6322855740148263e-06, "epoch": 0.3540106267204404, "percentage": 35.4, "elapsed_time": "0:23:20", "remaining_time": "0:42:36", "throughput": 12446.04, "total_tokens": 17436096} +{"current_steps": 5535, "total_steps": 15621, "loss": 0.3768, "lr": 1.6314195147170132e-06, "epoch": 0.3543307086614173, "percentage": 35.43, "elapsed_time": "0:23:21", "remaining_time": "0:42:34", "throughput": 12451.54, "total_tokens": 17452480} +{"current_steps": 5540, "total_steps": 15621, "loss": 0.4032, "lr": 1.6305526670845225e-06, "epoch": 0.35465079060239424, "percentage": 35.47, "elapsed_time": "0:23:22", "remaining_time": "0:42:31", "throughput": 12456.59, "total_tokens": 17467776} +{"current_steps": 5545, "total_steps": 15621, "loss": 0.4877, "lr": 1.6296850321996232e-06, "epoch": 0.3549708725433711, "percentage": 35.5, "elapsed_time": "0:23:22", "remaining_time": "0:42:29", "throughput": 12461.51, "total_tokens": 17482752} +{"current_steps": 5550, "total_steps": 15621, "loss": 0.3843, "lr": 1.6288166111455683e-06, "epoch": 0.355290954484348, "percentage": 35.53, "elapsed_time": "0:23:23", "remaining_time": "0:42:26", "throughput": 12466.46, "total_tokens": 17497792} +{"current_steps": 5555, "total_steps": 15621, "loss": 0.4878, "lr": 1.6279474050065906e-06, "epoch": 0.3556110364253249, "percentage": 35.56, "elapsed_time": "0:23:24", "remaining_time": "0:42:24", "throughput": 12471.48, "total_tokens": 17513024} +{"current_steps": 5560, "total_steps": 15621, "loss": 0.4049, "lr": 1.6270774148679054e-06, "epoch": 0.35593111836630176, "percentage": 35.59, "elapsed_time": "0:23:24", "remaining_time": "0:42:22", "throughput": 12476.73, "total_tokens": 17529024} +{"current_steps": 5565, "total_steps": 15621, "loss": 0.3788, "lr": 1.6262066418157048e-06, "epoch": 0.35625120030727864, "percentage": 35.63, "elapsed_time": "0:23:25", "remaining_time": "0:42:19", "throughput": 12481.58, "total_tokens": 17543936} +{"current_steps": 5570, "total_steps": 15621, "loss": 0.5444, "lr": 1.6253350869371595e-06, "epoch": 0.35657128224825557, "percentage": 35.66, "elapsed_time": "0:23:26", "remaining_time": "0:42:17", "throughput": 12486.56, "total_tokens": 17559168} +{"current_steps": 5575, "total_steps": 15621, "loss": 0.3861, "lr": 1.6244627513204158e-06, "epoch": 0.35689136418923245, "percentage": 35.69, "elapsed_time": "0:23:26", "remaining_time": "0:42:15", "throughput": 12491.78, "total_tokens": 17574912} +{"current_steps": 5580, "total_steps": 15621, "loss": 0.4319, "lr": 1.6235896360545954e-06, "epoch": 0.35721144613020933, "percentage": 35.72, "elapsed_time": "0:23:27", "remaining_time": "0:42:12", "throughput": 12496.82, "total_tokens": 17590272} +{"current_steps": 5585, "total_steps": 15621, "loss": 0.4466, "lr": 1.622715742229792e-06, "epoch": 0.3575315280711862, "percentage": 35.75, "elapsed_time": "0:23:28", "remaining_time": "0:42:10", "throughput": 12501.99, "total_tokens": 17605952} +{"current_steps": 5590, "total_steps": 15621, "loss": 0.3861, "lr": 1.6218410709370734e-06, "epoch": 0.3578516100121631, "percentage": 35.79, "elapsed_time": "0:23:28", "remaining_time": "0:42:08", "throughput": 12506.94, "total_tokens": 17621120} +{"current_steps": 5595, "total_steps": 15621, "loss": 0.5462, "lr": 1.6209656232684768e-06, "epoch": 0.35817169195314, "percentage": 35.82, "elapsed_time": "0:23:29", "remaining_time": "0:42:05", "throughput": 12511.81, "total_tokens": 17636096} +{"current_steps": 5600, "total_steps": 15621, "loss": 0.4566, "lr": 1.620089400317008e-06, "epoch": 0.3584917738941169, "percentage": 35.85, "elapsed_time": "0:23:30", "remaining_time": "0:42:03", "throughput": 12517.4, "total_tokens": 17652672} +{"current_steps": 5605, "total_steps": 15621, "loss": 0.4979, "lr": 1.6192124031766425e-06, "epoch": 0.3588118558350938, "percentage": 35.88, "elapsed_time": "0:23:30", "remaining_time": "0:42:01", "throughput": 12522.37, "total_tokens": 17668032} +{"current_steps": 5610, "total_steps": 15621, "loss": 0.4507, "lr": 1.6183346329423213e-06, "epoch": 0.35913193777607066, "percentage": 35.91, "elapsed_time": "0:23:31", "remaining_time": "0:41:58", "throughput": 12527.32, "total_tokens": 17683264} +{"current_steps": 5615, "total_steps": 15621, "loss": 0.3672, "lr": 1.6174560907099508e-06, "epoch": 0.35945201971704754, "percentage": 35.95, "elapsed_time": "0:23:32", "remaining_time": "0:41:56", "throughput": 12532.59, "total_tokens": 17699200} +{"current_steps": 5620, "total_steps": 15621, "loss": 0.3538, "lr": 1.6165767775764013e-06, "epoch": 0.3597721016580245, "percentage": 35.98, "elapsed_time": "0:23:32", "remaining_time": "0:41:54", "throughput": 12537.69, "total_tokens": 17714816} +{"current_steps": 5625, "total_steps": 15621, "loss": 0.4157, "lr": 1.6156966946395056e-06, "epoch": 0.36009218359900136, "percentage": 36.01, "elapsed_time": "0:23:33", "remaining_time": "0:41:52", "throughput": 12543.66, "total_tokens": 17732352} +{"current_steps": 5630, "total_steps": 15621, "loss": 0.536, "lr": 1.6148158429980577e-06, "epoch": 0.36041226553997824, "percentage": 36.04, "elapsed_time": "0:23:34", "remaining_time": "0:41:49", "throughput": 12548.9, "total_tokens": 17748288} +{"current_steps": 5635, "total_steps": 15621, "loss": 0.3758, "lr": 1.6139342237518108e-06, "epoch": 0.3607323474809551, "percentage": 36.07, "elapsed_time": "0:23:34", "remaining_time": "0:41:47", "throughput": 12553.75, "total_tokens": 17763520} +{"current_steps": 5640, "total_steps": 15621, "loss": 0.4256, "lr": 1.6130518380014773e-06, "epoch": 0.361052429421932, "percentage": 36.11, "elapsed_time": "0:23:35", "remaining_time": "0:41:45", "throughput": 12558.89, "total_tokens": 17779328} +{"current_steps": 5645, "total_steps": 15621, "loss": 0.4313, "lr": 1.6121686868487259e-06, "epoch": 0.3613725113629089, "percentage": 36.14, "elapsed_time": "0:23:36", "remaining_time": "0:41:43", "throughput": 12564.22, "total_tokens": 17795584} +{"current_steps": 5650, "total_steps": 15621, "loss": 0.4449, "lr": 1.6112847713961815e-06, "epoch": 0.3616925933038858, "percentage": 36.17, "elapsed_time": "0:23:37", "remaining_time": "0:41:40", "throughput": 12568.87, "total_tokens": 17810368} +{"current_steps": 5655, "total_steps": 15621, "loss": 0.4365, "lr": 1.610400092747423e-06, "epoch": 0.3620126752448627, "percentage": 36.2, "elapsed_time": "0:23:37", "remaining_time": "0:41:38", "throughput": 12574.09, "total_tokens": 17826496} +{"current_steps": 5660, "total_steps": 15621, "loss": 0.4266, "lr": 1.609514652006981e-06, "epoch": 0.36233275718583957, "percentage": 36.23, "elapsed_time": "0:23:38", "remaining_time": "0:41:36", "throughput": 12578.8, "total_tokens": 17841344} +{"current_steps": 5665, "total_steps": 15621, "loss": 0.5632, "lr": 1.60862845028034e-06, "epoch": 0.36265283912681645, "percentage": 36.27, "elapsed_time": "0:23:39", "remaining_time": "0:41:33", "throughput": 12583.98, "total_tokens": 17857408} +{"current_steps": 5670, "total_steps": 15621, "loss": 0.4209, "lr": 1.6077414886739327e-06, "epoch": 0.36297292106779333, "percentage": 36.3, "elapsed_time": "0:23:39", "remaining_time": "0:41:31", "throughput": 12589.1, "total_tokens": 17873280} +{"current_steps": 5675, "total_steps": 15621, "loss": 0.5023, "lr": 1.6068537682951412e-06, "epoch": 0.36329300300877027, "percentage": 36.33, "elapsed_time": "0:23:40", "remaining_time": "0:41:29", "throughput": 12593.96, "total_tokens": 17888448} +{"current_steps": 5680, "total_steps": 15621, "loss": 0.4459, "lr": 1.6059652902522947e-06, "epoch": 0.36361308494974715, "percentage": 36.36, "elapsed_time": "0:23:41", "remaining_time": "0:41:27", "throughput": 12599.05, "total_tokens": 17904320} +{"current_steps": 5685, "total_steps": 15621, "loss": 0.3725, "lr": 1.6050760556546683e-06, "epoch": 0.363933166890724, "percentage": 36.39, "elapsed_time": "0:23:41", "remaining_time": "0:41:24", "throughput": 12603.98, "total_tokens": 17919744} +{"current_steps": 5690, "total_steps": 15621, "loss": 0.3823, "lr": 1.6041860656124823e-06, "epoch": 0.3642532488317009, "percentage": 36.43, "elapsed_time": "0:23:42", "remaining_time": "0:41:22", "throughput": 12608.74, "total_tokens": 17934656} +{"current_steps": 5695, "total_steps": 15621, "loss": 0.5608, "lr": 1.6032953212368993e-06, "epoch": 0.3645733307726778, "percentage": 36.46, "elapsed_time": "0:23:43", "remaining_time": "0:41:20", "throughput": 12614.03, "total_tokens": 17950976} +{"current_steps": 5700, "total_steps": 15621, "loss": 0.465, "lr": 1.6024038236400243e-06, "epoch": 0.3648934127136547, "percentage": 36.49, "elapsed_time": "0:23:43", "remaining_time": "0:41:18", "throughput": 12618.96, "total_tokens": 17966400} +{"current_steps": 5705, "total_steps": 15621, "loss": 0.5704, "lr": 1.6015115739349027e-06, "epoch": 0.3652134946546316, "percentage": 36.52, "elapsed_time": "0:23:44", "remaining_time": "0:41:15", "throughput": 12624.71, "total_tokens": 17983872} +{"current_steps": 5710, "total_steps": 15621, "loss": 0.5358, "lr": 1.6006185732355183e-06, "epoch": 0.3655335765956085, "percentage": 36.55, "elapsed_time": "0:23:45", "remaining_time": "0:41:13", "throughput": 12629.84, "total_tokens": 17999680} +{"current_steps": 5715, "total_steps": 15621, "loss": 0.3807, "lr": 1.5997248226567931e-06, "epoch": 0.36585365853658536, "percentage": 36.59, "elapsed_time": "0:23:45", "remaining_time": "0:41:11", "throughput": 12634.61, "total_tokens": 18014784} +{"current_steps": 5720, "total_steps": 15621, "loss": 0.5063, "lr": 1.5988303233145853e-06, "epoch": 0.36617374047756224, "percentage": 36.62, "elapsed_time": "0:23:46", "remaining_time": "0:41:09", "throughput": 12639.38, "total_tokens": 18029888} +{"current_steps": 5725, "total_steps": 15621, "loss": 0.3721, "lr": 1.597935076325688e-06, "epoch": 0.3664938224185392, "percentage": 36.65, "elapsed_time": "0:23:47", "remaining_time": "0:41:06", "throughput": 12644.41, "total_tokens": 18045632} +{"current_steps": 5730, "total_steps": 15621, "loss": 0.5996, "lr": 1.5970390828078272e-06, "epoch": 0.36681390435951605, "percentage": 36.68, "elapsed_time": "0:23:47", "remaining_time": "0:41:04", "throughput": 12649.23, "total_tokens": 18060928} +{"current_steps": 5735, "total_steps": 15621, "loss": 0.4616, "lr": 1.5961423438796615e-06, "epoch": 0.36713398630049293, "percentage": 36.71, "elapsed_time": "0:23:48", "remaining_time": "0:41:02", "throughput": 12654.1, "total_tokens": 18076352} +{"current_steps": 5740, "total_steps": 15621, "loss": 0.45, "lr": 1.59524486066078e-06, "epoch": 0.3674540682414698, "percentage": 36.75, "elapsed_time": "0:23:49", "remaining_time": "0:41:00", "throughput": 12659.13, "total_tokens": 18092096} +{"current_steps": 5745, "total_steps": 15621, "loss": 0.5875, "lr": 1.5943466342717012e-06, "epoch": 0.3677741501824467, "percentage": 36.78, "elapsed_time": "0:23:49", "remaining_time": "0:40:57", "throughput": 12664.07, "total_tokens": 18107648} +{"current_steps": 5750, "total_steps": 15621, "loss": 0.4526, "lr": 1.5934476658338708e-06, "epoch": 0.36809423212342357, "percentage": 36.81, "elapsed_time": "0:23:50", "remaining_time": "0:40:55", "throughput": 12668.99, "total_tokens": 18123264} +{"current_steps": 5755, "total_steps": 15621, "loss": 0.5482, "lr": 1.5925479564696619e-06, "epoch": 0.3684143140644005, "percentage": 36.84, "elapsed_time": "0:23:51", "remaining_time": "0:40:53", "throughput": 12673.76, "total_tokens": 18138368} +{"current_steps": 5760, "total_steps": 15621, "loss": 0.3433, "lr": 1.5916475073023721e-06, "epoch": 0.3687343960053774, "percentage": 36.87, "elapsed_time": "0:23:51", "remaining_time": "0:40:51", "throughput": 12678.9, "total_tokens": 18154432} +{"current_steps": 5765, "total_steps": 15621, "loss": 0.3385, "lr": 1.5907463194562226e-06, "epoch": 0.36905447794635426, "percentage": 36.91, "elapsed_time": "0:23:52", "remaining_time": "0:40:49", "throughput": 12684.32, "total_tokens": 18171200} +{"current_steps": 5770, "total_steps": 15621, "loss": 0.3763, "lr": 1.589844394056357e-06, "epoch": 0.36937455988733114, "percentage": 36.94, "elapsed_time": "0:23:53", "remaining_time": "0:40:46", "throughput": 12689.45, "total_tokens": 18187008} +{"current_steps": 5775, "total_steps": 15621, "loss": 0.3462, "lr": 1.5889417322288403e-06, "epoch": 0.369694641828308, "percentage": 36.97, "elapsed_time": "0:23:53", "remaining_time": "0:40:44", "throughput": 12694.51, "total_tokens": 18202944} +{"current_steps": 5780, "total_steps": 15621, "loss": 0.4963, "lr": 1.5880383351006556e-06, "epoch": 0.37001472376928496, "percentage": 37.0, "elapsed_time": "0:23:54", "remaining_time": "0:40:42", "throughput": 12699.19, "total_tokens": 18217984} +{"current_steps": 5785, "total_steps": 15621, "loss": 0.5257, "lr": 1.5871342037997055e-06, "epoch": 0.37033480571026184, "percentage": 37.03, "elapsed_time": "0:23:55", "remaining_time": "0:40:40", "throughput": 12704.24, "total_tokens": 18233984} +{"current_steps": 5790, "total_steps": 15621, "loss": 0.416, "lr": 1.5862293394548082e-06, "epoch": 0.3706548876512387, "percentage": 37.07, "elapsed_time": "0:23:55", "remaining_time": "0:40:38", "throughput": 12708.89, "total_tokens": 18249024} +{"current_steps": 5795, "total_steps": 15621, "loss": 0.3512, "lr": 1.5853237431956972e-06, "epoch": 0.3709749695922156, "percentage": 37.1, "elapsed_time": "0:23:56", "remaining_time": "0:40:35", "throughput": 12713.6, "total_tokens": 18264256} +{"current_steps": 5800, "total_steps": 15621, "loss": 0.554, "lr": 1.5844174161530206e-06, "epoch": 0.3712950515331925, "percentage": 37.13, "elapsed_time": "0:23:57", "remaining_time": "0:40:33", "throughput": 12718.49, "total_tokens": 18279936} +{"current_steps": 5805, "total_steps": 15621, "loss": 0.4147, "lr": 1.5835103594583382e-06, "epoch": 0.3716151334741694, "percentage": 37.16, "elapsed_time": "0:23:57", "remaining_time": "0:40:31", "throughput": 12723.46, "total_tokens": 18295488} +{"current_steps": 5810, "total_steps": 15621, "loss": 0.5357, "lr": 1.5826025742441207e-06, "epoch": 0.3719352154151463, "percentage": 37.19, "elapsed_time": "0:23:58", "remaining_time": "0:40:29", "throughput": 12728.51, "total_tokens": 18311360} +{"current_steps": 5815, "total_steps": 15621, "loss": 0.4282, "lr": 1.5816940616437486e-06, "epoch": 0.37225529735612317, "percentage": 37.23, "elapsed_time": "0:23:59", "remaining_time": "0:40:27", "throughput": 12733.29, "total_tokens": 18326592} +{"current_steps": 5820, "total_steps": 15621, "loss": 0.3564, "lr": 1.5807848227915108e-06, "epoch": 0.37257537929710005, "percentage": 37.26, "elapsed_time": "0:23:59", "remaining_time": "0:40:24", "throughput": 12738.94, "total_tokens": 18344000} +{"current_steps": 5825, "total_steps": 15621, "loss": 0.4888, "lr": 1.5798748588226028e-06, "epoch": 0.37289546123807693, "percentage": 37.29, "elapsed_time": "0:24:00", "remaining_time": "0:40:22", "throughput": 12744.01, "total_tokens": 18359872} +{"current_steps": 5830, "total_steps": 15621, "loss": 0.472, "lr": 1.578964170873125e-06, "epoch": 0.3732155431790538, "percentage": 37.32, "elapsed_time": "0:24:01", "remaining_time": "0:40:20", "throughput": 12748.43, "total_tokens": 18374400} +{"current_steps": 5835, "total_steps": 15621, "loss": 0.2731, "lr": 1.5780527600800816e-06, "epoch": 0.37353562512003075, "percentage": 37.35, "elapsed_time": "0:24:01", "remaining_time": "0:40:18", "throughput": 12753.61, "total_tokens": 18390656} +{"current_steps": 5840, "total_steps": 15621, "loss": 0.4561, "lr": 1.5771406275813808e-06, "epoch": 0.3738557070610076, "percentage": 37.39, "elapsed_time": "0:24:02", "remaining_time": "0:40:16", "throughput": 12758.5, "total_tokens": 18406400} +{"current_steps": 5845, "total_steps": 15621, "loss": 0.5531, "lr": 1.5762277745158297e-06, "epoch": 0.3741757890019845, "percentage": 37.42, "elapsed_time": "0:24:03", "remaining_time": "0:40:14", "throughput": 12763.74, "total_tokens": 18422848} +{"current_steps": 5850, "total_steps": 15621, "loss": 0.5008, "lr": 1.5753142020231365e-06, "epoch": 0.3744958709429614, "percentage": 37.45, "elapsed_time": "0:24:04", "remaining_time": "0:40:11", "throughput": 12768.83, "total_tokens": 18438912} +{"current_steps": 5855, "total_steps": 15621, "loss": 0.5494, "lr": 1.5743999112439073e-06, "epoch": 0.37481595288393826, "percentage": 37.48, "elapsed_time": "0:24:04", "remaining_time": "0:40:09", "throughput": 12774.12, "total_tokens": 18455488} +{"current_steps": 5860, "total_steps": 15621, "loss": 0.4015, "lr": 1.5734849033196446e-06, "epoch": 0.3751360348249152, "percentage": 37.51, "elapsed_time": "0:24:05", "remaining_time": "0:40:07", "throughput": 12778.51, "total_tokens": 18470080} +{"current_steps": 5865, "total_steps": 15621, "loss": 0.4426, "lr": 1.5725691793927468e-06, "epoch": 0.3754561167658921, "percentage": 37.55, "elapsed_time": "0:24:06", "remaining_time": "0:40:05", "throughput": 12782.85, "total_tokens": 18484480} +{"current_steps": 5870, "total_steps": 15621, "loss": 0.4731, "lr": 1.5716527406065057e-06, "epoch": 0.37577619870686896, "percentage": 37.58, "elapsed_time": "0:24:06", "remaining_time": "0:40:03", "throughput": 12788.31, "total_tokens": 18501312} +{"current_steps": 5875, "total_steps": 15621, "loss": 0.4582, "lr": 1.570735588105106e-06, "epoch": 0.37609628064784584, "percentage": 37.61, "elapsed_time": "0:24:07", "remaining_time": "0:40:01", "throughput": 12792.79, "total_tokens": 18515968} +{"current_steps": 5880, "total_steps": 15621, "loss": 0.3808, "lr": 1.5698177230336234e-06, "epoch": 0.3764163625888227, "percentage": 37.64, "elapsed_time": "0:24:08", "remaining_time": "0:39:58", "throughput": 12797.45, "total_tokens": 18531200} +{"current_steps": 5885, "total_steps": 15621, "loss": 0.2686, "lr": 1.568899146538023e-06, "epoch": 0.37673644452979965, "percentage": 37.67, "elapsed_time": "0:24:08", "remaining_time": "0:39:56", "throughput": 12802.68, "total_tokens": 18547712} +{"current_steps": 5890, "total_steps": 15621, "loss": 0.4112, "lr": 1.5679798597651587e-06, "epoch": 0.37705652647077653, "percentage": 37.71, "elapsed_time": "0:24:09", "remaining_time": "0:39:54", "throughput": 12807.27, "total_tokens": 18562752} +{"current_steps": 5895, "total_steps": 15621, "loss": 0.4375, "lr": 1.5670598638627706e-06, "epoch": 0.3773766084117534, "percentage": 37.74, "elapsed_time": "0:24:10", "remaining_time": "0:39:52", "throughput": 12812.07, "total_tokens": 18578368} +{"current_steps": 5900, "total_steps": 15621, "loss": 0.3833, "lr": 1.5661391599794847e-06, "epoch": 0.3776966903527303, "percentage": 37.77, "elapsed_time": "0:24:10", "remaining_time": "0:39:50", "throughput": 12814.47, "total_tokens": 18593408} +{"current_steps": 5905, "total_steps": 15621, "loss": 0.4148, "lr": 1.56521774926481e-06, "epoch": 0.37801677229370717, "percentage": 37.8, "elapsed_time": "0:24:11", "remaining_time": "0:39:48", "throughput": 12818.86, "total_tokens": 18607872} +{"current_steps": 5910, "total_steps": 15621, "loss": 0.359, "lr": 1.5642956328691393e-06, "epoch": 0.3783368542346841, "percentage": 37.83, "elapsed_time": "0:24:12", "remaining_time": "0:39:46", "throughput": 12823.92, "total_tokens": 18624000} +{"current_steps": 5915, "total_steps": 15621, "loss": 0.5591, "lr": 1.5633728119437451e-06, "epoch": 0.378656936175661, "percentage": 37.87, "elapsed_time": "0:24:12", "remaining_time": "0:39:44", "throughput": 12829.29, "total_tokens": 18640704} +{"current_steps": 5920, "total_steps": 15621, "loss": 0.472, "lr": 1.5624492876407807e-06, "epoch": 0.37897701811663786, "percentage": 37.9, "elapsed_time": "0:24:13", "remaining_time": "0:39:42", "throughput": 12835.02, "total_tokens": 18658368} +{"current_steps": 5925, "total_steps": 15621, "loss": 0.411, "lr": 1.5615250611132766e-06, "epoch": 0.37929710005761474, "percentage": 37.93, "elapsed_time": "0:24:14", "remaining_time": "0:39:40", "throughput": 12840.55, "total_tokens": 18675584} +{"current_steps": 5930, "total_steps": 15621, "loss": 0.5683, "lr": 1.5606001335151405e-06, "epoch": 0.3796171819985916, "percentage": 37.96, "elapsed_time": "0:24:15", "remaining_time": "0:39:37", "throughput": 12845.66, "total_tokens": 18691904} +{"current_steps": 5935, "total_steps": 15621, "loss": 0.3734, "lr": 1.5596745060011561e-06, "epoch": 0.3799372639395685, "percentage": 37.99, "elapsed_time": "0:24:15", "remaining_time": "0:39:35", "throughput": 12851.0, "total_tokens": 18708736} +{"current_steps": 5940, "total_steps": 15621, "loss": 0.3492, "lr": 1.5587481797269793e-06, "epoch": 0.38025734588054544, "percentage": 38.03, "elapsed_time": "0:24:16", "remaining_time": "0:39:33", "throughput": 12855.63, "total_tokens": 18724032} +{"current_steps": 5945, "total_steps": 15621, "loss": 0.4266, "lr": 1.5578211558491396e-06, "epoch": 0.3805774278215223, "percentage": 38.06, "elapsed_time": "0:24:17", "remaining_time": "0:39:31", "throughput": 12860.71, "total_tokens": 18740352} +{"current_steps": 5950, "total_steps": 15621, "loss": 0.3346, "lr": 1.5568934355250375e-06, "epoch": 0.3808975097624992, "percentage": 38.09, "elapsed_time": "0:24:17", "remaining_time": "0:39:29", "throughput": 12864.92, "total_tokens": 18754560} +{"current_steps": 5955, "total_steps": 15621, "loss": 0.6693, "lr": 1.5559650199129423e-06, "epoch": 0.3812175917034761, "percentage": 38.12, "elapsed_time": "0:24:18", "remaining_time": "0:39:27", "throughput": 12869.39, "total_tokens": 18769280} +{"current_steps": 5960, "total_steps": 15621, "loss": 0.4131, "lr": 1.5550359101719921e-06, "epoch": 0.38153767364445296, "percentage": 38.15, "elapsed_time": "0:24:19", "remaining_time": "0:39:25", "throughput": 12874.03, "total_tokens": 18784512} +{"current_steps": 5965, "total_steps": 15621, "loss": 0.3615, "lr": 1.554106107462191e-06, "epoch": 0.3818577555854299, "percentage": 38.19, "elapsed_time": "0:24:19", "remaining_time": "0:39:23", "throughput": 12878.91, "total_tokens": 18800384} +{"current_steps": 5970, "total_steps": 15621, "loss": 0.4262, "lr": 1.5531756129444092e-06, "epoch": 0.38217783752640677, "percentage": 38.22, "elapsed_time": "0:24:20", "remaining_time": "0:39:20", "throughput": 12883.55, "total_tokens": 18815552} +{"current_steps": 5975, "total_steps": 15621, "loss": 0.4191, "lr": 1.5522444277803796e-06, "epoch": 0.38249791946738365, "percentage": 38.25, "elapsed_time": "0:24:21", "remaining_time": "0:39:18", "throughput": 12887.9, "total_tokens": 18830080} +{"current_steps": 5980, "total_steps": 15621, "loss": 0.4244, "lr": 1.5513125531326976e-06, "epoch": 0.38281800140836053, "percentage": 38.28, "elapsed_time": "0:24:21", "remaining_time": "0:39:16", "throughput": 12892.91, "total_tokens": 18846272} +{"current_steps": 5985, "total_steps": 15621, "loss": 0.3802, "lr": 1.5503799901648198e-06, "epoch": 0.3831380833493374, "percentage": 38.31, "elapsed_time": "0:24:22", "remaining_time": "0:39:14", "throughput": 12897.27, "total_tokens": 18860928} +{"current_steps": 5990, "total_steps": 15621, "loss": 0.4461, "lr": 1.5494467400410625e-06, "epoch": 0.38345816529031435, "percentage": 38.35, "elapsed_time": "0:24:23", "remaining_time": "0:39:12", "throughput": 12902.2, "total_tokens": 18877120} +{"current_steps": 5995, "total_steps": 15621, "loss": 0.6047, "lr": 1.5485128039265986e-06, "epoch": 0.3837782472312912, "percentage": 38.38, "elapsed_time": "0:24:23", "remaining_time": "0:39:10", "throughput": 12906.78, "total_tokens": 18892224} +{"current_steps": 6000, "total_steps": 15621, "loss": 0.445, "lr": 1.547578182987459e-06, "epoch": 0.3840983291722681, "percentage": 38.41, "elapsed_time": "0:24:24", "remaining_time": "0:39:08", "throughput": 12911.2, "total_tokens": 18907008} +{"current_steps": 6005, "total_steps": 15621, "loss": 0.2856, "lr": 1.5466428783905286e-06, "epoch": 0.384418411113245, "percentage": 38.44, "elapsed_time": "0:24:25", "remaining_time": "0:39:06", "throughput": 12915.85, "total_tokens": 18922368} +{"current_steps": 6010, "total_steps": 15621, "loss": 0.4418, "lr": 1.5457068913035463e-06, "epoch": 0.38473849305422186, "percentage": 38.47, "elapsed_time": "0:24:25", "remaining_time": "0:39:03", "throughput": 12920.41, "total_tokens": 18937536} +{"current_steps": 6015, "total_steps": 15621, "loss": 0.5024, "lr": 1.544770222895103e-06, "epoch": 0.38505857499519874, "percentage": 38.51, "elapsed_time": "0:24:26", "remaining_time": "0:39:01", "throughput": 12925.54, "total_tokens": 18954048} +{"current_steps": 6020, "total_steps": 15621, "loss": 0.5102, "lr": 1.5438328743346398e-06, "epoch": 0.3853786569361757, "percentage": 38.54, "elapsed_time": "0:24:27", "remaining_time": "0:38:59", "throughput": 12930.18, "total_tokens": 18969472} +{"current_steps": 6025, "total_steps": 15621, "loss": 0.4192, "lr": 1.5428948467924478e-06, "epoch": 0.38569873887715256, "percentage": 38.57, "elapsed_time": "0:24:27", "remaining_time": "0:38:57", "throughput": 12934.44, "total_tokens": 18983872} +{"current_steps": 6030, "total_steps": 15621, "loss": 0.3268, "lr": 1.5419561414396656e-06, "epoch": 0.38601882081812944, "percentage": 38.6, "elapsed_time": "0:24:28", "remaining_time": "0:38:55", "throughput": 12939.08, "total_tokens": 18999360} +{"current_steps": 6035, "total_steps": 15621, "loss": 0.4969, "lr": 1.541016759448277e-06, "epoch": 0.3863389027591063, "percentage": 38.63, "elapsed_time": "0:24:29", "remaining_time": "0:38:53", "throughput": 12943.96, "total_tokens": 19015424} +{"current_steps": 6040, "total_steps": 15621, "loss": 0.3775, "lr": 1.5400767019911124e-06, "epoch": 0.3866589847000832, "percentage": 38.67, "elapsed_time": "0:24:29", "remaining_time": "0:38:51", "throughput": 12948.89, "total_tokens": 19031616} +{"current_steps": 6045, "total_steps": 15621, "loss": 0.4886, "lr": 1.539135970241844e-06, "epoch": 0.38697906664106013, "percentage": 38.7, "elapsed_time": "0:24:30", "remaining_time": "0:38:49", "throughput": 12953.54, "total_tokens": 19047040} +{"current_steps": 6050, "total_steps": 15621, "loss": 0.4842, "lr": 1.5381945653749866e-06, "epoch": 0.387299148582037, "percentage": 38.73, "elapsed_time": "0:24:31", "remaining_time": "0:38:47", "throughput": 12958.34, "total_tokens": 19062848} +{"current_steps": 6055, "total_steps": 15621, "loss": 0.5516, "lr": 1.5372524885658952e-06, "epoch": 0.3876192305230139, "percentage": 38.76, "elapsed_time": "0:24:31", "remaining_time": "0:38:45", "throughput": 12963.32, "total_tokens": 19078976} +{"current_steps": 6060, "total_steps": 15621, "loss": 0.3732, "lr": 1.5363097409907638e-06, "epoch": 0.38793931246399077, "percentage": 38.79, "elapsed_time": "0:24:32", "remaining_time": "0:38:43", "throughput": 12967.64, "total_tokens": 19093632} +{"current_steps": 6065, "total_steps": 15621, "loss": 0.3583, "lr": 1.535366323826624e-06, "epoch": 0.38825939440496765, "percentage": 38.83, "elapsed_time": "0:24:33", "remaining_time": "0:38:40", "throughput": 12972.25, "total_tokens": 19109056} +{"current_steps": 6070, "total_steps": 15621, "loss": 0.3623, "lr": 1.534422238251343e-06, "epoch": 0.3885794763459446, "percentage": 38.86, "elapsed_time": "0:24:33", "remaining_time": "0:38:38", "throughput": 12976.87, "total_tokens": 19124544} +{"current_steps": 6075, "total_steps": 15621, "loss": 0.3844, "lr": 1.5334774854436223e-06, "epoch": 0.38889955828692147, "percentage": 38.89, "elapsed_time": "0:24:34", "remaining_time": "0:38:36", "throughput": 12981.69, "total_tokens": 19140480} +{"current_steps": 6080, "total_steps": 15621, "loss": 0.378, "lr": 1.5325320665829975e-06, "epoch": 0.38921964022789834, "percentage": 38.92, "elapsed_time": "0:24:35", "remaining_time": "0:38:34", "throughput": 12986.61, "total_tokens": 19156736} +{"current_steps": 6085, "total_steps": 15621, "loss": 0.4624, "lr": 1.5315859828498352e-06, "epoch": 0.3895397221688752, "percentage": 38.95, "elapsed_time": "0:24:35", "remaining_time": "0:38:32", "throughput": 12990.95, "total_tokens": 19171520} +{"current_steps": 6090, "total_steps": 15621, "loss": 0.5057, "lr": 1.5306392354253316e-06, "epoch": 0.3898598041098521, "percentage": 38.99, "elapsed_time": "0:24:36", "remaining_time": "0:38:30", "throughput": 12995.65, "total_tokens": 19187136} +{"current_steps": 6095, "total_steps": 15621, "loss": 0.4389, "lr": 1.5296918254915123e-06, "epoch": 0.39017988605082904, "percentage": 39.02, "elapsed_time": "0:24:37", "remaining_time": "0:38:28", "throughput": 12999.97, "total_tokens": 19201856} +{"current_steps": 6100, "total_steps": 15621, "loss": 0.3827, "lr": 1.5287437542312296e-06, "epoch": 0.3904999679918059, "percentage": 39.05, "elapsed_time": "0:24:37", "remaining_time": "0:38:26", "throughput": 13004.29, "total_tokens": 19216704} +{"current_steps": 6105, "total_steps": 15621, "loss": 0.5423, "lr": 1.5277950228281614e-06, "epoch": 0.3908200499327828, "percentage": 39.08, "elapsed_time": "0:24:38", "remaining_time": "0:38:24", "throughput": 13009.49, "total_tokens": 19233408} +{"current_steps": 6110, "total_steps": 15621, "loss": 0.3617, "lr": 1.52684563246681e-06, "epoch": 0.3911401318737597, "percentage": 39.11, "elapsed_time": "0:24:39", "remaining_time": "0:38:22", "throughput": 13014.62, "total_tokens": 19250048} +{"current_steps": 6115, "total_steps": 15621, "loss": 0.4241, "lr": 1.5258955843325015e-06, "epoch": 0.39146021381473656, "percentage": 39.15, "elapsed_time": "0:24:39", "remaining_time": "0:38:20", "throughput": 13019.68, "total_tokens": 19266560} +{"current_steps": 6120, "total_steps": 15621, "loss": 0.5018, "lr": 1.5249448796113804e-06, "epoch": 0.39178029575571344, "percentage": 39.18, "elapsed_time": "0:24:40", "remaining_time": "0:38:18", "throughput": 13024.02, "total_tokens": 19281408} +{"current_steps": 6125, "total_steps": 15621, "loss": 0.4797, "lr": 1.5239935194904141e-06, "epoch": 0.39210037769669037, "percentage": 39.21, "elapsed_time": "0:24:41", "remaining_time": "0:38:16", "throughput": 13028.4, "total_tokens": 19296384} +{"current_steps": 6130, "total_steps": 15621, "loss": 0.3946, "lr": 1.523041505157386e-06, "epoch": 0.39242045963766725, "percentage": 39.24, "elapsed_time": "0:24:41", "remaining_time": "0:38:14", "throughput": 13033.04, "total_tokens": 19312000} +{"current_steps": 6135, "total_steps": 15621, "loss": 0.395, "lr": 1.5220888378008977e-06, "epoch": 0.39274054157864413, "percentage": 39.27, "elapsed_time": "0:24:42", "remaining_time": "0:38:12", "throughput": 13037.64, "total_tokens": 19327488} +{"current_steps": 6140, "total_steps": 15621, "loss": 0.4748, "lr": 1.5211355186103654e-06, "epoch": 0.393060623519621, "percentage": 39.31, "elapsed_time": "0:24:43", "remaining_time": "0:38:10", "throughput": 13041.89, "total_tokens": 19342080} +{"current_steps": 6145, "total_steps": 15621, "loss": 0.4435, "lr": 1.5201815487760192e-06, "epoch": 0.3933807054605979, "percentage": 39.34, "elapsed_time": "0:24:43", "remaining_time": "0:38:08", "throughput": 13046.79, "total_tokens": 19358336} +{"current_steps": 6150, "total_steps": 15621, "loss": 0.5032, "lr": 1.5192269294889019e-06, "epoch": 0.3937007874015748, "percentage": 39.37, "elapsed_time": "0:24:44", "remaining_time": "0:38:05", "throughput": 13051.22, "total_tokens": 19373376} +{"current_steps": 6155, "total_steps": 15621, "loss": 0.4021, "lr": 1.5182716619408666e-06, "epoch": 0.3940208693425517, "percentage": 39.4, "elapsed_time": "0:24:45", "remaining_time": "0:38:03", "throughput": 13055.7, "total_tokens": 19388608} +{"current_steps": 6160, "total_steps": 15621, "loss": 0.5383, "lr": 1.5173157473245764e-06, "epoch": 0.3943409512835286, "percentage": 39.43, "elapsed_time": "0:24:45", "remaining_time": "0:38:01", "throughput": 13059.92, "total_tokens": 19403264} +{"current_steps": 6165, "total_steps": 15621, "loss": 0.4397, "lr": 1.5163591868335016e-06, "epoch": 0.39466103322450546, "percentage": 39.47, "elapsed_time": "0:24:46", "remaining_time": "0:37:59", "throughput": 13064.47, "total_tokens": 19418816} +{"current_steps": 6170, "total_steps": 15621, "loss": 0.5856, "lr": 1.515401981661919e-06, "epoch": 0.39498111516548234, "percentage": 39.5, "elapsed_time": "0:24:47", "remaining_time": "0:37:57", "throughput": 13069.47, "total_tokens": 19435392} +{"current_steps": 6175, "total_steps": 15621, "loss": 0.4567, "lr": 1.514444133004911e-06, "epoch": 0.3953011971064593, "percentage": 39.53, "elapsed_time": "0:24:47", "remaining_time": "0:37:55", "throughput": 13073.7, "total_tokens": 19450048} +{"current_steps": 6180, "total_steps": 15621, "loss": 0.465, "lr": 1.5134856420583631e-06, "epoch": 0.39562127904743616, "percentage": 39.56, "elapsed_time": "0:24:48", "remaining_time": "0:37:53", "throughput": 13078.64, "total_tokens": 19466368} +{"current_steps": 6185, "total_steps": 15621, "loss": 0.34, "lr": 1.5125265100189614e-06, "epoch": 0.39594136098841304, "percentage": 39.59, "elapsed_time": "0:24:49", "remaining_time": "0:37:51", "throughput": 13083.48, "total_tokens": 19482624} +{"current_steps": 6190, "total_steps": 15621, "loss": 0.5382, "lr": 1.5115667380841948e-06, "epoch": 0.3962614429293899, "percentage": 39.63, "elapsed_time": "0:24:49", "remaining_time": "0:37:49", "throughput": 13088.04, "total_tokens": 19498048} +{"current_steps": 6195, "total_steps": 15621, "loss": 0.4413, "lr": 1.510606327452349e-06, "epoch": 0.3965815248703668, "percentage": 39.66, "elapsed_time": "0:24:50", "remaining_time": "0:37:47", "throughput": 13093.22, "total_tokens": 19515264} +{"current_steps": 6200, "total_steps": 15621, "loss": 0.4267, "lr": 1.5096452793225082e-06, "epoch": 0.3969016068113437, "percentage": 39.69, "elapsed_time": "0:24:51", "remaining_time": "0:37:45", "throughput": 13098.69, "total_tokens": 19533056} +{"current_steps": 6205, "total_steps": 15621, "loss": 0.3994, "lr": 1.5086835948945522e-06, "epoch": 0.3972216887523206, "percentage": 39.72, "elapsed_time": "0:24:51", "remaining_time": "0:37:43", "throughput": 13103.18, "total_tokens": 19548480} +{"current_steps": 6210, "total_steps": 15621, "loss": 0.3462, "lr": 1.5077212753691556e-06, "epoch": 0.3975417706932975, "percentage": 39.75, "elapsed_time": "0:24:52", "remaining_time": "0:37:41", "throughput": 13107.62, "total_tokens": 19563712} +{"current_steps": 6215, "total_steps": 15621, "loss": 0.41, "lr": 1.5067583219477852e-06, "epoch": 0.39786185263427437, "percentage": 39.79, "elapsed_time": "0:24:53", "remaining_time": "0:37:39", "throughput": 13111.95, "total_tokens": 19578624} +{"current_steps": 6220, "total_steps": 15621, "loss": 0.3926, "lr": 1.5057947358327e-06, "epoch": 0.39818193457525125, "percentage": 39.82, "elapsed_time": "0:24:53", "remaining_time": "0:37:37", "throughput": 13116.17, "total_tokens": 19593408} +{"current_steps": 6225, "total_steps": 15621, "loss": 0.5044, "lr": 1.504830518226948e-06, "epoch": 0.39850201651622813, "percentage": 39.85, "elapsed_time": "0:24:54", "remaining_time": "0:37:35", "throughput": 13120.88, "total_tokens": 19609216} +{"current_steps": 6230, "total_steps": 15621, "loss": 0.4468, "lr": 1.5038656703343672e-06, "epoch": 0.39882209845720507, "percentage": 39.88, "elapsed_time": "0:24:55", "remaining_time": "0:37:33", "throughput": 13125.42, "total_tokens": 19624896} +{"current_steps": 6235, "total_steps": 15621, "loss": 0.5125, "lr": 1.5029001933595805e-06, "epoch": 0.39914218039818194, "percentage": 39.91, "elapsed_time": "0:24:55", "remaining_time": "0:37:31", "throughput": 13129.86, "total_tokens": 19640128} +{"current_steps": 6240, "total_steps": 15621, "loss": 0.3482, "lr": 1.501934088507998e-06, "epoch": 0.3994622623391588, "percentage": 39.95, "elapsed_time": "0:24:56", "remaining_time": "0:37:29", "throughput": 13134.46, "total_tokens": 19655680} +{"current_steps": 6245, "total_steps": 15621, "loss": 0.6246, "lr": 1.5009673569858126e-06, "epoch": 0.3997823442801357, "percentage": 39.98, "elapsed_time": "0:24:57", "remaining_time": "0:37:27", "throughput": 13139.36, "total_tokens": 19672192} +{"current_steps": 6250, "total_steps": 15621, "loss": 0.534, "lr": 1.5e-06, "epoch": 0.4001024262211126, "percentage": 40.01, "elapsed_time": "0:24:57", "remaining_time": "0:37:25", "throughput": 13144.35, "total_tokens": 19688896} +{"current_steps": 6255, "total_steps": 15621, "loss": 0.3556, "lr": 1.4990320187583167e-06, "epoch": 0.4004225081620895, "percentage": 40.04, "elapsed_time": "0:24:58", "remaining_time": "0:37:23", "throughput": 13148.79, "total_tokens": 19704128} +{"current_steps": 6256, "total_steps": 15621, "eval_loss": 0.4279458224773407, "epoch": 0.4004865245502849, "percentage": 40.05, "elapsed_time": "0:25:49", "remaining_time": "0:38:39", "throughput": 12720.35, "total_tokens": 19707456} +{"current_steps": 6260, "total_steps": 15621, "loss": 0.3913, "lr": 1.4980634144692986e-06, "epoch": 0.4007425901030664, "percentage": 40.07, "elapsed_time": "0:26:29", "remaining_time": "0:39:36", "throughput": 12408.36, "total_tokens": 19719744} +{"current_steps": 6265, "total_steps": 15621, "loss": 0.3734, "lr": 1.4970941883422599e-06, "epoch": 0.4010626720440433, "percentage": 40.11, "elapsed_time": "0:26:29", "remaining_time": "0:39:34", "throughput": 12413.32, "total_tokens": 19736128} +{"current_steps": 6270, "total_steps": 15621, "loss": 0.4286, "lr": 1.4961243415872901e-06, "epoch": 0.40138275398502016, "percentage": 40.14, "elapsed_time": "0:26:30", "remaining_time": "0:39:32", "throughput": 12417.7, "total_tokens": 19751296} +{"current_steps": 6275, "total_steps": 15621, "loss": 0.3958, "lr": 1.4951538754152551e-06, "epoch": 0.40170283592599704, "percentage": 40.17, "elapsed_time": "0:26:31", "remaining_time": "0:39:29", "throughput": 12421.88, "total_tokens": 19765888} +{"current_steps": 6280, "total_steps": 15621, "loss": 0.4227, "lr": 1.4941827910377925e-06, "epoch": 0.402022917866974, "percentage": 40.2, "elapsed_time": "0:26:31", "remaining_time": "0:39:27", "throughput": 12426.17, "total_tokens": 19780864} +{"current_steps": 6285, "total_steps": 15621, "loss": 0.3978, "lr": 1.4932110896673131e-06, "epoch": 0.40234299980795085, "percentage": 40.23, "elapsed_time": "0:26:32", "remaining_time": "0:39:25", "throughput": 12430.92, "total_tokens": 19796864} +{"current_steps": 6290, "total_steps": 15621, "loss": 0.5383, "lr": 1.4922387725169973e-06, "epoch": 0.40266308174892773, "percentage": 40.27, "elapsed_time": "0:26:33", "remaining_time": "0:39:23", "throughput": 12435.28, "total_tokens": 19811904} +{"current_steps": 6295, "total_steps": 15621, "loss": 0.418, "lr": 1.4912658408007947e-06, "epoch": 0.4029831636899046, "percentage": 40.3, "elapsed_time": "0:26:33", "remaining_time": "0:39:21", "throughput": 12439.81, "total_tokens": 19827456} +{"current_steps": 6300, "total_steps": 15621, "loss": 0.4194, "lr": 1.4902922957334215e-06, "epoch": 0.4033032456308815, "percentage": 40.33, "elapsed_time": "0:26:34", "remaining_time": "0:39:19", "throughput": 12444.16, "total_tokens": 19842496} +{"current_steps": 6305, "total_steps": 15621, "loss": 0.4186, "lr": 1.4893181385303608e-06, "epoch": 0.40362332757185837, "percentage": 40.36, "elapsed_time": "0:26:35", "remaining_time": "0:39:16", "throughput": 12448.75, "total_tokens": 19858240} +{"current_steps": 6310, "total_steps": 15621, "loss": 0.4262, "lr": 1.4883433704078584e-06, "epoch": 0.4039434095128353, "percentage": 40.39, "elapsed_time": "0:26:35", "remaining_time": "0:39:14", "throughput": 12453.58, "total_tokens": 19874368} +{"current_steps": 6315, "total_steps": 15621, "loss": 0.3986, "lr": 1.4873679925829246e-06, "epoch": 0.4042634914538122, "percentage": 40.43, "elapsed_time": "0:26:36", "remaining_time": "0:39:12", "throughput": 12458.84, "total_tokens": 19891904} +{"current_steps": 6320, "total_steps": 15621, "loss": 0.4157, "lr": 1.4863920062733298e-06, "epoch": 0.40458357339478906, "percentage": 40.46, "elapsed_time": "0:26:37", "remaining_time": "0:39:10", "throughput": 12463.32, "total_tokens": 19907392} +{"current_steps": 6325, "total_steps": 15621, "loss": 0.3822, "lr": 1.485415412697604e-06, "epoch": 0.40490365533576594, "percentage": 40.49, "elapsed_time": "0:26:37", "remaining_time": "0:39:08", "throughput": 12467.76, "total_tokens": 19922624} +{"current_steps": 6330, "total_steps": 15621, "loss": 0.4286, "lr": 1.484438213075036e-06, "epoch": 0.4052237372767428, "percentage": 40.52, "elapsed_time": "0:26:38", "remaining_time": "0:39:06", "throughput": 12472.73, "total_tokens": 19939328} +{"current_steps": 6335, "total_steps": 15621, "loss": 0.4412, "lr": 1.4834604086256713e-06, "epoch": 0.40554381921771976, "percentage": 40.55, "elapsed_time": "0:26:39", "remaining_time": "0:39:04", "throughput": 12477.45, "total_tokens": 19955392} +{"current_steps": 6340, "total_steps": 15621, "loss": 0.401, "lr": 1.4824820005703097e-06, "epoch": 0.40586390115869664, "percentage": 40.59, "elapsed_time": "0:26:39", "remaining_time": "0:39:02", "throughput": 12482.22, "total_tokens": 19971520} +{"current_steps": 6345, "total_steps": 15621, "loss": 0.448, "lr": 1.4815029901305061e-06, "epoch": 0.4061839830996735, "percentage": 40.62, "elapsed_time": "0:26:40", "remaining_time": "0:39:00", "throughput": 12487.24, "total_tokens": 19988352} +{"current_steps": 6350, "total_steps": 15621, "loss": 0.4706, "lr": 1.480523378528565e-06, "epoch": 0.4065040650406504, "percentage": 40.65, "elapsed_time": "0:26:41", "remaining_time": "0:38:58", "throughput": 12492.23, "total_tokens": 20005184} +{"current_steps": 6355, "total_steps": 15621, "loss": 0.4379, "lr": 1.4795431669875441e-06, "epoch": 0.4068241469816273, "percentage": 40.68, "elapsed_time": "0:26:42", "remaining_time": "0:38:55", "throughput": 12496.75, "total_tokens": 20020800} +{"current_steps": 6360, "total_steps": 15621, "loss": 0.475, "lr": 1.478562356731249e-06, "epoch": 0.4071442289226042, "percentage": 40.71, "elapsed_time": "0:26:42", "remaining_time": "0:38:53", "throughput": 12501.28, "total_tokens": 20036416} +{"current_steps": 6365, "total_steps": 15621, "loss": 0.4608, "lr": 1.4775809489842326e-06, "epoch": 0.4074643108635811, "percentage": 40.75, "elapsed_time": "0:26:43", "remaining_time": "0:38:51", "throughput": 12506.24, "total_tokens": 20053184} +{"current_steps": 6370, "total_steps": 15621, "loss": 0.3944, "lr": 1.4765989449717937e-06, "epoch": 0.40778439280455797, "percentage": 40.78, "elapsed_time": "0:26:44", "remaining_time": "0:38:49", "throughput": 12511.17, "total_tokens": 20069888} +{"current_steps": 6375, "total_steps": 15621, "loss": 0.534, "lr": 1.4756163459199763e-06, "epoch": 0.40810447474553485, "percentage": 40.81, "elapsed_time": "0:26:44", "remaining_time": "0:38:47", "throughput": 12515.76, "total_tokens": 20085760} +{"current_steps": 6380, "total_steps": 15621, "loss": 0.2694, "lr": 1.4746331530555665e-06, "epoch": 0.40842455668651173, "percentage": 40.84, "elapsed_time": "0:26:45", "remaining_time": "0:38:45", "throughput": 12520.18, "total_tokens": 20101056} +{"current_steps": 6385, "total_steps": 15621, "loss": 0.4114, "lr": 1.4736493676060923e-06, "epoch": 0.4087446386274886, "percentage": 40.87, "elapsed_time": "0:26:46", "remaining_time": "0:38:43", "throughput": 12524.48, "total_tokens": 20116352} +{"current_steps": 6390, "total_steps": 15621, "loss": 0.3752, "lr": 1.4726649907998216e-06, "epoch": 0.40906472056846555, "percentage": 40.91, "elapsed_time": "0:26:46", "remaining_time": "0:38:41", "throughput": 12528.86, "total_tokens": 20131712} +{"current_steps": 6395, "total_steps": 15621, "loss": 0.3816, "lr": 1.4716800238657599e-06, "epoch": 0.4093848025094424, "percentage": 40.94, "elapsed_time": "0:26:47", "remaining_time": "0:38:39", "throughput": 12533.19, "total_tokens": 20146880} +{"current_steps": 6400, "total_steps": 15621, "loss": 0.285, "lr": 1.4706944680336505e-06, "epoch": 0.4097048844504193, "percentage": 40.97, "elapsed_time": "0:26:48", "remaining_time": "0:38:37", "throughput": 12538.07, "total_tokens": 20163520} +{"current_steps": 6405, "total_steps": 15621, "loss": 0.4656, "lr": 1.469708324533971e-06, "epoch": 0.4100249663913962, "percentage": 41.0, "elapsed_time": "0:26:48", "remaining_time": "0:38:34", "throughput": 12542.08, "total_tokens": 20177984} +{"current_steps": 6410, "total_steps": 15621, "loss": 0.3425, "lr": 1.4687215945979335e-06, "epoch": 0.41034504833237306, "percentage": 41.03, "elapsed_time": "0:26:49", "remaining_time": "0:38:32", "throughput": 12546.52, "total_tokens": 20193472} +{"current_steps": 6415, "total_steps": 15621, "loss": 0.4557, "lr": 1.4677342794574815e-06, "epoch": 0.41066513027335, "percentage": 41.07, "elapsed_time": "0:26:50", "remaining_time": "0:38:30", "throughput": 12551.64, "total_tokens": 20210624} +{"current_steps": 6420, "total_steps": 15621, "loss": 0.4171, "lr": 1.4667463803452902e-06, "epoch": 0.4109852122143269, "percentage": 41.1, "elapsed_time": "0:26:50", "remaining_time": "0:38:28", "throughput": 12556.28, "total_tokens": 20226688} +{"current_steps": 6425, "total_steps": 15621, "loss": 0.4553, "lr": 1.4657578984947627e-06, "epoch": 0.41130529415530376, "percentage": 41.13, "elapsed_time": "0:26:51", "remaining_time": "0:38:26", "throughput": 12561.66, "total_tokens": 20244608} +{"current_steps": 6430, "total_steps": 15621, "loss": 0.3597, "lr": 1.4647688351400303e-06, "epoch": 0.41162537609628064, "percentage": 41.16, "elapsed_time": "0:26:52", "remaining_time": "0:38:24", "throughput": 12566.43, "total_tokens": 20261184} +{"current_steps": 6435, "total_steps": 15621, "loss": 0.3288, "lr": 1.46377919151595e-06, "epoch": 0.4119454580372575, "percentage": 41.19, "elapsed_time": "0:26:53", "remaining_time": "0:38:22", "throughput": 12570.82, "total_tokens": 20276736} +{"current_steps": 6440, "total_steps": 15621, "loss": 0.47, "lr": 1.462788968858104e-06, "epoch": 0.41226553997823445, "percentage": 41.23, "elapsed_time": "0:26:53", "remaining_time": "0:38:20", "throughput": 12575.82, "total_tokens": 20293888} +{"current_steps": 6445, "total_steps": 15621, "loss": 0.4858, "lr": 1.4617981684027966e-06, "epoch": 0.41258562191921133, "percentage": 41.26, "elapsed_time": "0:26:54", "remaining_time": "0:38:18", "throughput": 12580.31, "total_tokens": 20309696} +{"current_steps": 6450, "total_steps": 15621, "loss": 0.3958, "lr": 1.4608067913870536e-06, "epoch": 0.4129057038601882, "percentage": 41.29, "elapsed_time": "0:26:55", "remaining_time": "0:38:16", "throughput": 12584.85, "total_tokens": 20325632} +{"current_steps": 6455, "total_steps": 15621, "loss": 0.3994, "lr": 1.4598148390486213e-06, "epoch": 0.4132257858011651, "percentage": 41.32, "elapsed_time": "0:26:55", "remaining_time": "0:38:14", "throughput": 12589.53, "total_tokens": 20341888} +{"current_steps": 6460, "total_steps": 15621, "loss": 0.5083, "lr": 1.4588223126259639e-06, "epoch": 0.41354586774214197, "percentage": 41.35, "elapsed_time": "0:26:56", "remaining_time": "0:38:12", "throughput": 12594.44, "total_tokens": 20358656} +{"current_steps": 6465, "total_steps": 15621, "loss": 0.3307, "lr": 1.4578292133582615e-06, "epoch": 0.4138659496831189, "percentage": 41.39, "elapsed_time": "0:26:57", "remaining_time": "0:38:10", "throughput": 12598.31, "total_tokens": 20372864} +{"current_steps": 6470, "total_steps": 15621, "loss": 0.3876, "lr": 1.456835542485411e-06, "epoch": 0.4141860316240958, "percentage": 41.42, "elapsed_time": "0:26:57", "remaining_time": "0:38:08", "throughput": 12602.43, "total_tokens": 20387840} +{"current_steps": 6475, "total_steps": 15621, "loss": 0.4136, "lr": 1.4558413012480215e-06, "epoch": 0.41450611356507266, "percentage": 41.45, "elapsed_time": "0:26:58", "remaining_time": "0:38:06", "throughput": 12607.32, "total_tokens": 20404736} +{"current_steps": 6480, "total_steps": 15621, "loss": 0.5707, "lr": 1.4548464908874156e-06, "epoch": 0.41482619550604954, "percentage": 41.48, "elapsed_time": "0:26:59", "remaining_time": "0:38:04", "throughput": 12612.71, "total_tokens": 20422848} +{"current_steps": 6485, "total_steps": 15621, "loss": 0.4077, "lr": 1.4538511126456255e-06, "epoch": 0.4151462774470264, "percentage": 41.51, "elapsed_time": "0:26:59", "remaining_time": "0:38:02", "throughput": 12616.9, "total_tokens": 20438016} +{"current_steps": 6490, "total_steps": 15621, "loss": 0.5888, "lr": 1.452855167765392e-06, "epoch": 0.4154663593880033, "percentage": 41.55, "elapsed_time": "0:27:00", "remaining_time": "0:38:00", "throughput": 12621.61, "total_tokens": 20454464} +{"current_steps": 6495, "total_steps": 15621, "loss": 0.4553, "lr": 1.4518586574901647e-06, "epoch": 0.41578644132898024, "percentage": 41.58, "elapsed_time": "0:27:01", "remaining_time": "0:37:58", "throughput": 12626.17, "total_tokens": 20470464} +{"current_steps": 6500, "total_steps": 15621, "loss": 0.4639, "lr": 1.450861583064098e-06, "epoch": 0.4161065232699571, "percentage": 41.61, "elapsed_time": "0:27:01", "remaining_time": "0:37:55", "throughput": 12630.44, "total_tokens": 20485696} +{"current_steps": 6505, "total_steps": 15621, "loss": 0.352, "lr": 1.4498639457320515e-06, "epoch": 0.416426605210934, "percentage": 41.64, "elapsed_time": "0:27:02", "remaining_time": "0:37:53", "throughput": 12634.53, "total_tokens": 20500608} +{"current_steps": 6510, "total_steps": 15621, "loss": 0.4715, "lr": 1.4488657467395865e-06, "epoch": 0.4167466871519109, "percentage": 41.67, "elapsed_time": "0:27:03", "remaining_time": "0:37:51", "throughput": 12638.74, "total_tokens": 20515776} +{"current_steps": 6515, "total_steps": 15621, "loss": 0.5086, "lr": 1.4478669873329663e-06, "epoch": 0.41706676909288776, "percentage": 41.71, "elapsed_time": "0:27:03", "remaining_time": "0:37:49", "throughput": 12643.15, "total_tokens": 20531456} +{"current_steps": 6520, "total_steps": 15621, "loss": 0.3953, "lr": 1.4468676687591536e-06, "epoch": 0.4173868510338647, "percentage": 41.74, "elapsed_time": "0:27:04", "remaining_time": "0:37:47", "throughput": 12647.51, "total_tokens": 20547200} +{"current_steps": 6525, "total_steps": 15621, "loss": 0.4326, "lr": 1.4458677922658104e-06, "epoch": 0.41770693297484157, "percentage": 41.77, "elapsed_time": "0:27:05", "remaining_time": "0:37:45", "throughput": 12651.74, "total_tokens": 20562560} +{"current_steps": 6530, "total_steps": 15621, "loss": 0.2884, "lr": 1.444867359101293e-06, "epoch": 0.41802701491581845, "percentage": 41.8, "elapsed_time": "0:27:05", "remaining_time": "0:37:43", "throughput": 12655.76, "total_tokens": 20577344} +{"current_steps": 6535, "total_steps": 15621, "loss": 0.3541, "lr": 1.4438663705146545e-06, "epoch": 0.41834709685679533, "percentage": 41.83, "elapsed_time": "0:27:06", "remaining_time": "0:37:41", "throughput": 12660.11, "total_tokens": 20593088} +{"current_steps": 6540, "total_steps": 15621, "loss": 0.3645, "lr": 1.442864827755641e-06, "epoch": 0.4186671787977722, "percentage": 41.87, "elapsed_time": "0:27:07", "remaining_time": "0:37:39", "throughput": 12664.9, "total_tokens": 20609792} +{"current_steps": 6545, "total_steps": 15621, "loss": 0.4507, "lr": 1.4418627320746901e-06, "epoch": 0.41898726073874915, "percentage": 41.9, "elapsed_time": "0:27:07", "remaining_time": "0:37:37", "throughput": 12669.2, "total_tokens": 20625280} +{"current_steps": 6550, "total_steps": 15621, "loss": 0.3912, "lr": 1.4408600847229304e-06, "epoch": 0.419307342679726, "percentage": 41.93, "elapsed_time": "0:27:08", "remaining_time": "0:37:35", "throughput": 12673.98, "total_tokens": 20641984} +{"current_steps": 6555, "total_steps": 15621, "loss": 0.5483, "lr": 1.4398568869521782e-06, "epoch": 0.4196274246207029, "percentage": 41.96, "elapsed_time": "0:27:09", "remaining_time": "0:37:33", "throughput": 12678.57, "total_tokens": 20658240} +{"current_steps": 6560, "total_steps": 15621, "loss": 0.3603, "lr": 1.4388531400149384e-06, "epoch": 0.4199475065616798, "percentage": 41.99, "elapsed_time": "0:27:10", "remaining_time": "0:37:31", "throughput": 12682.78, "total_tokens": 20673408} +{"current_steps": 6565, "total_steps": 15621, "loss": 0.3865, "lr": 1.4378488451644007e-06, "epoch": 0.42026758850265666, "percentage": 42.03, "elapsed_time": "0:27:10", "remaining_time": "0:37:29", "throughput": 12687.1, "total_tokens": 20688960} +{"current_steps": 6570, "total_steps": 15621, "loss": 0.4216, "lr": 1.4368440036544386e-06, "epoch": 0.42058767044363354, "percentage": 42.06, "elapsed_time": "0:27:11", "remaining_time": "0:37:27", "throughput": 12691.48, "total_tokens": 20704768} +{"current_steps": 6575, "total_steps": 15621, "loss": 0.4157, "lr": 1.435838616739609e-06, "epoch": 0.4209077523846105, "percentage": 42.09, "elapsed_time": "0:27:12", "remaining_time": "0:37:25", "throughput": 12695.62, "total_tokens": 20719808} +{"current_steps": 6580, "total_steps": 15621, "loss": 0.5319, "lr": 1.4348326856751493e-06, "epoch": 0.42122783432558736, "percentage": 42.12, "elapsed_time": "0:27:12", "remaining_time": "0:37:23", "throughput": 12700.07, "total_tokens": 20735680} +{"current_steps": 6585, "total_steps": 15621, "loss": 0.3379, "lr": 1.433826211716976e-06, "epoch": 0.42154791626656424, "percentage": 42.15, "elapsed_time": "0:27:13", "remaining_time": "0:37:21", "throughput": 12703.99, "total_tokens": 20750144} +{"current_steps": 6590, "total_steps": 15621, "loss": 0.3988, "lr": 1.4328191961216835e-06, "epoch": 0.4218679982075411, "percentage": 42.19, "elapsed_time": "0:27:14", "remaining_time": "0:37:19", "throughput": 12708.44, "total_tokens": 20766016} +{"current_steps": 6595, "total_steps": 15621, "loss": 0.4818, "lr": 1.4318116401465427e-06, "epoch": 0.422188080148518, "percentage": 42.22, "elapsed_time": "0:27:14", "remaining_time": "0:37:17", "throughput": 12713.2, "total_tokens": 20782720} +{"current_steps": 6600, "total_steps": 15621, "loss": 0.3925, "lr": 1.430803545049499e-06, "epoch": 0.42250816208949493, "percentage": 42.25, "elapsed_time": "0:27:15", "remaining_time": "0:37:15", "throughput": 12717.5, "total_tokens": 20798208} +{"current_steps": 6605, "total_steps": 15621, "loss": 0.5891, "lr": 1.4297949120891716e-06, "epoch": 0.4228282440304718, "percentage": 42.28, "elapsed_time": "0:27:16", "remaining_time": "0:37:13", "throughput": 12721.56, "total_tokens": 20813056} +{"current_steps": 6610, "total_steps": 15621, "loss": 0.4266, "lr": 1.4287857425248497e-06, "epoch": 0.4231483259714487, "percentage": 42.31, "elapsed_time": "0:27:16", "remaining_time": "0:37:11", "throughput": 12725.95, "total_tokens": 20828800} +{"current_steps": 6615, "total_steps": 15621, "loss": 0.4956, "lr": 1.427776037616494e-06, "epoch": 0.42346840791242557, "percentage": 42.35, "elapsed_time": "0:27:17", "remaining_time": "0:37:09", "throughput": 12730.39, "total_tokens": 20844736} +{"current_steps": 6620, "total_steps": 15621, "loss": 0.3504, "lr": 1.4267657986247326e-06, "epoch": 0.42378848985340245, "percentage": 42.38, "elapsed_time": "0:27:18", "remaining_time": "0:37:07", "throughput": 12734.86, "total_tokens": 20860672} +{"current_steps": 6625, "total_steps": 15621, "loss": 0.3666, "lr": 1.425755026810861e-06, "epoch": 0.4241085717943794, "percentage": 42.41, "elapsed_time": "0:27:18", "remaining_time": "0:37:05", "throughput": 12739.47, "total_tokens": 20877184} +{"current_steps": 6630, "total_steps": 15621, "loss": 0.3965, "lr": 1.4247437234368394e-06, "epoch": 0.42442865373535626, "percentage": 42.44, "elapsed_time": "0:27:19", "remaining_time": "0:37:03", "throughput": 12744.3, "total_tokens": 20894208} +{"current_steps": 6635, "total_steps": 15621, "loss": 0.407, "lr": 1.423731889765292e-06, "epoch": 0.42474873567633314, "percentage": 42.47, "elapsed_time": "0:27:20", "remaining_time": "0:37:01", "throughput": 12748.57, "total_tokens": 20909696} +{"current_steps": 6640, "total_steps": 15621, "loss": 0.3465, "lr": 1.422719527059505e-06, "epoch": 0.42506881761731, "percentage": 42.51, "elapsed_time": "0:27:20", "remaining_time": "0:36:59", "throughput": 12753.16, "total_tokens": 20926016} +{"current_steps": 6645, "total_steps": 15621, "loss": 0.362, "lr": 1.4217066365834253e-06, "epoch": 0.4253888995582869, "percentage": 42.54, "elapsed_time": "0:27:21", "remaining_time": "0:36:57", "throughput": 12757.37, "total_tokens": 20941440} +{"current_steps": 6650, "total_steps": 15621, "loss": 0.4566, "lr": 1.4206932196016586e-06, "epoch": 0.42570898149926384, "percentage": 42.57, "elapsed_time": "0:27:22", "remaining_time": "0:36:55", "throughput": 12761.42, "total_tokens": 20956352} +{"current_steps": 6655, "total_steps": 15621, "loss": 0.3947, "lr": 1.4196792773794672e-06, "epoch": 0.4260290634402407, "percentage": 42.6, "elapsed_time": "0:27:22", "remaining_time": "0:36:53", "throughput": 12766.15, "total_tokens": 20973056} +{"current_steps": 6660, "total_steps": 15621, "loss": 0.4406, "lr": 1.418664811182771e-06, "epoch": 0.4263491453812176, "percentage": 42.63, "elapsed_time": "0:27:23", "remaining_time": "0:36:51", "throughput": 12770.61, "total_tokens": 20989248} +{"current_steps": 6665, "total_steps": 15621, "loss": 0.4946, "lr": 1.417649822278142e-06, "epoch": 0.4266692273221945, "percentage": 42.67, "elapsed_time": "0:27:24", "remaining_time": "0:36:49", "throughput": 12774.68, "total_tokens": 21004096} +{"current_steps": 6670, "total_steps": 15621, "loss": 0.489, "lr": 1.4166343119328064e-06, "epoch": 0.42698930926317136, "percentage": 42.7, "elapsed_time": "0:27:24", "remaining_time": "0:36:47", "throughput": 12779.2, "total_tokens": 21020224} +{"current_steps": 6675, "total_steps": 15621, "loss": 0.466, "lr": 1.4156182814146404e-06, "epoch": 0.42730939120414824, "percentage": 42.73, "elapsed_time": "0:27:25", "remaining_time": "0:36:45", "throughput": 12783.32, "total_tokens": 21035264} +{"current_steps": 6680, "total_steps": 15621, "loss": 0.354, "lr": 1.4146017319921701e-06, "epoch": 0.42762947314512517, "percentage": 42.76, "elapsed_time": "0:27:26", "remaining_time": "0:36:43", "throughput": 12787.97, "total_tokens": 21051904} +{"current_steps": 6685, "total_steps": 15621, "loss": 0.4117, "lr": 1.4135846649345695e-06, "epoch": 0.42794955508610205, "percentage": 42.79, "elapsed_time": "0:27:26", "remaining_time": "0:36:41", "throughput": 12792.98, "total_tokens": 21069504} +{"current_steps": 6690, "total_steps": 15621, "loss": 0.4259, "lr": 1.4125670815116589e-06, "epoch": 0.42826963702707893, "percentage": 42.83, "elapsed_time": "0:27:27", "remaining_time": "0:36:39", "throughput": 12796.99, "total_tokens": 21084288} +{"current_steps": 6695, "total_steps": 15621, "loss": 0.2933, "lr": 1.4115489829939025e-06, "epoch": 0.4285897189680558, "percentage": 42.86, "elapsed_time": "0:27:28", "remaining_time": "0:36:37", "throughput": 12801.52, "total_tokens": 21100544} +{"current_steps": 6700, "total_steps": 15621, "loss": 0.4315, "lr": 1.4105303706524093e-06, "epoch": 0.4289098009090327, "percentage": 42.89, "elapsed_time": "0:27:28", "remaining_time": "0:36:35", "throughput": 12805.93, "total_tokens": 21116608} +{"current_steps": 6705, "total_steps": 15621, "loss": 0.6147, "lr": 1.4095112457589276e-06, "epoch": 0.4292298828500096, "percentage": 42.92, "elapsed_time": "0:27:29", "remaining_time": "0:36:33", "throughput": 12810.06, "total_tokens": 21131776} +{"current_steps": 6710, "total_steps": 15621, "loss": 0.4185, "lr": 1.4084916095858477e-06, "epoch": 0.4295499647909865, "percentage": 42.95, "elapsed_time": "0:27:30", "remaining_time": "0:36:31", "throughput": 12813.89, "total_tokens": 21146368} +{"current_steps": 6715, "total_steps": 15621, "loss": 0.509, "lr": 1.407471463406197e-06, "epoch": 0.4298700467319634, "percentage": 42.99, "elapsed_time": "0:27:30", "remaining_time": "0:36:29", "throughput": 12818.28, "total_tokens": 21162368} +{"current_steps": 6720, "total_steps": 15621, "loss": 0.4404, "lr": 1.4064508084936399e-06, "epoch": 0.43019012867294026, "percentage": 43.02, "elapsed_time": "0:27:31", "remaining_time": "0:36:27", "throughput": 12822.94, "total_tokens": 21179008} +{"current_steps": 6725, "total_steps": 15621, "loss": 0.569, "lr": 1.405429646122476e-06, "epoch": 0.43051021061391714, "percentage": 43.05, "elapsed_time": "0:27:32", "remaining_time": "0:36:25", "throughput": 12827.76, "total_tokens": 21196160} +{"current_steps": 6730, "total_steps": 15621, "loss": 0.5342, "lr": 1.4044079775676392e-06, "epoch": 0.4308302925548941, "percentage": 43.08, "elapsed_time": "0:27:33", "remaining_time": "0:36:23", "throughput": 12832.12, "total_tokens": 21212032} +{"current_steps": 6735, "total_steps": 15621, "loss": 0.3587, "lr": 1.4033858041046936e-06, "epoch": 0.43115037449587096, "percentage": 43.12, "elapsed_time": "0:27:33", "remaining_time": "0:36:21", "throughput": 12837.39, "total_tokens": 21230272} +{"current_steps": 6740, "total_steps": 15621, "loss": 0.3928, "lr": 1.4023631270098352e-06, "epoch": 0.43147045643684784, "percentage": 43.15, "elapsed_time": "0:27:34", "remaining_time": "0:36:19", "throughput": 12841.58, "total_tokens": 21245760} +{"current_steps": 6745, "total_steps": 15621, "loss": 0.3446, "lr": 1.4013399475598888e-06, "epoch": 0.4317905383778247, "percentage": 43.18, "elapsed_time": "0:27:35", "remaining_time": "0:36:18", "throughput": 12845.65, "total_tokens": 21260992} +{"current_steps": 6750, "total_steps": 15621, "loss": 0.2819, "lr": 1.4003162670323056e-06, "epoch": 0.4321106203188016, "percentage": 43.21, "elapsed_time": "0:27:35", "remaining_time": "0:36:16", "throughput": 12849.33, "total_tokens": 21275136} +{"current_steps": 6755, "total_steps": 15621, "loss": 0.5416, "lr": 1.3992920867051627e-06, "epoch": 0.4324307022597785, "percentage": 43.24, "elapsed_time": "0:27:36", "remaining_time": "0:36:14", "throughput": 12853.49, "total_tokens": 21290560} +{"current_steps": 6760, "total_steps": 15621, "loss": 0.3552, "lr": 1.3982674078571614e-06, "epoch": 0.4327507842007554, "percentage": 43.28, "elapsed_time": "0:27:37", "remaining_time": "0:36:12", "throughput": 12857.51, "total_tokens": 21305536} +{"current_steps": 6765, "total_steps": 15621, "loss": 0.3758, "lr": 1.3972422317676252e-06, "epoch": 0.4330708661417323, "percentage": 43.31, "elapsed_time": "0:27:37", "remaining_time": "0:36:10", "throughput": 12861.53, "total_tokens": 21320576} +{"current_steps": 6770, "total_steps": 15621, "loss": 0.3698, "lr": 1.3962165597164985e-06, "epoch": 0.43339094808270917, "percentage": 43.34, "elapsed_time": "0:27:38", "remaining_time": "0:36:08", "throughput": 12865.54, "total_tokens": 21335680} +{"current_steps": 6775, "total_steps": 15621, "loss": 0.3519, "lr": 1.395190392984345e-06, "epoch": 0.43371103002368605, "percentage": 43.37, "elapsed_time": "0:27:39", "remaining_time": "0:36:06", "throughput": 12869.88, "total_tokens": 21351808} +{"current_steps": 6780, "total_steps": 15621, "loss": 0.4522, "lr": 1.3941637328523452e-06, "epoch": 0.43403111196466293, "percentage": 43.4, "elapsed_time": "0:27:39", "remaining_time": "0:36:04", "throughput": 12873.79, "total_tokens": 21366464} +{"current_steps": 6785, "total_steps": 15621, "loss": 0.3038, "lr": 1.3931365806022978e-06, "epoch": 0.43435119390563987, "percentage": 43.44, "elapsed_time": "0:27:40", "remaining_time": "0:36:02", "throughput": 12878.4, "total_tokens": 21383296} +{"current_steps": 6790, "total_steps": 15621, "loss": 0.3111, "lr": 1.3921089375166131e-06, "epoch": 0.43467127584661674, "percentage": 43.47, "elapsed_time": "0:27:41", "remaining_time": "0:36:00", "throughput": 12882.87, "total_tokens": 21399616} +{"current_steps": 6795, "total_steps": 15621, "loss": 0.4455, "lr": 1.391080804878316e-06, "epoch": 0.4349913577875936, "percentage": 43.5, "elapsed_time": "0:27:41", "remaining_time": "0:35:58", "throughput": 12886.91, "total_tokens": 21414848} +{"current_steps": 6800, "total_steps": 15621, "loss": 0.3804, "lr": 1.3900521839710427e-06, "epoch": 0.4353114397285705, "percentage": 43.53, "elapsed_time": "0:27:42", "remaining_time": "0:35:56", "throughput": 12890.99, "total_tokens": 21430144} +{"current_steps": 6805, "total_steps": 15621, "loss": 0.3503, "lr": 1.3890230760790373e-06, "epoch": 0.4356315216695474, "percentage": 43.56, "elapsed_time": "0:27:43", "remaining_time": "0:35:54", "throughput": 12894.97, "total_tokens": 21445248} +{"current_steps": 6810, "total_steps": 15621, "loss": 0.598, "lr": 1.3879934824871544e-06, "epoch": 0.4359516036105243, "percentage": 43.6, "elapsed_time": "0:27:43", "remaining_time": "0:35:52", "throughput": 12899.03, "total_tokens": 21460544} +{"current_steps": 6815, "total_steps": 15621, "loss": 0.5102, "lr": 1.3869634044808526e-06, "epoch": 0.4362716855515012, "percentage": 43.63, "elapsed_time": "0:27:44", "remaining_time": "0:35:50", "throughput": 12903.26, "total_tokens": 21476224} +{"current_steps": 6820, "total_steps": 15621, "loss": 0.6093, "lr": 1.3859328433461971e-06, "epoch": 0.4365917674924781, "percentage": 43.66, "elapsed_time": "0:27:45", "remaining_time": "0:35:48", "throughput": 12907.45, "total_tokens": 21491712} +{"current_steps": 6825, "total_steps": 15621, "loss": 0.5794, "lr": 1.3849018003698553e-06, "epoch": 0.43691184943345496, "percentage": 43.69, "elapsed_time": "0:27:45", "remaining_time": "0:35:46", "throughput": 12912.2, "total_tokens": 21508928} +{"current_steps": 6830, "total_steps": 15621, "loss": 0.3975, "lr": 1.3838702768390964e-06, "epoch": 0.43723193137443184, "percentage": 43.72, "elapsed_time": "0:27:46", "remaining_time": "0:35:44", "throughput": 12916.05, "total_tokens": 21523648} +{"current_steps": 6835, "total_steps": 15621, "loss": 0.474, "lr": 1.38283827404179e-06, "epoch": 0.43755201331540877, "percentage": 43.76, "elapsed_time": "0:27:47", "remaining_time": "0:35:42", "throughput": 12920.22, "total_tokens": 21539264} +{"current_steps": 6840, "total_steps": 15621, "loss": 0.3763, "lr": 1.381805793266403e-06, "epoch": 0.43787209525638565, "percentage": 43.79, "elapsed_time": "0:27:47", "remaining_time": "0:35:41", "throughput": 12924.56, "total_tokens": 21555520} +{"current_steps": 6845, "total_steps": 15621, "loss": 0.4524, "lr": 1.3807728358020009e-06, "epoch": 0.43819217719736253, "percentage": 43.82, "elapsed_time": "0:27:48", "remaining_time": "0:35:39", "throughput": 12928.39, "total_tokens": 21570112} +{"current_steps": 6850, "total_steps": 15621, "loss": 0.3372, "lr": 1.3797394029382416e-06, "epoch": 0.4385122591383394, "percentage": 43.85, "elapsed_time": "0:27:49", "remaining_time": "0:35:37", "throughput": 12932.19, "total_tokens": 21584768} +{"current_steps": 6855, "total_steps": 15621, "loss": 0.3008, "lr": 1.37870549596538e-06, "epoch": 0.4388323410793163, "percentage": 43.88, "elapsed_time": "0:27:49", "remaining_time": "0:35:35", "throughput": 12936.19, "total_tokens": 21599872} +{"current_steps": 6860, "total_steps": 15621, "loss": 0.5217, "lr": 1.3776711161742595e-06, "epoch": 0.43915242302029317, "percentage": 43.92, "elapsed_time": "0:27:50", "remaining_time": "0:35:33", "throughput": 12940.45, "total_tokens": 21615808} +{"current_steps": 6865, "total_steps": 15621, "loss": 0.4772, "lr": 1.3766362648563166e-06, "epoch": 0.4394725049612701, "percentage": 43.95, "elapsed_time": "0:27:51", "remaining_time": "0:35:31", "throughput": 12944.32, "total_tokens": 21630656} +{"current_steps": 6870, "total_steps": 15621, "loss": 0.4123, "lr": 1.3756009433035744e-06, "epoch": 0.439792586902247, "percentage": 43.98, "elapsed_time": "0:27:51", "remaining_time": "0:35:29", "throughput": 12948.74, "total_tokens": 21646976} +{"current_steps": 6875, "total_steps": 15621, "loss": 0.5783, "lr": 1.3745651528086447e-06, "epoch": 0.44011266884322386, "percentage": 44.01, "elapsed_time": "0:27:52", "remaining_time": "0:35:27", "throughput": 12953.79, "total_tokens": 21665024} +{"current_steps": 6880, "total_steps": 15621, "loss": 0.4489, "lr": 1.373528894664724e-06, "epoch": 0.44043275078420074, "percentage": 44.04, "elapsed_time": "0:27:53", "remaining_time": "0:35:25", "throughput": 12957.77, "total_tokens": 21680128} +{"current_steps": 6885, "total_steps": 15621, "loss": 0.3466, "lr": 1.3724921701655924e-06, "epoch": 0.4407528327251776, "percentage": 44.08, "elapsed_time": "0:27:53", "remaining_time": "0:35:23", "throughput": 12961.9, "total_tokens": 21695808} +{"current_steps": 6890, "total_steps": 15621, "loss": 0.3186, "lr": 1.3714549806056125e-06, "epoch": 0.44107291466615456, "percentage": 44.11, "elapsed_time": "0:27:54", "remaining_time": "0:35:21", "throughput": 12966.18, "total_tokens": 21711936} +{"current_steps": 6895, "total_steps": 15621, "loss": 0.4162, "lr": 1.3704173272797283e-06, "epoch": 0.44139299660713144, "percentage": 44.14, "elapsed_time": "0:27:55", "remaining_time": "0:35:20", "throughput": 12970.26, "total_tokens": 21727488} +{"current_steps": 6900, "total_steps": 15621, "loss": 0.4556, "lr": 1.3693792114834619e-06, "epoch": 0.4417130785481083, "percentage": 44.17, "elapsed_time": "0:27:55", "remaining_time": "0:35:18", "throughput": 12975.21, "total_tokens": 21745280} +{"current_steps": 6905, "total_steps": 15621, "loss": 0.467, "lr": 1.3683406345129129e-06, "epoch": 0.4420331604890852, "percentage": 44.2, "elapsed_time": "0:27:56", "remaining_time": "0:35:16", "throughput": 12979.03, "total_tokens": 21760000} +{"current_steps": 6910, "total_steps": 15621, "loss": 0.3971, "lr": 1.3673015976647567e-06, "epoch": 0.4423532424300621, "percentage": 44.24, "elapsed_time": "0:27:57", "remaining_time": "0:35:14", "throughput": 12983.04, "total_tokens": 21775232} +{"current_steps": 6915, "total_steps": 15621, "loss": 0.3979, "lr": 1.3662621022362435e-06, "epoch": 0.442673324371039, "percentage": 44.27, "elapsed_time": "0:27:57", "remaining_time": "0:35:12", "throughput": 12987.15, "total_tokens": 21790656} +{"current_steps": 6920, "total_steps": 15621, "loss": 0.462, "lr": 1.3652221495251952e-06, "epoch": 0.4429934063120159, "percentage": 44.3, "elapsed_time": "0:27:58", "remaining_time": "0:35:10", "throughput": 12991.25, "total_tokens": 21806336} +{"current_steps": 6925, "total_steps": 15621, "loss": 0.3242, "lr": 1.3641817408300049e-06, "epoch": 0.44331348825299277, "percentage": 44.33, "elapsed_time": "0:27:59", "remaining_time": "0:35:08", "throughput": 12996.06, "total_tokens": 21823744} +{"current_steps": 6930, "total_steps": 15621, "loss": 0.559, "lr": 1.3631408774496352e-06, "epoch": 0.44363357019396965, "percentage": 44.36, "elapsed_time": "0:27:59", "remaining_time": "0:35:06", "throughput": 13000.05, "total_tokens": 21839104} +{"current_steps": 6935, "total_steps": 15621, "loss": 0.3616, "lr": 1.3620995606836165e-06, "epoch": 0.44395365213494653, "percentage": 44.4, "elapsed_time": "0:28:00", "remaining_time": "0:35:04", "throughput": 13004.1, "total_tokens": 21854528} +{"current_steps": 6940, "total_steps": 15621, "loss": 0.6013, "lr": 1.3610577918320446e-06, "epoch": 0.4442737340759234, "percentage": 44.43, "elapsed_time": "0:28:01", "remaining_time": "0:35:03", "throughput": 13008.36, "total_tokens": 21870592} +{"current_steps": 6945, "total_steps": 15621, "loss": 0.3823, "lr": 1.3600155721955802e-06, "epoch": 0.44459381601690035, "percentage": 44.46, "elapsed_time": "0:28:01", "remaining_time": "0:35:01", "throughput": 13012.25, "total_tokens": 21885696} +{"current_steps": 6950, "total_steps": 15621, "loss": 0.4017, "lr": 1.3589729030754468e-06, "epoch": 0.4449138979578772, "percentage": 44.49, "elapsed_time": "0:28:02", "remaining_time": "0:34:59", "throughput": 13016.34, "total_tokens": 21901248} +{"current_steps": 6955, "total_steps": 15621, "loss": 0.4293, "lr": 1.3579297857734293e-06, "epoch": 0.4452339798988541, "percentage": 44.52, "elapsed_time": "0:28:03", "remaining_time": "0:34:57", "throughput": 13020.2, "total_tokens": 21916352} +{"current_steps": 6960, "total_steps": 15621, "loss": 0.3354, "lr": 1.3568862215918717e-06, "epoch": 0.445554061839831, "percentage": 44.56, "elapsed_time": "0:28:03", "remaining_time": "0:34:55", "throughput": 13023.94, "total_tokens": 21931072} +{"current_steps": 6965, "total_steps": 15621, "loss": 0.5014, "lr": 1.3558422118336762e-06, "epoch": 0.44587414378080786, "percentage": 44.59, "elapsed_time": "0:28:04", "remaining_time": "0:34:53", "throughput": 13028.03, "total_tokens": 21946752} +{"current_steps": 6970, "total_steps": 15621, "loss": 0.4669, "lr": 1.354797757802301e-06, "epoch": 0.4461942257217848, "percentage": 44.62, "elapsed_time": "0:28:05", "remaining_time": "0:34:51", "throughput": 13032.04, "total_tokens": 21962176} +{"current_steps": 6975, "total_steps": 15621, "loss": 0.3986, "lr": 1.3537528608017596e-06, "epoch": 0.4465143076627617, "percentage": 44.65, "elapsed_time": "0:28:05", "remaining_time": "0:34:49", "throughput": 13036.36, "total_tokens": 21978496} +{"current_steps": 6980, "total_steps": 15621, "loss": 0.3989, "lr": 1.352707522136618e-06, "epoch": 0.44683438960373856, "percentage": 44.68, "elapsed_time": "0:28:06", "remaining_time": "0:34:47", "throughput": 13039.84, "total_tokens": 21992576} +{"current_steps": 6985, "total_steps": 15621, "loss": 0.3987, "lr": 1.3516617431119934e-06, "epoch": 0.44715447154471544, "percentage": 44.72, "elapsed_time": "0:28:07", "remaining_time": "0:34:46", "throughput": 13043.85, "total_tokens": 22008000} +{"current_steps": 6990, "total_steps": 15621, "loss": 0.5453, "lr": 1.350615525033554e-06, "epoch": 0.4474745534856923, "percentage": 44.75, "elapsed_time": "0:28:07", "remaining_time": "0:34:44", "throughput": 13047.7, "total_tokens": 22022976} +{"current_steps": 6995, "total_steps": 15621, "loss": 0.4055, "lr": 1.3495688692075144e-06, "epoch": 0.44779463542666925, "percentage": 44.78, "elapsed_time": "0:28:08", "remaining_time": "0:34:42", "throughput": 13051.64, "total_tokens": 22038144} +{"current_steps": 7000, "total_steps": 15621, "loss": 0.35, "lr": 1.3485217769406376e-06, "epoch": 0.44811471736764613, "percentage": 44.81, "elapsed_time": "0:28:09", "remaining_time": "0:34:40", "throughput": 13055.83, "total_tokens": 22054016} +{"current_steps": 7005, "total_steps": 15621, "loss": 0.3627, "lr": 1.3474742495402303e-06, "epoch": 0.448434799308623, "percentage": 44.84, "elapsed_time": "0:28:10", "remaining_time": "0:34:38", "throughput": 13061.32, "total_tokens": 22073920} +{"current_steps": 7010, "total_steps": 15621, "loss": 0.4295, "lr": 1.3464262883141425e-06, "epoch": 0.4487548812495999, "percentage": 44.88, "elapsed_time": "0:28:10", "remaining_time": "0:34:36", "throughput": 13065.51, "total_tokens": 22089728} +{"current_steps": 7015, "total_steps": 15621, "loss": 0.5883, "lr": 1.3453778945707663e-06, "epoch": 0.44907496319057677, "percentage": 44.91, "elapsed_time": "0:28:11", "remaining_time": "0:34:34", "throughput": 13069.51, "total_tokens": 22105344} +{"current_steps": 7020, "total_steps": 15621, "loss": 0.4596, "lr": 1.3443290696190332e-06, "epoch": 0.4493950451315537, "percentage": 44.94, "elapsed_time": "0:28:12", "remaining_time": "0:34:33", "throughput": 13073.87, "total_tokens": 22121792} +{"current_steps": 7025, "total_steps": 15621, "loss": 0.4175, "lr": 1.343279814768414e-06, "epoch": 0.4497151270725306, "percentage": 44.97, "elapsed_time": "0:28:12", "remaining_time": "0:34:31", "throughput": 13077.45, "total_tokens": 22136128} +{"current_steps": 7030, "total_steps": 15621, "loss": 0.3849, "lr": 1.3422301313289156e-06, "epoch": 0.45003520901350746, "percentage": 45.0, "elapsed_time": "0:28:13", "remaining_time": "0:34:29", "throughput": 13081.58, "total_tokens": 22151936} +{"current_steps": 7035, "total_steps": 15621, "loss": 0.3775, "lr": 1.34118002061108e-06, "epoch": 0.45035529095448434, "percentage": 45.04, "elapsed_time": "0:28:14", "remaining_time": "0:34:27", "throughput": 13085.82, "total_tokens": 22168128} +{"current_steps": 7038, "total_steps": 15621, "eval_loss": 0.43633610010147095, "epoch": 0.4505473401190705, "percentage": 45.05, "elapsed_time": "0:29:05", "remaining_time": "0:35:28", "throughput": 12708.94, "total_tokens": 22178432} +{"current_steps": 7040, "total_steps": 15621, "loss": 0.4432, "lr": 1.3401294839259828e-06, "epoch": 0.4506753728954612, "percentage": 45.07, "elapsed_time": "0:32:59", "remaining_time": "0:40:13", "throughput": 11204.92, "total_tokens": 22184512} +{"current_steps": 7045, "total_steps": 15621, "loss": 0.5428, "lr": 1.3390785225852312e-06, "epoch": 0.4509954548364381, "percentage": 45.1, "elapsed_time": "0:33:00", "remaining_time": "0:40:10", "throughput": 11208.94, "total_tokens": 22199872} +{"current_steps": 7050, "total_steps": 15621, "loss": 0.444, "lr": 1.3380271379009631e-06, "epoch": 0.45131553677741504, "percentage": 45.13, "elapsed_time": "0:33:01", "remaining_time": "0:40:08", "throughput": 11213.51, "total_tokens": 22216960} +{"current_steps": 7055, "total_steps": 15621, "loss": 0.2645, "lr": 1.3369753311858442e-06, "epoch": 0.4516356187183919, "percentage": 45.16, "elapsed_time": "0:33:01", "remaining_time": "0:40:06", "throughput": 11217.26, "total_tokens": 22231488} +{"current_steps": 7060, "total_steps": 15621, "loss": 0.4597, "lr": 1.3359231037530682e-06, "epoch": 0.4519557006593688, "percentage": 45.2, "elapsed_time": "0:33:02", "remaining_time": "0:40:04", "throughput": 11221.3, "total_tokens": 22246976} +{"current_steps": 7065, "total_steps": 15621, "loss": 0.4178, "lr": 1.3348704569163527e-06, "epoch": 0.4522757826003457, "percentage": 45.23, "elapsed_time": "0:33:03", "remaining_time": "0:40:01", "throughput": 11225.7, "total_tokens": 22263680} +{"current_steps": 7070, "total_steps": 15621, "loss": 0.3371, "lr": 1.33381739198994e-06, "epoch": 0.45259586454132256, "percentage": 45.26, "elapsed_time": "0:33:03", "remaining_time": "0:39:59", "throughput": 11229.87, "total_tokens": 22279552} +{"current_steps": 7075, "total_steps": 15621, "loss": 0.4463, "lr": 1.3327639102885938e-06, "epoch": 0.4529159464822995, "percentage": 45.29, "elapsed_time": "0:33:04", "remaining_time": "0:39:57", "throughput": 11234.01, "total_tokens": 22295296} +{"current_steps": 7080, "total_steps": 15621, "loss": 0.3979, "lr": 1.3317100131275986e-06, "epoch": 0.45323602842327637, "percentage": 45.32, "elapsed_time": "0:33:05", "remaining_time": "0:39:54", "throughput": 11237.89, "total_tokens": 22310400} +{"current_steps": 7085, "total_steps": 15621, "loss": 0.4852, "lr": 1.3306557018227576e-06, "epoch": 0.45355611036425325, "percentage": 45.36, "elapsed_time": "0:33:05", "remaining_time": "0:39:52", "throughput": 11242.27, "total_tokens": 22326848} +{"current_steps": 7090, "total_steps": 15621, "loss": 0.4673, "lr": 1.3296009776903903e-06, "epoch": 0.45387619230523013, "percentage": 45.39, "elapsed_time": "0:33:06", "remaining_time": "0:39:50", "throughput": 11246.39, "total_tokens": 22342592} +{"current_steps": 7095, "total_steps": 15621, "loss": 0.4693, "lr": 1.3285458420473323e-06, "epoch": 0.454196274246207, "percentage": 45.42, "elapsed_time": "0:33:07", "remaining_time": "0:39:48", "throughput": 11250.7, "total_tokens": 22358912} +{"current_steps": 7100, "total_steps": 15621, "loss": 0.3789, "lr": 1.3274902962109332e-06, "epoch": 0.45451635618718395, "percentage": 45.45, "elapsed_time": "0:33:08", "remaining_time": "0:39:45", "throughput": 11254.78, "total_tokens": 22374528} +{"current_steps": 7105, "total_steps": 15621, "loss": 0.3752, "lr": 1.3264343414990539e-06, "epoch": 0.4548364381281608, "percentage": 45.48, "elapsed_time": "0:33:08", "remaining_time": "0:39:43", "throughput": 11258.75, "total_tokens": 22389824} +{"current_steps": 7110, "total_steps": 15621, "loss": 0.4269, "lr": 1.3253779792300663e-06, "epoch": 0.4551565200691377, "percentage": 45.52, "elapsed_time": "0:33:09", "remaining_time": "0:39:41", "throughput": 11262.77, "total_tokens": 22405376} +{"current_steps": 7115, "total_steps": 15621, "loss": 0.3442, "lr": 1.3243212107228518e-06, "epoch": 0.4554766020101146, "percentage": 45.55, "elapsed_time": "0:33:09", "remaining_time": "0:39:39", "throughput": 11266.54, "total_tokens": 22420032} +{"current_steps": 7120, "total_steps": 15621, "loss": 0.393, "lr": 1.3232640372967974e-06, "epoch": 0.45579668395109146, "percentage": 45.58, "elapsed_time": "0:33:10", "remaining_time": "0:39:36", "throughput": 11270.29, "total_tokens": 22434688} +{"current_steps": 7125, "total_steps": 15621, "loss": 0.4691, "lr": 1.3222064602717974e-06, "epoch": 0.45611676589206834, "percentage": 45.61, "elapsed_time": "0:33:11", "remaining_time": "0:39:34", "throughput": 11274.58, "total_tokens": 22451072} +{"current_steps": 7130, "total_steps": 15621, "loss": 0.3578, "lr": 1.321148480968248e-06, "epoch": 0.4564368478330453, "percentage": 45.64, "elapsed_time": "0:33:11", "remaining_time": "0:39:32", "throughput": 11278.6, "total_tokens": 22466688} +{"current_steps": 7135, "total_steps": 15621, "loss": 0.4627, "lr": 1.3200901007070495e-06, "epoch": 0.45675692977402216, "percentage": 45.68, "elapsed_time": "0:33:12", "remaining_time": "0:39:29", "throughput": 11282.68, "total_tokens": 22482432} +{"current_steps": 7140, "total_steps": 15621, "loss": 0.4653, "lr": 1.3190313208096022e-06, "epoch": 0.45707701171499904, "percentage": 45.71, "elapsed_time": "0:33:13", "remaining_time": "0:39:27", "throughput": 11286.37, "total_tokens": 22496960} +{"current_steps": 7145, "total_steps": 15621, "loss": 0.3506, "lr": 1.3179721425978048e-06, "epoch": 0.4573970936559759, "percentage": 45.74, "elapsed_time": "0:33:13", "remaining_time": "0:39:25", "throughput": 11290.31, "total_tokens": 22512256} +{"current_steps": 7150, "total_steps": 15621, "loss": 0.3801, "lr": 1.3169125673940541e-06, "epoch": 0.4577171755969528, "percentage": 45.77, "elapsed_time": "0:33:14", "remaining_time": "0:39:23", "throughput": 11294.47, "total_tokens": 22528192} +{"current_steps": 7155, "total_steps": 15621, "loss": 0.4222, "lr": 1.3158525965212422e-06, "epoch": 0.45803725753792973, "percentage": 45.8, "elapsed_time": "0:33:15", "remaining_time": "0:39:20", "throughput": 11299.02, "total_tokens": 22545408} +{"current_steps": 7160, "total_steps": 15621, "loss": 0.499, "lr": 1.3147922313027548e-06, "epoch": 0.4583573394789066, "percentage": 45.84, "elapsed_time": "0:33:16", "remaining_time": "0:39:18", "throughput": 11302.98, "total_tokens": 22560832} +{"current_steps": 7165, "total_steps": 15621, "loss": 0.3566, "lr": 1.3137314730624707e-06, "epoch": 0.4586774214198835, "percentage": 45.87, "elapsed_time": "0:33:16", "remaining_time": "0:39:16", "throughput": 11307.46, "total_tokens": 22577728} +{"current_steps": 7170, "total_steps": 15621, "loss": 0.4792, "lr": 1.3126703231247588e-06, "epoch": 0.45899750336086037, "percentage": 45.9, "elapsed_time": "0:33:17", "remaining_time": "0:39:14", "throughput": 11311.74, "total_tokens": 22594112} +{"current_steps": 7175, "total_steps": 15621, "loss": 0.3942, "lr": 1.3116087828144772e-06, "epoch": 0.45931758530183725, "percentage": 45.93, "elapsed_time": "0:33:18", "remaining_time": "0:39:12", "throughput": 11315.73, "total_tokens": 22609728} +{"current_steps": 7180, "total_steps": 15621, "loss": 0.4788, "lr": 1.310546853456972e-06, "epoch": 0.4596376672428142, "percentage": 45.96, "elapsed_time": "0:33:18", "remaining_time": "0:39:09", "throughput": 11319.55, "total_tokens": 22624704} +{"current_steps": 7185, "total_steps": 15621, "loss": 0.3133, "lr": 1.3094845363780737e-06, "epoch": 0.45995774918379106, "percentage": 46.0, "elapsed_time": "0:33:19", "remaining_time": "0:39:07", "throughput": 11323.61, "total_tokens": 22640448} +{"current_steps": 7190, "total_steps": 15621, "loss": 0.2221, "lr": 1.3084218329040976e-06, "epoch": 0.46027783112476794, "percentage": 46.03, "elapsed_time": "0:33:20", "remaining_time": "0:39:05", "throughput": 11327.5, "total_tokens": 22655680} +{"current_steps": 7195, "total_steps": 15621, "loss": 0.3836, "lr": 1.3073587443618425e-06, "epoch": 0.4605979130657448, "percentage": 46.06, "elapsed_time": "0:33:20", "remaining_time": "0:39:03", "throughput": 11331.83, "total_tokens": 22672128} +{"current_steps": 7200, "total_steps": 15621, "loss": 0.528, "lr": 1.3062952720785861e-06, "epoch": 0.4609179950067217, "percentage": 46.09, "elapsed_time": "0:33:21", "remaining_time": "0:39:00", "throughput": 11335.61, "total_tokens": 22687104} +{"current_steps": 7205, "total_steps": 15621, "loss": 0.3679, "lr": 1.305231417382086e-06, "epoch": 0.4612380769476986, "percentage": 46.12, "elapsed_time": "0:33:22", "remaining_time": "0:38:58", "throughput": 11339.7, "total_tokens": 22702976} +{"current_steps": 7210, "total_steps": 15621, "loss": 0.3473, "lr": 1.3041671816005777e-06, "epoch": 0.4615581588886755, "percentage": 46.16, "elapsed_time": "0:33:22", "remaining_time": "0:38:56", "throughput": 11343.66, "total_tokens": 22718464} +{"current_steps": 7215, "total_steps": 15621, "loss": 0.3735, "lr": 1.3031025660627718e-06, "epoch": 0.4618782408296524, "percentage": 46.19, "elapsed_time": "0:33:23", "remaining_time": "0:38:54", "throughput": 11347.84, "total_tokens": 22734656} +{"current_steps": 7220, "total_steps": 15621, "loss": 0.4378, "lr": 1.3020375720978534e-06, "epoch": 0.4621983227706293, "percentage": 46.22, "elapsed_time": "0:33:24", "remaining_time": "0:38:51", "throughput": 11351.77, "total_tokens": 22750016} +{"current_steps": 7225, "total_steps": 15621, "loss": 0.385, "lr": 1.3009722010354799e-06, "epoch": 0.46251840471160616, "percentage": 46.25, "elapsed_time": "0:33:24", "remaining_time": "0:38:49", "throughput": 11355.75, "total_tokens": 22765632} +{"current_steps": 7230, "total_steps": 15621, "loss": 0.4572, "lr": 1.2999064542057794e-06, "epoch": 0.46283848665258304, "percentage": 46.28, "elapsed_time": "0:33:25", "remaining_time": "0:38:47", "throughput": 11359.7, "total_tokens": 22781184} +{"current_steps": 7235, "total_steps": 15621, "loss": 0.4955, "lr": 1.2988403329393495e-06, "epoch": 0.46315856859355997, "percentage": 46.32, "elapsed_time": "0:33:26", "remaining_time": "0:38:45", "throughput": 11363.79, "total_tokens": 22797248} +{"current_steps": 7240, "total_steps": 15621, "loss": 0.4186, "lr": 1.2977738385672557e-06, "epoch": 0.46347865053453685, "percentage": 46.35, "elapsed_time": "0:33:26", "remaining_time": "0:38:43", "throughput": 11367.72, "total_tokens": 22812800} +{"current_steps": 7245, "total_steps": 15621, "loss": 0.4086, "lr": 1.2967069724210278e-06, "epoch": 0.46379873247551373, "percentage": 46.38, "elapsed_time": "0:33:27", "remaining_time": "0:38:40", "throughput": 11371.31, "total_tokens": 22827200} +{"current_steps": 7250, "total_steps": 15621, "loss": 0.5472, "lr": 1.2956397358326609e-06, "epoch": 0.4641188144164906, "percentage": 46.41, "elapsed_time": "0:33:28", "remaining_time": "0:38:38", "throughput": 11375.4, "total_tokens": 22843264} +{"current_steps": 7255, "total_steps": 15621, "loss": 0.3845, "lr": 1.294572130134613e-06, "epoch": 0.4644388963574675, "percentage": 46.44, "elapsed_time": "0:33:28", "remaining_time": "0:38:36", "throughput": 11379.29, "total_tokens": 22858624} +{"current_steps": 7260, "total_steps": 15621, "loss": 0.5608, "lr": 1.2935041566598016e-06, "epoch": 0.4647589782984444, "percentage": 46.48, "elapsed_time": "0:33:29", "remaining_time": "0:38:34", "throughput": 11383.17, "total_tokens": 22873856} +{"current_steps": 7265, "total_steps": 15621, "loss": 0.3669, "lr": 1.2924358167416049e-06, "epoch": 0.4650790602394213, "percentage": 46.51, "elapsed_time": "0:33:30", "remaining_time": "0:38:31", "throughput": 11387.16, "total_tokens": 22889600} +{"current_steps": 7270, "total_steps": 15621, "loss": 0.4085, "lr": 1.2913671117138572e-06, "epoch": 0.4653991421803982, "percentage": 46.54, "elapsed_time": "0:33:30", "remaining_time": "0:38:29", "throughput": 11390.95, "total_tokens": 22904704} +{"current_steps": 7275, "total_steps": 15621, "loss": 0.3516, "lr": 1.29029804291085e-06, "epoch": 0.46571922412137506, "percentage": 46.57, "elapsed_time": "0:33:31", "remaining_time": "0:38:27", "throughput": 11394.89, "total_tokens": 22920384} +{"current_steps": 7280, "total_steps": 15621, "loss": 0.3724, "lr": 1.2892286116673269e-06, "epoch": 0.46603930606235194, "percentage": 46.6, "elapsed_time": "0:33:32", "remaining_time": "0:38:25", "throughput": 11399.14, "total_tokens": 22937024} +{"current_steps": 7285, "total_steps": 15621, "loss": 0.501, "lr": 1.2881588193184865e-06, "epoch": 0.4663593880033289, "percentage": 46.64, "elapsed_time": "0:33:32", "remaining_time": "0:38:23", "throughput": 11403.79, "total_tokens": 22954816} +{"current_steps": 7290, "total_steps": 15621, "loss": 0.2811, "lr": 1.287088667199977e-06, "epoch": 0.46667946994430576, "percentage": 46.67, "elapsed_time": "0:33:33", "remaining_time": "0:38:21", "throughput": 11407.44, "total_tokens": 22969472} +{"current_steps": 7295, "total_steps": 15621, "loss": 0.4666, "lr": 1.2860181566478956e-06, "epoch": 0.46699955188528264, "percentage": 46.7, "elapsed_time": "0:33:34", "remaining_time": "0:38:18", "throughput": 11411.09, "total_tokens": 22984192} +{"current_steps": 7300, "total_steps": 15621, "loss": 0.3772, "lr": 1.2849472889987874e-06, "epoch": 0.4673196338262595, "percentage": 46.73, "elapsed_time": "0:33:34", "remaining_time": "0:38:16", "throughput": 11414.99, "total_tokens": 22999680} +{"current_steps": 7305, "total_steps": 15621, "loss": 0.3756, "lr": 1.2838760655896431e-06, "epoch": 0.4676397157672364, "percentage": 46.76, "elapsed_time": "0:33:35", "remaining_time": "0:38:14", "throughput": 11418.71, "total_tokens": 23014720} +{"current_steps": 7310, "total_steps": 15621, "loss": 0.4629, "lr": 1.2828044877578983e-06, "epoch": 0.4679597977082133, "percentage": 46.8, "elapsed_time": "0:33:36", "remaining_time": "0:38:12", "throughput": 11422.74, "total_tokens": 23030528} +{"current_steps": 7315, "total_steps": 15621, "loss": 0.5176, "lr": 1.2817325568414297e-06, "epoch": 0.4682798796491902, "percentage": 46.83, "elapsed_time": "0:33:36", "remaining_time": "0:38:10", "throughput": 11426.88, "total_tokens": 23046784} +{"current_steps": 7320, "total_steps": 15621, "loss": 0.3307, "lr": 1.2806602741785562e-06, "epoch": 0.4685999615901671, "percentage": 46.86, "elapsed_time": "0:33:37", "remaining_time": "0:38:07", "throughput": 11430.55, "total_tokens": 23061632} +{"current_steps": 7325, "total_steps": 15621, "loss": 0.3325, "lr": 1.2795876411080346e-06, "epoch": 0.46892004353114397, "percentage": 46.89, "elapsed_time": "0:33:38", "remaining_time": "0:38:05", "throughput": 11434.66, "total_tokens": 23077888} +{"current_steps": 7330, "total_steps": 15621, "loss": 0.3222, "lr": 1.278514658969061e-06, "epoch": 0.46924012547212085, "percentage": 46.92, "elapsed_time": "0:33:38", "remaining_time": "0:38:03", "throughput": 11438.56, "total_tokens": 23093568} +{"current_steps": 7335, "total_steps": 15621, "loss": 0.5175, "lr": 1.2774413291012648e-06, "epoch": 0.46956020741309773, "percentage": 46.96, "elapsed_time": "0:33:39", "remaining_time": "0:38:01", "throughput": 11442.39, "total_tokens": 23108992} +{"current_steps": 7340, "total_steps": 15621, "loss": 0.4328, "lr": 1.2763676528447122e-06, "epoch": 0.46988028935407467, "percentage": 46.99, "elapsed_time": "0:33:40", "remaining_time": "0:37:59", "throughput": 11446.42, "total_tokens": 23124992} +{"current_steps": 7345, "total_steps": 15621, "loss": 0.3446, "lr": 1.2752936315399003e-06, "epoch": 0.47020037129505154, "percentage": 47.02, "elapsed_time": "0:33:40", "remaining_time": "0:37:57", "throughput": 11450.74, "total_tokens": 23141888} +{"current_steps": 7350, "total_steps": 15621, "loss": 0.343, "lr": 1.2742192665277566e-06, "epoch": 0.4705204532360284, "percentage": 47.05, "elapsed_time": "0:33:41", "remaining_time": "0:37:55", "throughput": 11454.76, "total_tokens": 23157888} +{"current_steps": 7355, "total_steps": 15621, "loss": 0.2838, "lr": 1.2731445591496393e-06, "epoch": 0.4708405351770053, "percentage": 47.08, "elapsed_time": "0:33:42", "remaining_time": "0:37:52", "throughput": 11458.45, "total_tokens": 23172864} +{"current_steps": 7360, "total_steps": 15621, "loss": 0.456, "lr": 1.2720695107473325e-06, "epoch": 0.4711606171179822, "percentage": 47.12, "elapsed_time": "0:33:43", "remaining_time": "0:37:50", "throughput": 11462.35, "total_tokens": 23188352} +{"current_steps": 7365, "total_steps": 15621, "loss": 0.3861, "lr": 1.2709941226630475e-06, "epoch": 0.4714806990589591, "percentage": 47.15, "elapsed_time": "0:33:43", "remaining_time": "0:37:48", "throughput": 11466.23, "total_tokens": 23204096} +{"current_steps": 7370, "total_steps": 15621, "loss": 0.3526, "lr": 1.2699183962394182e-06, "epoch": 0.471800780999936, "percentage": 47.18, "elapsed_time": "0:33:44", "remaining_time": "0:37:46", "throughput": 11469.91, "total_tokens": 23219072} +{"current_steps": 7375, "total_steps": 15621, "loss": 0.4323, "lr": 1.2688423328195021e-06, "epoch": 0.4721208629409129, "percentage": 47.21, "elapsed_time": "0:33:45", "remaining_time": "0:37:44", "throughput": 11473.81, "total_tokens": 23234560} +{"current_steps": 7380, "total_steps": 15621, "loss": 0.3497, "lr": 1.267765933746777e-06, "epoch": 0.47244094488188976, "percentage": 47.24, "elapsed_time": "0:33:45", "remaining_time": "0:37:42", "throughput": 11477.78, "total_tokens": 23250304} +{"current_steps": 7385, "total_steps": 15621, "loss": 0.6383, "lr": 1.2666892003651397e-06, "epoch": 0.47276102682286664, "percentage": 47.28, "elapsed_time": "0:33:46", "remaining_time": "0:37:39", "throughput": 11481.58, "total_tokens": 23265664} +{"current_steps": 7390, "total_steps": 15621, "loss": 0.453, "lr": 1.2656121340189043e-06, "epoch": 0.4730811087638435, "percentage": 47.31, "elapsed_time": "0:33:47", "remaining_time": "0:37:37", "throughput": 11485.56, "total_tokens": 23281472} +{"current_steps": 7395, "total_steps": 15621, "loss": 0.4142, "lr": 1.264534736052801e-06, "epoch": 0.47340119070482045, "percentage": 47.34, "elapsed_time": "0:33:47", "remaining_time": "0:37:35", "throughput": 11489.43, "total_tokens": 23297024} +{"current_steps": 7400, "total_steps": 15621, "loss": 0.4348, "lr": 1.2634570078119739e-06, "epoch": 0.47372127264579733, "percentage": 47.37, "elapsed_time": "0:33:48", "remaining_time": "0:37:33", "throughput": 11493.52, "total_tokens": 23313344} +{"current_steps": 7405, "total_steps": 15621, "loss": 0.535, "lr": 1.262378950641979e-06, "epoch": 0.4740413545867742, "percentage": 47.4, "elapsed_time": "0:33:49", "remaining_time": "0:37:31", "throughput": 11497.26, "total_tokens": 23328512} +{"current_steps": 7410, "total_steps": 15621, "loss": 0.444, "lr": 1.2613005658887836e-06, "epoch": 0.4743614365277511, "percentage": 47.44, "elapsed_time": "0:33:49", "remaining_time": "0:37:29", "throughput": 11500.57, "total_tokens": 23342400} +{"current_steps": 7415, "total_steps": 15621, "loss": 0.4198, "lr": 1.2602218548987637e-06, "epoch": 0.47468151846872797, "percentage": 47.47, "elapsed_time": "0:33:50", "remaining_time": "0:37:26", "throughput": 11504.59, "total_tokens": 23358400} +{"current_steps": 7420, "total_steps": 15621, "loss": 0.4155, "lr": 1.2591428190187029e-06, "epoch": 0.4750016004097049, "percentage": 47.5, "elapsed_time": "0:33:51", "remaining_time": "0:37:24", "throughput": 11508.28, "total_tokens": 23373376} +{"current_steps": 7425, "total_steps": 15621, "loss": 0.5093, "lr": 1.2580634595957898e-06, "epoch": 0.4753216823506818, "percentage": 47.53, "elapsed_time": "0:33:51", "remaining_time": "0:37:22", "throughput": 11512.65, "total_tokens": 23390400} +{"current_steps": 7430, "total_steps": 15621, "loss": 0.3871, "lr": 1.2569837779776172e-06, "epoch": 0.47564176429165866, "percentage": 47.56, "elapsed_time": "0:33:52", "remaining_time": "0:37:20", "throughput": 11516.65, "total_tokens": 23406400} +{"current_steps": 7435, "total_steps": 15621, "loss": 0.3134, "lr": 1.2559037755121804e-06, "epoch": 0.47596184623263554, "percentage": 47.6, "elapsed_time": "0:33:53", "remaining_time": "0:37:18", "throughput": 11520.47, "total_tokens": 23421824} +{"current_steps": 7440, "total_steps": 15621, "loss": 0.4599, "lr": 1.2548234535478754e-06, "epoch": 0.4762819281736124, "percentage": 47.63, "elapsed_time": "0:33:53", "remaining_time": "0:37:16", "throughput": 11524.66, "total_tokens": 23438272} +{"current_steps": 7445, "total_steps": 15621, "loss": 0.4267, "lr": 1.2537428134334968e-06, "epoch": 0.47660201011458936, "percentage": 47.66, "elapsed_time": "0:33:54", "remaining_time": "0:37:14", "throughput": 11528.87, "total_tokens": 23454976} +{"current_steps": 7450, "total_steps": 15621, "loss": 0.5302, "lr": 1.252661856518236e-06, "epoch": 0.47692209205556624, "percentage": 47.69, "elapsed_time": "0:33:55", "remaining_time": "0:37:12", "throughput": 11532.98, "total_tokens": 23471168} +{"current_steps": 7455, "total_steps": 15621, "loss": 0.3683, "lr": 1.251580584151681e-06, "epoch": 0.4772421739965431, "percentage": 47.72, "elapsed_time": "0:33:55", "remaining_time": "0:37:09", "throughput": 11536.85, "total_tokens": 23486720} +{"current_steps": 7460, "total_steps": 15621, "loss": 0.309, "lr": 1.2504989976838129e-06, "epoch": 0.47756225593752, "percentage": 47.76, "elapsed_time": "0:33:56", "remaining_time": "0:37:07", "throughput": 11540.9, "total_tokens": 23502912} +{"current_steps": 7465, "total_steps": 15621, "loss": 0.3629, "lr": 1.2494170984650048e-06, "epoch": 0.4778823378784969, "percentage": 47.79, "elapsed_time": "0:33:57", "remaining_time": "0:37:05", "throughput": 11545.15, "total_tokens": 23519552} +{"current_steps": 7470, "total_steps": 15621, "loss": 0.4253, "lr": 1.248334887846021e-06, "epoch": 0.4782024198194738, "percentage": 47.82, "elapsed_time": "0:33:57", "remaining_time": "0:37:03", "throughput": 11549.25, "total_tokens": 23535936} +{"current_steps": 7475, "total_steps": 15621, "loss": 0.4411, "lr": 1.2472523671780135e-06, "epoch": 0.4785225017604507, "percentage": 47.85, "elapsed_time": "0:33:58", "remaining_time": "0:37:01", "throughput": 11552.94, "total_tokens": 23551040} +{"current_steps": 7480, "total_steps": 15621, "loss": 0.309, "lr": 1.2461695378125233e-06, "epoch": 0.47884258370142757, "percentage": 47.88, "elapsed_time": "0:33:59", "remaining_time": "0:36:59", "throughput": 11556.62, "total_tokens": 23566208} +{"current_steps": 7485, "total_steps": 15621, "loss": 0.4347, "lr": 1.245086401101474e-06, "epoch": 0.47916266564240445, "percentage": 47.92, "elapsed_time": "0:33:59", "remaining_time": "0:36:57", "throughput": 11560.45, "total_tokens": 23581696} +{"current_steps": 7490, "total_steps": 15621, "loss": 0.4439, "lr": 1.2440029583971757e-06, "epoch": 0.47948274758338133, "percentage": 47.95, "elapsed_time": "0:34:00", "remaining_time": "0:36:55", "throughput": 11564.28, "total_tokens": 23597248} +{"current_steps": 7495, "total_steps": 15621, "loss": 0.502, "lr": 1.2429192110523188e-06, "epoch": 0.4798028295243582, "percentage": 47.98, "elapsed_time": "0:34:01", "remaining_time": "0:36:53", "throughput": 11568.08, "total_tokens": 23612800} +{"current_steps": 7500, "total_steps": 15621, "loss": 0.3388, "lr": 1.2418351604199746e-06, "epoch": 0.48012291146533514, "percentage": 48.01, "elapsed_time": "0:34:01", "remaining_time": "0:36:50", "throughput": 11572.12, "total_tokens": 23629056} +{"current_steps": 7505, "total_steps": 15621, "loss": 0.4502, "lr": 1.2407508078535934e-06, "epoch": 0.480442993406312, "percentage": 48.04, "elapsed_time": "0:34:02", "remaining_time": "0:36:48", "throughput": 11575.86, "total_tokens": 23644352} +{"current_steps": 7510, "total_steps": 15621, "loss": 0.2899, "lr": 1.2396661547070017e-06, "epoch": 0.4807630753472889, "percentage": 48.08, "elapsed_time": "0:34:03", "remaining_time": "0:36:46", "throughput": 11580.07, "total_tokens": 23661120} +{"current_steps": 7515, "total_steps": 15621, "loss": 0.3362, "lr": 1.238581202334402e-06, "epoch": 0.4810831572882658, "percentage": 48.11, "elapsed_time": "0:34:03", "remaining_time": "0:36:44", "throughput": 11584.19, "total_tokens": 23677632} +{"current_steps": 7520, "total_steps": 15621, "loss": 0.3676, "lr": 1.2374959520903699e-06, "epoch": 0.48140323922924266, "percentage": 48.14, "elapsed_time": "0:34:04", "remaining_time": "0:36:42", "throughput": 11588.28, "total_tokens": 23693952} +{"current_steps": 7525, "total_steps": 15621, "loss": 0.3442, "lr": 1.2364104053298531e-06, "epoch": 0.4817233211702196, "percentage": 48.17, "elapsed_time": "0:34:05", "remaining_time": "0:36:40", "throughput": 11591.88, "total_tokens": 23708736} +{"current_steps": 7530, "total_steps": 15621, "loss": 0.392, "lr": 1.2353245634081692e-06, "epoch": 0.4820434031111965, "percentage": 48.2, "elapsed_time": "0:34:05", "remaining_time": "0:36:38", "throughput": 11595.89, "total_tokens": 23724864} +{"current_steps": 7535, "total_steps": 15621, "loss": 0.4165, "lr": 1.2342384276810053e-06, "epoch": 0.48236348505217336, "percentage": 48.24, "elapsed_time": "0:34:06", "remaining_time": "0:36:36", "throughput": 11599.62, "total_tokens": 23740160} +{"current_steps": 7540, "total_steps": 15621, "loss": 0.435, "lr": 1.233151999504414e-06, "epoch": 0.48268356699315024, "percentage": 48.27, "elapsed_time": "0:34:07", "remaining_time": "0:36:34", "throughput": 11603.29, "total_tokens": 23755264} +{"current_steps": 7545, "total_steps": 15621, "loss": 0.3445, "lr": 1.232065280234814e-06, "epoch": 0.4830036489341271, "percentage": 48.3, "elapsed_time": "0:34:07", "remaining_time": "0:36:32", "throughput": 11606.82, "total_tokens": 23770112} +{"current_steps": 7550, "total_steps": 15621, "loss": 0.4075, "lr": 1.2309782712289867e-06, "epoch": 0.48332373087510405, "percentage": 48.33, "elapsed_time": "0:34:08", "remaining_time": "0:36:29", "throughput": 11610.63, "total_tokens": 23785536} +{"current_steps": 7555, "total_steps": 15621, "loss": 0.4257, "lr": 1.2298909738440758e-06, "epoch": 0.48364381281608093, "percentage": 48.36, "elapsed_time": "0:34:09", "remaining_time": "0:36:27", "throughput": 11614.52, "total_tokens": 23801280} +{"current_steps": 7560, "total_steps": 15621, "loss": 0.3893, "lr": 1.2288033894375847e-06, "epoch": 0.4839638947570578, "percentage": 48.4, "elapsed_time": "0:34:09", "remaining_time": "0:36:25", "throughput": 11618.19, "total_tokens": 23816448} +{"current_steps": 7565, "total_steps": 15621, "loss": 0.541, "lr": 1.2277155193673755e-06, "epoch": 0.4842839766980347, "percentage": 48.43, "elapsed_time": "0:34:10", "remaining_time": "0:36:23", "throughput": 11622.15, "total_tokens": 23832512} +{"current_steps": 7570, "total_steps": 15621, "loss": 0.3945, "lr": 1.2266273649916668e-06, "epoch": 0.48460405863901157, "percentage": 48.46, "elapsed_time": "0:34:11", "remaining_time": "0:36:21", "throughput": 11626.03, "total_tokens": 23848192} +{"current_steps": 7575, "total_steps": 15621, "loss": 0.4394, "lr": 1.2255389276690318e-06, "epoch": 0.48492414057998845, "percentage": 48.49, "elapsed_time": "0:34:11", "remaining_time": "0:36:19", "throughput": 11629.85, "total_tokens": 23863808} +{"current_steps": 7580, "total_steps": 15621, "loss": 0.3096, "lr": 1.2244502087583978e-06, "epoch": 0.4852442225209654, "percentage": 48.52, "elapsed_time": "0:34:12", "remaining_time": "0:36:17", "throughput": 11634.13, "total_tokens": 23880960} +{"current_steps": 7585, "total_steps": 15621, "loss": 0.3963, "lr": 1.2233612096190426e-06, "epoch": 0.48556430446194226, "percentage": 48.56, "elapsed_time": "0:34:13", "remaining_time": "0:36:15", "throughput": 11637.83, "total_tokens": 23896256} +{"current_steps": 7590, "total_steps": 15621, "loss": 0.5109, "lr": 1.222271931610595e-06, "epoch": 0.48588438640291914, "percentage": 48.59, "elapsed_time": "0:34:14", "remaining_time": "0:36:13", "throughput": 11641.95, "total_tokens": 23912832} +{"current_steps": 7595, "total_steps": 15621, "loss": 0.4938, "lr": 1.2211823760930306e-06, "epoch": 0.486204468343896, "percentage": 48.62, "elapsed_time": "0:34:14", "remaining_time": "0:36:11", "throughput": 11645.87, "total_tokens": 23928768} +{"current_steps": 7600, "total_steps": 15621, "loss": 0.4297, "lr": 1.2200925444266726e-06, "epoch": 0.4865245502848729, "percentage": 48.65, "elapsed_time": "0:34:15", "remaining_time": "0:36:09", "throughput": 11649.9, "total_tokens": 23945088} +{"current_steps": 7605, "total_steps": 15621, "loss": 0.5101, "lr": 1.219002437972189e-06, "epoch": 0.48684463222584984, "percentage": 48.68, "elapsed_time": "0:34:16", "remaining_time": "0:36:07", "throughput": 11653.53, "total_tokens": 23960192} +{"current_steps": 7610, "total_steps": 15621, "loss": 0.4324, "lr": 1.21791205809059e-06, "epoch": 0.4871647141668267, "percentage": 48.72, "elapsed_time": "0:34:16", "remaining_time": "0:36:05", "throughput": 11657.73, "total_tokens": 23977152} +{"current_steps": 7615, "total_steps": 15621, "loss": 0.3628, "lr": 1.2168214061432283e-06, "epoch": 0.4874847961078036, "percentage": 48.75, "elapsed_time": "0:34:17", "remaining_time": "0:36:03", "throughput": 11661.38, "total_tokens": 23992448} +{"current_steps": 7620, "total_steps": 15621, "loss": 0.4397, "lr": 1.2157304834917947e-06, "epoch": 0.4878048780487805, "percentage": 48.78, "elapsed_time": "0:34:18", "remaining_time": "0:36:01", "throughput": 11665.25, "total_tokens": 24008384} +{"current_steps": 7625, "total_steps": 15621, "loss": 0.6103, "lr": 1.2146392914983202e-06, "epoch": 0.48812495998975736, "percentage": 48.81, "elapsed_time": "0:34:18", "remaining_time": "0:35:59", "throughput": 11669.56, "total_tokens": 24025728} +{"current_steps": 7630, "total_steps": 15621, "loss": 0.51, "lr": 1.2135478315251694e-06, "epoch": 0.4884450419307343, "percentage": 48.84, "elapsed_time": "0:34:19", "remaining_time": "0:35:56", "throughput": 11673.06, "total_tokens": 24040448} +{"current_steps": 7635, "total_steps": 15621, "loss": 0.36, "lr": 1.2124561049350442e-06, "epoch": 0.48876512387171117, "percentage": 48.88, "elapsed_time": "0:34:20", "remaining_time": "0:35:54", "throughput": 11676.55, "total_tokens": 24055168} +{"current_steps": 7640, "total_steps": 15621, "loss": 0.4474, "lr": 1.2113641130909772e-06, "epoch": 0.48908520581268805, "percentage": 48.91, "elapsed_time": "0:34:20", "remaining_time": "0:35:52", "throughput": 11680.07, "total_tokens": 24070016} +{"current_steps": 7645, "total_steps": 15621, "loss": 0.3074, "lr": 1.2102718573563334e-06, "epoch": 0.48940528775366493, "percentage": 48.94, "elapsed_time": "0:34:21", "remaining_time": "0:35:50", "throughput": 11683.58, "total_tokens": 24084800} +{"current_steps": 7650, "total_steps": 15621, "loss": 0.4884, "lr": 1.2091793390948066e-06, "epoch": 0.4897253696946418, "percentage": 48.97, "elapsed_time": "0:34:22", "remaining_time": "0:35:48", "throughput": 11687.36, "total_tokens": 24100416} +{"current_steps": 7655, "total_steps": 15621, "loss": 0.2873, "lr": 1.2080865596704191e-06, "epoch": 0.49004545163561875, "percentage": 49.0, "elapsed_time": "0:34:22", "remaining_time": "0:35:46", "throughput": 11691.46, "total_tokens": 24117120} +{"current_steps": 7660, "total_steps": 15621, "loss": 0.4317, "lr": 1.2069935204475187e-06, "epoch": 0.4903655335765956, "percentage": 49.04, "elapsed_time": "0:34:23", "remaining_time": "0:35:44", "throughput": 11695.06, "total_tokens": 24132224} +{"current_steps": 7665, "total_steps": 15621, "loss": 0.4037, "lr": 1.2059002227907776e-06, "epoch": 0.4906856155175725, "percentage": 49.07, "elapsed_time": "0:34:24", "remaining_time": "0:35:42", "throughput": 11698.76, "total_tokens": 24147712} +{"current_steps": 7670, "total_steps": 15621, "loss": 0.408, "lr": 1.2048066680651908e-06, "epoch": 0.4910056974585494, "percentage": 49.1, "elapsed_time": "0:34:24", "remaining_time": "0:35:40", "throughput": 11702.83, "total_tokens": 24164288} +{"current_steps": 7675, "total_steps": 15621, "loss": 0.5751, "lr": 1.2037128576360743e-06, "epoch": 0.49132577939952626, "percentage": 49.13, "elapsed_time": "0:34:26", "remaining_time": "0:35:38", "throughput": 11710.41, "total_tokens": 24193728} +{"current_steps": 7680, "total_steps": 15621, "loss": 0.406, "lr": 1.2026187928690627e-06, "epoch": 0.49164586134050314, "percentage": 49.16, "elapsed_time": "0:34:26", "remaining_time": "0:35:36", "throughput": 11714.03, "total_tokens": 24208832} +{"current_steps": 7685, "total_steps": 15621, "loss": 0.5004, "lr": 1.2015244751301098e-06, "epoch": 0.4919659432814801, "percentage": 49.2, "elapsed_time": "0:34:27", "remaining_time": "0:35:34", "throughput": 11717.48, "total_tokens": 24223424} +{"current_steps": 7690, "total_steps": 15621, "loss": 0.444, "lr": 1.2004299057854832e-06, "epoch": 0.49228602522245696, "percentage": 49.23, "elapsed_time": "0:34:27", "remaining_time": "0:35:32", "throughput": 11721.21, "total_tokens": 24238976} +{"current_steps": 7695, "total_steps": 15621, "loss": 0.3837, "lr": 1.1993350862017661e-06, "epoch": 0.49260610716343384, "percentage": 49.26, "elapsed_time": "0:34:28", "remaining_time": "0:35:30", "throughput": 11724.67, "total_tokens": 24253632} +{"current_steps": 7700, "total_steps": 15621, "loss": 0.4074, "lr": 1.1982400177458534e-06, "epoch": 0.4929261891044107, "percentage": 49.29, "elapsed_time": "0:34:29", "remaining_time": "0:35:28", "throughput": 11728.94, "total_tokens": 24270720} +{"current_steps": 7705, "total_steps": 15621, "loss": 0.4385, "lr": 1.197144701784951e-06, "epoch": 0.4932462710453876, "percentage": 49.32, "elapsed_time": "0:34:29", "remaining_time": "0:35:26", "throughput": 11732.34, "total_tokens": 24285312} +{"current_steps": 7710, "total_steps": 15621, "loss": 0.409, "lr": 1.1960491396865735e-06, "epoch": 0.49356635298636453, "percentage": 49.36, "elapsed_time": "0:34:30", "remaining_time": "0:35:24", "throughput": 11735.9, "total_tokens": 24300352} +{"current_steps": 7715, "total_steps": 15621, "loss": 0.3518, "lr": 1.1949533328185435e-06, "epoch": 0.4938864349273414, "percentage": 49.39, "elapsed_time": "0:34:31", "remaining_time": "0:35:22", "throughput": 11739.99, "total_tokens": 24317056} +{"current_steps": 7720, "total_steps": 15621, "loss": 0.3705, "lr": 1.1938572825489883e-06, "epoch": 0.4942065168683183, "percentage": 49.42, "elapsed_time": "0:34:31", "remaining_time": "0:35:20", "throughput": 11743.91, "total_tokens": 24333184} +{"current_steps": 7725, "total_steps": 15621, "loss": 0.4313, "lr": 1.1927609902463394e-06, "epoch": 0.49452659880929517, "percentage": 49.45, "elapsed_time": "0:34:32", "remaining_time": "0:35:18", "throughput": 11747.57, "total_tokens": 24348672} +{"current_steps": 7730, "total_steps": 15621, "loss": 0.4342, "lr": 1.1916644572793314e-06, "epoch": 0.49484668075027205, "percentage": 49.48, "elapsed_time": "0:34:33", "remaining_time": "0:35:16", "throughput": 11751.08, "total_tokens": 24363648} +{"current_steps": 7735, "total_steps": 15621, "loss": 0.4951, "lr": 1.190567685016998e-06, "epoch": 0.495166762691249, "percentage": 49.52, "elapsed_time": "0:34:34", "remaining_time": "0:35:14", "throughput": 11755.37, "total_tokens": 24380992} +{"current_steps": 7740, "total_steps": 15621, "loss": 0.4152, "lr": 1.189470674828672e-06, "epoch": 0.49548684463222586, "percentage": 49.55, "elapsed_time": "0:34:34", "remaining_time": "0:35:12", "throughput": 11758.83, "total_tokens": 24395776} +{"current_steps": 7745, "total_steps": 15621, "loss": 0.3851, "lr": 1.188373428083984e-06, "epoch": 0.49580692657320274, "percentage": 49.58, "elapsed_time": "0:34:35", "remaining_time": "0:35:10", "throughput": 11762.63, "total_tokens": 24411584} +{"current_steps": 7750, "total_steps": 15621, "loss": 0.5355, "lr": 1.1872759461528596e-06, "epoch": 0.4961270085141796, "percentage": 49.61, "elapsed_time": "0:34:35", "remaining_time": "0:35:08", "throughput": 11766.17, "total_tokens": 24426560} +{"current_steps": 7755, "total_steps": 15621, "loss": 0.4046, "lr": 1.1861782304055174e-06, "epoch": 0.4964470904551565, "percentage": 49.64, "elapsed_time": "0:34:36", "remaining_time": "0:35:06", "throughput": 11769.81, "total_tokens": 24441856} +{"current_steps": 7760, "total_steps": 15621, "loss": 0.3269, "lr": 1.1850802822124686e-06, "epoch": 0.4967671723961334, "percentage": 49.68, "elapsed_time": "0:34:37", "remaining_time": "0:35:04", "throughput": 11773.54, "total_tokens": 24457472} +{"current_steps": 7765, "total_steps": 15621, "loss": 0.5104, "lr": 1.1839821029445143e-06, "epoch": 0.4970872543371103, "percentage": 49.71, "elapsed_time": "0:34:37", "remaining_time": "0:35:02", "throughput": 11776.93, "total_tokens": 24471936} +{"current_steps": 7770, "total_steps": 15621, "loss": 0.3332, "lr": 1.1828836939727442e-06, "epoch": 0.4974073362780872, "percentage": 49.74, "elapsed_time": "0:34:38", "remaining_time": "0:35:00", "throughput": 11780.68, "total_tokens": 24487616} +{"current_steps": 7775, "total_steps": 15621, "loss": 0.4292, "lr": 1.181785056668535e-06, "epoch": 0.4977274182190641, "percentage": 49.77, "elapsed_time": "0:34:39", "remaining_time": "0:34:58", "throughput": 11784.58, "total_tokens": 24503936} +{"current_steps": 7780, "total_steps": 15621, "loss": 0.429, "lr": 1.180686192403548e-06, "epoch": 0.49804750016004096, "percentage": 49.8, "elapsed_time": "0:34:39", "remaining_time": "0:34:56", "throughput": 11787.97, "total_tokens": 24518464} +{"current_steps": 7785, "total_steps": 15621, "loss": 0.3479, "lr": 1.1795871025497285e-06, "epoch": 0.49836758210101784, "percentage": 49.84, "elapsed_time": "0:34:40", "remaining_time": "0:34:54", "throughput": 11791.39, "total_tokens": 24533184} +{"current_steps": 7790, "total_steps": 15621, "loss": 0.4288, "lr": 1.1784877884793029e-06, "epoch": 0.49868766404199477, "percentage": 49.87, "elapsed_time": "0:34:41", "remaining_time": "0:34:52", "throughput": 11795.18, "total_tokens": 24548992} +{"current_steps": 7795, "total_steps": 15621, "loss": 0.3681, "lr": 1.1773882515647776e-06, "epoch": 0.49900774598297165, "percentage": 49.9, "elapsed_time": "0:34:42", "remaining_time": "0:34:50", "throughput": 11799.47, "total_tokens": 24566592} +{"current_steps": 7800, "total_steps": 15621, "loss": 0.4776, "lr": 1.1762884931789376e-06, "epoch": 0.49932782792394853, "percentage": 49.93, "elapsed_time": "0:34:42", "remaining_time": "0:34:48", "throughput": 11803.61, "total_tokens": 24583552} +{"current_steps": 7805, "total_steps": 15621, "loss": 0.4538, "lr": 1.1751885146948436e-06, "epoch": 0.4996479098649254, "percentage": 49.96, "elapsed_time": "0:34:43", "remaining_time": "0:34:46", "throughput": 11807.42, "total_tokens": 24599552} +{"current_steps": 7810, "total_steps": 15621, "loss": 0.3799, "lr": 1.1740883174858327e-06, "epoch": 0.4999679918059023, "percentage": 50.0, "elapsed_time": "0:34:44", "remaining_time": "0:34:44", "throughput": 11811.03, "total_tokens": 24614912} +{"current_steps": 7815, "total_steps": 15621, "loss": 0.3643, "lr": 1.1729879029255127e-06, "epoch": 0.5002880737468792, "percentage": 50.03, "elapsed_time": "0:34:44", "remaining_time": "0:34:42", "throughput": 11814.49, "total_tokens": 24629696} +{"current_steps": 7820, "total_steps": 15621, "loss": 0.3997, "lr": 1.171887272387765e-06, "epoch": 0.5006081556878561, "percentage": 50.06, "elapsed_time": "0:34:45", "remaining_time": "0:34:40", "throughput": 11818.44, "total_tokens": 24646208} +{"current_steps": 7820, "total_steps": 15621, "eval_loss": 0.4178144633769989, "epoch": 0.5006081556878561, "percentage": 50.06, "elapsed_time": "0:35:36", "remaining_time": "0:35:30", "throughput": 11537.51, "total_tokens": 24646208} +{"current_steps": 7825, "total_steps": 15621, "loss": 0.4907, "lr": 1.1707864272467397e-06, "epoch": 0.500928237628833, "percentage": 50.09, "elapsed_time": "0:38:45", "remaining_time": "0:38:37", "throughput": 10603.43, "total_tokens": 24661120} +{"current_steps": 7830, "total_steps": 15621, "loss": 0.4269, "lr": 1.169685368876855e-06, "epoch": 0.5012483195698099, "percentage": 50.12, "elapsed_time": "0:38:46", "remaining_time": "0:38:34", "throughput": 10607.59, "total_tokens": 24678336} +{"current_steps": 7835, "total_steps": 15621, "loss": 0.5471, "lr": 1.1685840986527946e-06, "epoch": 0.5015684015107867, "percentage": 50.16, "elapsed_time": "0:38:47", "remaining_time": "0:38:32", "throughput": 10611.34, "total_tokens": 24694336} +{"current_steps": 7840, "total_steps": 15621, "loss": 0.3986, "lr": 1.1674826179495076e-06, "epoch": 0.5018884834517636, "percentage": 50.19, "elapsed_time": "0:38:47", "remaining_time": "0:38:30", "throughput": 10614.61, "total_tokens": 24708608} +{"current_steps": 7845, "total_steps": 15621, "loss": 0.4302, "lr": 1.1663809281422056e-06, "epoch": 0.5022085653927405, "percentage": 50.22, "elapsed_time": "0:38:48", "remaining_time": "0:38:28", "throughput": 10618.37, "total_tokens": 24724672} +{"current_steps": 7850, "total_steps": 15621, "loss": 0.4556, "lr": 1.1652790306063615e-06, "epoch": 0.5025286473337174, "percentage": 50.25, "elapsed_time": "0:38:49", "remaining_time": "0:38:25", "throughput": 10622.07, "total_tokens": 24740608} +{"current_steps": 7855, "total_steps": 15621, "loss": 0.4065, "lr": 1.164176926717707e-06, "epoch": 0.5028487292746944, "percentage": 50.28, "elapsed_time": "0:38:49", "remaining_time": "0:38:23", "throughput": 10626.38, "total_tokens": 24758528} +{"current_steps": 7860, "total_steps": 15621, "loss": 0.3737, "lr": 1.1630746178522315e-06, "epoch": 0.5031688112156713, "percentage": 50.32, "elapsed_time": "0:38:50", "remaining_time": "0:38:21", "throughput": 10629.7, "total_tokens": 24772992} +{"current_steps": 7865, "total_steps": 15621, "loss": 0.4417, "lr": 1.1619721053861816e-06, "epoch": 0.5034888931566481, "percentage": 50.35, "elapsed_time": "0:38:51", "remaining_time": "0:38:18", "throughput": 10633.19, "total_tokens": 24788160} +{"current_steps": 7870, "total_steps": 15621, "loss": 0.4104, "lr": 1.1608693906960558e-06, "epoch": 0.503808975097625, "percentage": 50.38, "elapsed_time": "0:38:51", "remaining_time": "0:38:16", "throughput": 10636.96, "total_tokens": 24804224} +{"current_steps": 7875, "total_steps": 15621, "loss": 0.4523, "lr": 1.1597664751586069e-06, "epoch": 0.5041290570386019, "percentage": 50.41, "elapsed_time": "0:38:52", "remaining_time": "0:38:14", "throughput": 10640.91, "total_tokens": 24820928} +{"current_steps": 7880, "total_steps": 15621, "loss": 0.3953, "lr": 1.1586633601508382e-06, "epoch": 0.5044491389795788, "percentage": 50.44, "elapsed_time": "0:38:53", "remaining_time": "0:38:12", "throughput": 10644.31, "total_tokens": 24835776} +{"current_steps": 7885, "total_steps": 15621, "loss": 0.3764, "lr": 1.1575600470500014e-06, "epoch": 0.5047692209205557, "percentage": 50.48, "elapsed_time": "0:38:53", "remaining_time": "0:38:09", "throughput": 10648.01, "total_tokens": 24851648} +{"current_steps": 7890, "total_steps": 15621, "loss": 0.4222, "lr": 1.1564565372335957e-06, "epoch": 0.5050893028615325, "percentage": 50.51, "elapsed_time": "0:38:54", "remaining_time": "0:38:07", "throughput": 10651.54, "total_tokens": 24866880} +{"current_steps": 7895, "total_steps": 15621, "loss": 0.3276, "lr": 1.1553528320793663e-06, "epoch": 0.5054093848025094, "percentage": 50.54, "elapsed_time": "0:38:55", "remaining_time": "0:38:05", "throughput": 10654.98, "total_tokens": 24881856} +{"current_steps": 7900, "total_steps": 15621, "loss": 0.4327, "lr": 1.1542489329653022e-06, "epoch": 0.5057294667434863, "percentage": 50.57, "elapsed_time": "0:38:55", "remaining_time": "0:38:03", "throughput": 10658.95, "total_tokens": 24898560} +{"current_steps": 7905, "total_steps": 15621, "loss": 0.3841, "lr": 1.1531448412696343e-06, "epoch": 0.5060495486844632, "percentage": 50.6, "elapsed_time": "0:38:56", "remaining_time": "0:38:00", "throughput": 10662.29, "total_tokens": 24913216} +{"current_steps": 7910, "total_steps": 15621, "loss": 0.5014, "lr": 1.1520405583708337e-06, "epoch": 0.5063696306254402, "percentage": 50.64, "elapsed_time": "0:38:57", "remaining_time": "0:37:58", "throughput": 10665.89, "total_tokens": 24928832} +{"current_steps": 7915, "total_steps": 15621, "loss": 0.4926, "lr": 1.1509360856476109e-06, "epoch": 0.506689712566417, "percentage": 50.67, "elapsed_time": "0:38:57", "remaining_time": "0:37:56", "throughput": 10669.52, "total_tokens": 24944512} +{"current_steps": 7920, "total_steps": 15621, "loss": 0.4731, "lr": 1.149831424478913e-06, "epoch": 0.5070097945073939, "percentage": 50.7, "elapsed_time": "0:38:58", "remaining_time": "0:37:53", "throughput": 10673.01, "total_tokens": 24959744} +{"current_steps": 7925, "total_steps": 15621, "loss": 0.3939, "lr": 1.1487265762439224e-06, "epoch": 0.5073298764483708, "percentage": 50.73, "elapsed_time": "0:38:59", "remaining_time": "0:37:51", "throughput": 10676.65, "total_tokens": 24975488} +{"current_steps": 7930, "total_steps": 15621, "loss": 0.3612, "lr": 1.1476215423220547e-06, "epoch": 0.5076499583893477, "percentage": 50.76, "elapsed_time": "0:38:59", "remaining_time": "0:37:49", "throughput": 10680.01, "total_tokens": 24990272} +{"current_steps": 7935, "total_steps": 15621, "loss": 0.3724, "lr": 1.146516324092959e-06, "epoch": 0.5079700403303246, "percentage": 50.8, "elapsed_time": "0:39:00", "remaining_time": "0:37:47", "throughput": 10683.72, "total_tokens": 25006272} +{"current_steps": 7940, "total_steps": 15621, "loss": 0.2965, "lr": 1.1454109229365117e-06, "epoch": 0.5082901222713014, "percentage": 50.83, "elapsed_time": "0:39:01", "remaining_time": "0:37:44", "throughput": 10687.49, "total_tokens": 25022464} +{"current_steps": 7945, "total_steps": 15621, "loss": 0.3151, "lr": 1.14430534023282e-06, "epoch": 0.5086102042122783, "percentage": 50.86, "elapsed_time": "0:39:01", "remaining_time": "0:37:42", "throughput": 10690.89, "total_tokens": 25037376} +{"current_steps": 7950, "total_steps": 15621, "loss": 0.4737, "lr": 1.1431995773622167e-06, "epoch": 0.5089302861532552, "percentage": 50.89, "elapsed_time": "0:39:02", "remaining_time": "0:37:40", "throughput": 10694.62, "total_tokens": 25053440} +{"current_steps": 7955, "total_steps": 15621, "loss": 0.4343, "lr": 1.1420936357052597e-06, "epoch": 0.5092503680942321, "percentage": 50.93, "elapsed_time": "0:39:03", "remaining_time": "0:37:38", "throughput": 10698.24, "total_tokens": 25069120} +{"current_steps": 7960, "total_steps": 15621, "loss": 0.3024, "lr": 1.1409875166427303e-06, "epoch": 0.5095704500352091, "percentage": 50.96, "elapsed_time": "0:39:03", "remaining_time": "0:37:35", "throughput": 10701.71, "total_tokens": 25084224} +{"current_steps": 7965, "total_steps": 15621, "loss": 0.5023, "lr": 1.1398812215556308e-06, "epoch": 0.509890531976186, "percentage": 50.99, "elapsed_time": "0:39:04", "remaining_time": "0:37:33", "throughput": 10705.23, "total_tokens": 25099520} +{"current_steps": 7970, "total_steps": 15621, "loss": 0.372, "lr": 1.1387747518251837e-06, "epoch": 0.5102106139171628, "percentage": 51.02, "elapsed_time": "0:39:05", "remaining_time": "0:37:31", "throughput": 10708.84, "total_tokens": 25115200} +{"current_steps": 7975, "total_steps": 15621, "loss": 0.3171, "lr": 1.13766810883283e-06, "epoch": 0.5105306958581397, "percentage": 51.05, "elapsed_time": "0:39:05", "remaining_time": "0:37:29", "throughput": 10712.63, "total_tokens": 25131520} +{"current_steps": 7980, "total_steps": 15621, "loss": 0.5088, "lr": 1.1365612939602255e-06, "epoch": 0.5108507777991166, "percentage": 51.09, "elapsed_time": "0:39:06", "remaining_time": "0:37:26", "throughput": 10716.38, "total_tokens": 25147776} +{"current_steps": 7985, "total_steps": 15621, "loss": 0.3884, "lr": 1.1354543085892423e-06, "epoch": 0.5111708597400935, "percentage": 51.12, "elapsed_time": "0:39:07", "remaining_time": "0:37:24", "throughput": 10719.79, "total_tokens": 25162816} +{"current_steps": 7990, "total_steps": 15621, "loss": 0.3417, "lr": 1.1343471541019646e-06, "epoch": 0.5114909416810703, "percentage": 51.15, "elapsed_time": "0:39:08", "remaining_time": "0:37:22", "throughput": 10723.47, "total_tokens": 25178752} +{"current_steps": 7995, "total_steps": 15621, "loss": 0.3672, "lr": 1.1332398318806872e-06, "epoch": 0.5118110236220472, "percentage": 51.18, "elapsed_time": "0:39:08", "remaining_time": "0:37:20", "throughput": 10726.94, "total_tokens": 25194048} +{"current_steps": 8000, "total_steps": 15621, "loss": 0.3787, "lr": 1.1321323433079158e-06, "epoch": 0.5121311055630241, "percentage": 51.21, "elapsed_time": "0:39:09", "remaining_time": "0:37:18", "throughput": 10730.41, "total_tokens": 25209216} +{"current_steps": 8005, "total_steps": 15621, "loss": 0.3897, "lr": 1.1310246897663623e-06, "epoch": 0.512451187504001, "percentage": 51.25, "elapsed_time": "0:39:09", "remaining_time": "0:37:15", "throughput": 10733.93, "total_tokens": 25224640} +{"current_steps": 8010, "total_steps": 15621, "loss": 0.4115, "lr": 1.1299168726389447e-06, "epoch": 0.5127712694449779, "percentage": 51.28, "elapsed_time": "0:39:10", "remaining_time": "0:37:13", "throughput": 10737.35, "total_tokens": 25239808} +{"current_steps": 8015, "total_steps": 15621, "loss": 0.346, "lr": 1.1288088933087868e-06, "epoch": 0.5130913513859549, "percentage": 51.31, "elapsed_time": "0:39:11", "remaining_time": "0:37:11", "throughput": 10741.49, "total_tokens": 25257344} +{"current_steps": 8020, "total_steps": 15621, "loss": 0.3318, "lr": 1.1277007531592127e-06, "epoch": 0.5134114333269317, "percentage": 51.34, "elapsed_time": "0:39:12", "remaining_time": "0:37:09", "throughput": 10744.79, "total_tokens": 25272064} +{"current_steps": 8025, "total_steps": 15621, "loss": 0.3698, "lr": 1.1265924535737492e-06, "epoch": 0.5137315152679086, "percentage": 51.37, "elapsed_time": "0:39:12", "remaining_time": "0:37:06", "throughput": 10748.45, "total_tokens": 25287936} +{"current_steps": 8030, "total_steps": 15621, "loss": 0.3019, "lr": 1.125483995936121e-06, "epoch": 0.5140515972088855, "percentage": 51.41, "elapsed_time": "0:39:13", "remaining_time": "0:37:04", "throughput": 10751.91, "total_tokens": 25303232} +{"current_steps": 8035, "total_steps": 15621, "loss": 0.3742, "lr": 1.1243753816302507e-06, "epoch": 0.5143716791498624, "percentage": 51.44, "elapsed_time": "0:39:14", "remaining_time": "0:37:02", "throughput": 10755.4, "total_tokens": 25318656} +{"current_steps": 8040, "total_steps": 15621, "loss": 0.4047, "lr": 1.1232666120402558e-06, "epoch": 0.5146917610908393, "percentage": 51.47, "elapsed_time": "0:39:14", "remaining_time": "0:37:00", "throughput": 10758.8, "total_tokens": 25333760} +{"current_steps": 8045, "total_steps": 15621, "loss": 0.3819, "lr": 1.1221576885504487e-06, "epoch": 0.5150118430318161, "percentage": 51.5, "elapsed_time": "0:39:15", "remaining_time": "0:36:58", "throughput": 10762.48, "total_tokens": 25349824} +{"current_steps": 8050, "total_steps": 15621, "loss": 0.3978, "lr": 1.121048612545333e-06, "epoch": 0.515331924972793, "percentage": 51.53, "elapsed_time": "0:39:16", "remaining_time": "0:36:55", "throughput": 10766.03, "total_tokens": 25365376} +{"current_steps": 8055, "total_steps": 15621, "loss": 0.459, "lr": 1.1199393854096034e-06, "epoch": 0.5156520069137699, "percentage": 51.57, "elapsed_time": "0:39:16", "remaining_time": "0:36:53", "throughput": 10769.57, "total_tokens": 25380928} +{"current_steps": 8060, "total_steps": 15621, "loss": 0.3448, "lr": 1.118830008528143e-06, "epoch": 0.5159720888547468, "percentage": 51.6, "elapsed_time": "0:39:17", "remaining_time": "0:36:51", "throughput": 10773.06, "total_tokens": 25396352} +{"current_steps": 8065, "total_steps": 15621, "loss": 0.3084, "lr": 1.1177204832860212e-06, "epoch": 0.5162921707957238, "percentage": 51.63, "elapsed_time": "0:39:18", "remaining_time": "0:36:49", "throughput": 10776.47, "total_tokens": 25411456} +{"current_steps": 8070, "total_steps": 15621, "loss": 0.4402, "lr": 1.1166108110684947e-06, "epoch": 0.5166122527367006, "percentage": 51.66, "elapsed_time": "0:39:18", "remaining_time": "0:36:47", "throughput": 10780.45, "total_tokens": 25428544} +{"current_steps": 8075, "total_steps": 15621, "loss": 0.4209, "lr": 1.1155009932610003e-06, "epoch": 0.5169323346776775, "percentage": 51.69, "elapsed_time": "0:39:19", "remaining_time": "0:36:44", "throughput": 10783.93, "total_tokens": 25443968} +{"current_steps": 8080, "total_steps": 15621, "loss": 0.3319, "lr": 1.1143910312491605e-06, "epoch": 0.5172524166186544, "percentage": 51.73, "elapsed_time": "0:39:20", "remaining_time": "0:36:42", "throughput": 10787.29, "total_tokens": 25458880} +{"current_steps": 8085, "total_steps": 15621, "loss": 0.3206, "lr": 1.1132809264187748e-06, "epoch": 0.5175724985596313, "percentage": 51.76, "elapsed_time": "0:39:20", "remaining_time": "0:36:40", "throughput": 10790.77, "total_tokens": 25474304} +{"current_steps": 8090, "total_steps": 15621, "loss": 0.4119, "lr": 1.1121706801558226e-06, "epoch": 0.5178925805006082, "percentage": 51.79, "elapsed_time": "0:39:21", "remaining_time": "0:36:38", "throughput": 10794.18, "total_tokens": 25489472} +{"current_steps": 8095, "total_steps": 15621, "loss": 0.3921, "lr": 1.111060293846459e-06, "epoch": 0.518212662441585, "percentage": 51.82, "elapsed_time": "0:39:22", "remaining_time": "0:36:36", "throughput": 10797.65, "total_tokens": 25504896} +{"current_steps": 8100, "total_steps": 15621, "loss": 0.4749, "lr": 1.1099497688770148e-06, "epoch": 0.5185327443825619, "percentage": 51.85, "elapsed_time": "0:39:22", "remaining_time": "0:36:33", "throughput": 10800.85, "total_tokens": 25519360} +{"current_steps": 8105, "total_steps": 15621, "loss": 0.4449, "lr": 1.1088391066339928e-06, "epoch": 0.5188528263235388, "percentage": 51.89, "elapsed_time": "0:39:23", "remaining_time": "0:36:31", "throughput": 10804.58, "total_tokens": 25535680} +{"current_steps": 8110, "total_steps": 15621, "loss": 0.5377, "lr": 1.1077283085040684e-06, "epoch": 0.5191729082645157, "percentage": 51.92, "elapsed_time": "0:39:24", "remaining_time": "0:36:29", "throughput": 10807.91, "total_tokens": 25550592} +{"current_steps": 8115, "total_steps": 15621, "loss": 0.3997, "lr": 1.1066173758740863e-06, "epoch": 0.5194929902054926, "percentage": 51.95, "elapsed_time": "0:39:24", "remaining_time": "0:36:27", "throughput": 10811.27, "total_tokens": 25565696} +{"current_steps": 8120, "total_steps": 15621, "loss": 0.3523, "lr": 1.105506310131058e-06, "epoch": 0.5198130721464695, "percentage": 51.98, "elapsed_time": "0:39:25", "remaining_time": "0:36:25", "throughput": 10814.9, "total_tokens": 25581568} +{"current_steps": 8125, "total_steps": 15621, "loss": 0.4599, "lr": 1.1043951126621634e-06, "epoch": 0.5201331540874464, "percentage": 52.01, "elapsed_time": "0:39:26", "remaining_time": "0:36:22", "throughput": 10818.56, "total_tokens": 25597760} +{"current_steps": 8130, "total_steps": 15621, "loss": 0.4081, "lr": 1.1032837848547445e-06, "epoch": 0.5204532360284233, "percentage": 52.05, "elapsed_time": "0:39:26", "remaining_time": "0:36:20", "throughput": 10822.66, "total_tokens": 25615424} +{"current_steps": 8135, "total_steps": 15621, "loss": 0.4117, "lr": 1.1021723280963074e-06, "epoch": 0.5207733179694002, "percentage": 52.08, "elapsed_time": "0:39:27", "remaining_time": "0:36:18", "throughput": 10826.11, "total_tokens": 25630720} +{"current_steps": 8140, "total_steps": 15621, "loss": 0.5029, "lr": 1.1010607437745194e-06, "epoch": 0.5210933999103771, "percentage": 52.11, "elapsed_time": "0:39:28", "remaining_time": "0:36:16", "throughput": 10830.47, "total_tokens": 25649280} +{"current_steps": 8145, "total_steps": 15621, "loss": 0.5131, "lr": 1.0999490332772057e-06, "epoch": 0.5214134818513539, "percentage": 52.14, "elapsed_time": "0:39:28", "remaining_time": "0:36:14", "throughput": 10833.87, "total_tokens": 25664576} +{"current_steps": 8150, "total_steps": 15621, "loss": 0.426, "lr": 1.0988371979923507e-06, "epoch": 0.5217335637923308, "percentage": 52.17, "elapsed_time": "0:39:29", "remaining_time": "0:36:12", "throughput": 10837.46, "total_tokens": 25680384} +{"current_steps": 8155, "total_steps": 15621, "loss": 0.4235, "lr": 1.097725239308094e-06, "epoch": 0.5220536457333077, "percentage": 52.21, "elapsed_time": "0:39:30", "remaining_time": "0:36:10", "throughput": 10841.02, "total_tokens": 25696128} +{"current_steps": 8160, "total_steps": 15621, "loss": 0.2819, "lr": 1.0966131586127278e-06, "epoch": 0.5223737276742846, "percentage": 52.24, "elapsed_time": "0:39:30", "remaining_time": "0:36:07", "throughput": 10844.82, "total_tokens": 25712768} +{"current_steps": 8165, "total_steps": 15621, "loss": 0.4086, "lr": 1.0955009572946992e-06, "epoch": 0.5226938096152615, "percentage": 52.27, "elapsed_time": "0:39:31", "remaining_time": "0:36:05", "throughput": 10848.12, "total_tokens": 25727616} +{"current_steps": 8170, "total_steps": 15621, "loss": 0.4159, "lr": 1.094388636742604e-06, "epoch": 0.5230138915562383, "percentage": 52.3, "elapsed_time": "0:39:32", "remaining_time": "0:36:03", "throughput": 10851.99, "total_tokens": 25744384} +{"current_steps": 8175, "total_steps": 15621, "loss": 0.3516, "lr": 1.0932761983451878e-06, "epoch": 0.5233339734972153, "percentage": 52.33, "elapsed_time": "0:39:33", "remaining_time": "0:36:01", "throughput": 10855.66, "total_tokens": 25760640} +{"current_steps": 8180, "total_steps": 15621, "loss": 0.3157, "lr": 1.0921636434913425e-06, "epoch": 0.5236540554381922, "percentage": 52.37, "elapsed_time": "0:39:33", "remaining_time": "0:35:59", "throughput": 10859.28, "total_tokens": 25776640} +{"current_steps": 8185, "total_steps": 15621, "loss": 0.2979, "lr": 1.091050973570106e-06, "epoch": 0.5239741373791691, "percentage": 52.4, "elapsed_time": "0:39:34", "remaining_time": "0:35:57", "throughput": 10862.66, "total_tokens": 25791744} +{"current_steps": 8190, "total_steps": 15621, "loss": 0.5589, "lr": 1.08993818997066e-06, "epoch": 0.524294219320146, "percentage": 52.43, "elapsed_time": "0:39:35", "remaining_time": "0:35:54", "throughput": 10866.43, "total_tokens": 25808256} +{"current_steps": 8195, "total_steps": 15621, "loss": 0.4481, "lr": 1.0888252940823283e-06, "epoch": 0.5246143012611229, "percentage": 52.46, "elapsed_time": "0:39:35", "remaining_time": "0:35:52", "throughput": 10870.0, "total_tokens": 25824128} +{"current_steps": 8200, "total_steps": 15621, "loss": 0.4767, "lr": 1.0877122872945737e-06, "epoch": 0.5249343832020997, "percentage": 52.49, "elapsed_time": "0:39:36", "remaining_time": "0:35:50", "throughput": 10873.77, "total_tokens": 25840576} +{"current_steps": 8205, "total_steps": 15621, "loss": 0.3206, "lr": 1.0865991709969983e-06, "epoch": 0.5252544651430766, "percentage": 52.53, "elapsed_time": "0:39:37", "remaining_time": "0:35:48", "throughput": 10877.29, "total_tokens": 25856256} +{"current_steps": 8210, "total_steps": 15621, "loss": 0.4424, "lr": 1.0854859465793416e-06, "epoch": 0.5255745470840535, "percentage": 52.56, "elapsed_time": "0:39:37", "remaining_time": "0:35:46", "throughput": 10880.68, "total_tokens": 25871424} +{"current_steps": 8215, "total_steps": 15621, "loss": 0.4916, "lr": 1.0843726154314767e-06, "epoch": 0.5258946290250304, "percentage": 52.59, "elapsed_time": "0:39:38", "remaining_time": "0:35:44", "throughput": 10883.96, "total_tokens": 25886272} +{"current_steps": 8220, "total_steps": 15621, "loss": 0.4302, "lr": 1.083259178943411e-06, "epoch": 0.5262147109660072, "percentage": 52.62, "elapsed_time": "0:39:39", "remaining_time": "0:35:42", "throughput": 10887.48, "total_tokens": 25901952} +{"current_steps": 8225, "total_steps": 15621, "loss": 0.3779, "lr": 1.0821456385052822e-06, "epoch": 0.5265347929069842, "percentage": 52.65, "elapsed_time": "0:39:39", "remaining_time": "0:35:39", "throughput": 10891.04, "total_tokens": 25917888} +{"current_steps": 8230, "total_steps": 15621, "loss": 0.4074, "lr": 1.0810319955073598e-06, "epoch": 0.5268548748479611, "percentage": 52.69, "elapsed_time": "0:39:40", "remaining_time": "0:35:37", "throughput": 10894.61, "total_tokens": 25933824} +{"current_steps": 8235, "total_steps": 15621, "loss": 0.3842, "lr": 1.0799182513400393e-06, "epoch": 0.527174956788938, "percentage": 52.72, "elapsed_time": "0:39:41", "remaining_time": "0:35:35", "throughput": 10898.64, "total_tokens": 25951360} +{"current_steps": 8240, "total_steps": 15621, "loss": 0.3524, "lr": 1.0788044073938438e-06, "epoch": 0.5274950387299149, "percentage": 52.75, "elapsed_time": "0:39:41", "remaining_time": "0:35:33", "throughput": 10902.21, "total_tokens": 25967232} +{"current_steps": 8245, "total_steps": 15621, "loss": 0.4361, "lr": 1.0776904650594205e-06, "epoch": 0.5278151206708918, "percentage": 52.78, "elapsed_time": "0:39:42", "remaining_time": "0:35:31", "throughput": 10905.63, "total_tokens": 25982592} +{"current_steps": 8250, "total_steps": 15621, "loss": 0.4055, "lr": 1.0765764257275394e-06, "epoch": 0.5281352026118686, "percentage": 52.81, "elapsed_time": "0:39:43", "remaining_time": "0:35:29", "throughput": 10909.01, "total_tokens": 25997824} +{"current_steps": 8255, "total_steps": 15621, "loss": 0.4559, "lr": 1.0754622907890914e-06, "epoch": 0.5284552845528455, "percentage": 52.85, "elapsed_time": "0:39:43", "remaining_time": "0:35:27", "throughput": 10912.57, "total_tokens": 26013632} +{"current_steps": 8260, "total_steps": 15621, "loss": 0.3412, "lr": 1.0743480616350873e-06, "epoch": 0.5287753664938224, "percentage": 52.88, "elapsed_time": "0:39:44", "remaining_time": "0:35:24", "throughput": 10915.91, "total_tokens": 26028800} +{"current_steps": 8265, "total_steps": 15621, "loss": 0.3488, "lr": 1.0732337396566558e-06, "epoch": 0.5290954484347993, "percentage": 52.91, "elapsed_time": "0:39:45", "remaining_time": "0:35:22", "throughput": 10919.44, "total_tokens": 26044672} +{"current_steps": 8270, "total_steps": 15621, "loss": 0.3944, "lr": 1.07211932624504e-06, "epoch": 0.5294155303757762, "percentage": 52.94, "elapsed_time": "0:39:45", "remaining_time": "0:35:20", "throughput": 10922.81, "total_tokens": 26060544} +{"current_steps": 8275, "total_steps": 15621, "loss": 0.3714, "lr": 1.0710048227915988e-06, "epoch": 0.529735612316753, "percentage": 52.97, "elapsed_time": "0:39:46", "remaining_time": "0:35:18", "throughput": 10926.28, "total_tokens": 26076160} +{"current_steps": 8280, "total_steps": 15621, "loss": 0.4306, "lr": 1.0698902306878024e-06, "epoch": 0.53005569425773, "percentage": 53.01, "elapsed_time": "0:39:47", "remaining_time": "0:35:16", "throughput": 10929.93, "total_tokens": 26092352} +{"current_steps": 8285, "total_steps": 15621, "loss": 0.3033, "lr": 1.0687755513252325e-06, "epoch": 0.5303757761987069, "percentage": 53.04, "elapsed_time": "0:39:47", "remaining_time": "0:35:14", "throughput": 10933.31, "total_tokens": 26107776} +{"current_steps": 8290, "total_steps": 15621, "loss": 0.3065, "lr": 1.0676607860955794e-06, "epoch": 0.5306958581396838, "percentage": 53.07, "elapsed_time": "0:39:48", "remaining_time": "0:35:12", "throughput": 10936.89, "total_tokens": 26123712} +{"current_steps": 8295, "total_steps": 15621, "loss": 0.3837, "lr": 1.0665459363906404e-06, "epoch": 0.5310159400806607, "percentage": 53.1, "elapsed_time": "0:39:49", "remaining_time": "0:35:10", "throughput": 10940.34, "total_tokens": 26139200} +{"current_steps": 8300, "total_steps": 15621, "loss": 0.4238, "lr": 1.0654310036023185e-06, "epoch": 0.5313360220216375, "percentage": 53.13, "elapsed_time": "0:39:49", "remaining_time": "0:35:07", "throughput": 10943.47, "total_tokens": 26153600} +{"current_steps": 8305, "total_steps": 15621, "loss": 0.4224, "lr": 1.0643159891226203e-06, "epoch": 0.5316561039626144, "percentage": 53.17, "elapsed_time": "0:39:50", "remaining_time": "0:35:05", "throughput": 10947.05, "total_tokens": 26169600} +{"current_steps": 8310, "total_steps": 15621, "loss": 0.3419, "lr": 1.0632008943436545e-06, "epoch": 0.5319761859035913, "percentage": 53.2, "elapsed_time": "0:39:51", "remaining_time": "0:35:03", "throughput": 10950.66, "total_tokens": 26185536} +{"current_steps": 8315, "total_steps": 15621, "loss": 0.4642, "lr": 1.0620857206576299e-06, "epoch": 0.5322962678445682, "percentage": 53.23, "elapsed_time": "0:39:51", "remaining_time": "0:35:01", "throughput": 10954.21, "total_tokens": 26201536} +{"current_steps": 8320, "total_steps": 15621, "loss": 0.2997, "lr": 1.0609704694568546e-06, "epoch": 0.5326163497855451, "percentage": 53.26, "elapsed_time": "0:39:52", "remaining_time": "0:34:59", "throughput": 10957.5, "total_tokens": 26216576} +{"current_steps": 8325, "total_steps": 15621, "loss": 0.2991, "lr": 1.0598551421337318e-06, "epoch": 0.5329364317265219, "percentage": 53.29, "elapsed_time": "0:39:53", "remaining_time": "0:34:57", "throughput": 10961.07, "total_tokens": 26232640} +{"current_steps": 8330, "total_steps": 15621, "loss": 0.539, "lr": 1.0587397400807617e-06, "epoch": 0.5332565136674989, "percentage": 53.33, "elapsed_time": "0:39:53", "remaining_time": "0:34:55", "throughput": 10964.57, "total_tokens": 26248448} +{"current_steps": 8335, "total_steps": 15621, "loss": 0.5144, "lr": 1.057624264690536e-06, "epoch": 0.5335765956084758, "percentage": 53.36, "elapsed_time": "0:39:54", "remaining_time": "0:34:53", "throughput": 10967.97, "total_tokens": 26263872} +{"current_steps": 8340, "total_steps": 15621, "loss": 0.4616, "lr": 1.0565087173557394e-06, "epoch": 0.5338966775494527, "percentage": 53.39, "elapsed_time": "0:39:55", "remaining_time": "0:34:51", "throughput": 10971.54, "total_tokens": 26279872} +{"current_steps": 8345, "total_steps": 15621, "loss": 0.3469, "lr": 1.055393099469146e-06, "epoch": 0.5342167594904296, "percentage": 53.42, "elapsed_time": "0:39:55", "remaining_time": "0:34:49", "throughput": 10975.08, "total_tokens": 26295680} +{"current_steps": 8350, "total_steps": 15621, "loss": 0.4155, "lr": 1.054277412423617e-06, "epoch": 0.5345368414314065, "percentage": 53.45, "elapsed_time": "0:39:56", "remaining_time": "0:34:46", "throughput": 10978.45, "total_tokens": 26311040} +{"current_steps": 8355, "total_steps": 15621, "loss": 0.47, "lr": 1.0531616576121017e-06, "epoch": 0.5348569233723833, "percentage": 53.49, "elapsed_time": "0:39:57", "remaining_time": "0:34:44", "throughput": 10981.79, "total_tokens": 26326144} +{"current_steps": 8360, "total_steps": 15621, "loss": 0.336, "lr": 1.0520458364276325e-06, "epoch": 0.5351770053133602, "percentage": 53.52, "elapsed_time": "0:39:57", "remaining_time": "0:34:42", "throughput": 10985.27, "total_tokens": 26341952} +{"current_steps": 8365, "total_steps": 15621, "loss": 0.3636, "lr": 1.0509299502633256e-06, "epoch": 0.5354970872543371, "percentage": 53.55, "elapsed_time": "0:39:58", "remaining_time": "0:34:40", "throughput": 10988.46, "total_tokens": 26356672} +{"current_steps": 8370, "total_steps": 15621, "loss": 0.4452, "lr": 1.0498140005123777e-06, "epoch": 0.535817169195314, "percentage": 53.58, "elapsed_time": "0:39:59", "remaining_time": "0:34:38", "throughput": 10992.09, "total_tokens": 26373056} +{"current_steps": 8375, "total_steps": 15621, "loss": 0.4254, "lr": 1.0486979885680653e-06, "epoch": 0.5361372511362908, "percentage": 53.61, "elapsed_time": "0:39:59", "remaining_time": "0:34:36", "throughput": 10995.34, "total_tokens": 26388032} +{"current_steps": 8380, "total_steps": 15621, "loss": 0.4324, "lr": 1.0475819158237424e-06, "epoch": 0.5364573330772677, "percentage": 53.65, "elapsed_time": "0:40:00", "remaining_time": "0:34:34", "throughput": 10998.56, "total_tokens": 26402880} +{"current_steps": 8385, "total_steps": 15621, "loss": 0.481, "lr": 1.0464657836728389e-06, "epoch": 0.5367774150182447, "percentage": 53.68, "elapsed_time": "0:40:01", "remaining_time": "0:34:32", "throughput": 11002.21, "total_tokens": 26419328} +{"current_steps": 8390, "total_steps": 15621, "loss": 0.4169, "lr": 1.045349593508859e-06, "epoch": 0.5370974969592216, "percentage": 53.71, "elapsed_time": "0:40:01", "remaining_time": "0:34:30", "throughput": 11005.43, "total_tokens": 26434112} +{"current_steps": 8395, "total_steps": 15621, "loss": 0.2911, "lr": 1.0442333467253788e-06, "epoch": 0.5374175789001985, "percentage": 53.74, "elapsed_time": "0:40:02", "remaining_time": "0:34:28", "throughput": 11009.14, "total_tokens": 26450688} +{"current_steps": 8400, "total_steps": 15621, "loss": 0.3651, "lr": 1.0431170447160463e-06, "epoch": 0.5377376608411754, "percentage": 53.77, "elapsed_time": "0:40:03", "remaining_time": "0:34:25", "throughput": 11012.57, "total_tokens": 26466368} +{"current_steps": 8405, "total_steps": 15621, "loss": 0.3663, "lr": 1.0420006888745767e-06, "epoch": 0.5380577427821522, "percentage": 53.81, "elapsed_time": "0:40:03", "remaining_time": "0:34:23", "throughput": 11016.17, "total_tokens": 26482624} +{"current_steps": 8410, "total_steps": 15621, "loss": 0.3745, "lr": 1.0408842805947543e-06, "epoch": 0.5383778247231291, "percentage": 53.84, "elapsed_time": "0:40:04", "remaining_time": "0:34:21", "throughput": 11019.85, "total_tokens": 26499200} +{"current_steps": 8415, "total_steps": 15621, "loss": 0.5144, "lr": 1.0397678212704276e-06, "epoch": 0.538697906664106, "percentage": 53.87, "elapsed_time": "0:40:05", "remaining_time": "0:34:19", "throughput": 11023.05, "total_tokens": 26514048} +{"current_steps": 8420, "total_steps": 15621, "loss": 0.4061, "lr": 1.038651312295509e-06, "epoch": 0.5390179886050829, "percentage": 53.9, "elapsed_time": "0:40:05", "remaining_time": "0:34:17", "throughput": 11026.34, "total_tokens": 26529216} +{"current_steps": 8425, "total_steps": 15621, "loss": 0.4173, "lr": 1.037534755063973e-06, "epoch": 0.5393380705460598, "percentage": 53.93, "elapsed_time": "0:40:06", "remaining_time": "0:34:15", "throughput": 11029.85, "total_tokens": 26545152} +{"current_steps": 8430, "total_steps": 15621, "loss": 0.4124, "lr": 1.0364181509698548e-06, "epoch": 0.5396581524870366, "percentage": 53.97, "elapsed_time": "0:40:07", "remaining_time": "0:34:13", "throughput": 11033.2, "total_tokens": 26560512} +{"current_steps": 8435, "total_steps": 15621, "loss": 0.3606, "lr": 1.0353015014072476e-06, "epoch": 0.5399782344280136, "percentage": 54.0, "elapsed_time": "0:40:07", "remaining_time": "0:34:11", "throughput": 11036.42, "total_tokens": 26575488} +{"current_steps": 8440, "total_steps": 15621, "loss": 0.4008, "lr": 1.0341848077703013e-06, "epoch": 0.5402983163689905, "percentage": 54.03, "elapsed_time": "0:40:08", "remaining_time": "0:34:09", "throughput": 11039.84, "total_tokens": 26591040} +{"current_steps": 8445, "total_steps": 15621, "loss": 0.3228, "lr": 1.033068071453221e-06, "epoch": 0.5406183983099674, "percentage": 54.06, "elapsed_time": "0:40:09", "remaining_time": "0:34:07", "throughput": 11043.37, "total_tokens": 26606976} +{"current_steps": 8450, "total_steps": 15621, "loss": 0.372, "lr": 1.0319512938502653e-06, "epoch": 0.5409384802509443, "percentage": 54.09, "elapsed_time": "0:40:10", "remaining_time": "0:34:05", "throughput": 11046.99, "total_tokens": 26623296} +{"current_steps": 8455, "total_steps": 15621, "loss": 0.3241, "lr": 1.0308344763557444e-06, "epoch": 0.5412585621919211, "percentage": 54.13, "elapsed_time": "0:40:10", "remaining_time": "0:34:03", "throughput": 11050.25, "total_tokens": 26638336} +{"current_steps": 8460, "total_steps": 15621, "loss": 0.2886, "lr": 1.0297176203640175e-06, "epoch": 0.541578644132898, "percentage": 54.16, "elapsed_time": "0:40:11", "remaining_time": "0:34:01", "throughput": 11053.82, "total_tokens": 26654400} +{"current_steps": 8465, "total_steps": 15621, "loss": 0.3553, "lr": 1.0286007272694924e-06, "epoch": 0.5418987260738749, "percentage": 54.19, "elapsed_time": "0:40:11", "remaining_time": "0:33:59", "throughput": 11057.1, "total_tokens": 26669568} +{"current_steps": 8470, "total_steps": 15621, "loss": 0.4816, "lr": 1.0274837984666239e-06, "epoch": 0.5422188080148518, "percentage": 54.22, "elapsed_time": "0:40:12", "remaining_time": "0:33:56", "throughput": 11060.72, "total_tokens": 26686016} +{"current_steps": 8475, "total_steps": 15621, "loss": 0.4212, "lr": 1.02636683534991e-06, "epoch": 0.5425388899558287, "percentage": 54.25, "elapsed_time": "0:40:13", "remaining_time": "0:33:54", "throughput": 11064.09, "total_tokens": 26701504} +{"current_steps": 8480, "total_steps": 15621, "loss": 0.5995, "lr": 1.0252498393138928e-06, "epoch": 0.5428589718968055, "percentage": 54.29, "elapsed_time": "0:40:14", "remaining_time": "0:33:52", "throughput": 11067.49, "total_tokens": 26717120} +{"current_steps": 8485, "total_steps": 15621, "loss": 0.415, "lr": 1.0241328117531546e-06, "epoch": 0.5431790538377824, "percentage": 54.32, "elapsed_time": "0:40:14", "remaining_time": "0:33:50", "throughput": 11070.9, "total_tokens": 26732736} +{"current_steps": 8490, "total_steps": 15621, "loss": 0.4128, "lr": 1.0230157540623174e-06, "epoch": 0.5434991357787594, "percentage": 54.35, "elapsed_time": "0:40:15", "remaining_time": "0:33:48", "throughput": 11074.0, "total_tokens": 26747392} +{"current_steps": 8495, "total_steps": 15621, "loss": 0.4605, "lr": 1.0218986676360415e-06, "epoch": 0.5438192177197363, "percentage": 54.38, "elapsed_time": "0:40:15", "remaining_time": "0:33:46", "throughput": 11077.14, "total_tokens": 26762112} +{"current_steps": 8500, "total_steps": 15621, "loss": 0.3673, "lr": 1.0207815538690216e-06, "epoch": 0.5441392996607132, "percentage": 54.41, "elapsed_time": "0:40:16", "remaining_time": "0:33:44", "throughput": 11080.55, "total_tokens": 26777856} +{"current_steps": 8505, "total_steps": 15621, "loss": 0.3133, "lr": 1.0196644141559877e-06, "epoch": 0.54445938160169, "percentage": 54.45, "elapsed_time": "0:40:17", "remaining_time": "0:33:42", "throughput": 11084.1, "total_tokens": 26794048} +{"current_steps": 8510, "total_steps": 15621, "loss": 0.3397, "lr": 1.0185472498917021e-06, "epoch": 0.5447794635426669, "percentage": 54.48, "elapsed_time": "0:40:18", "remaining_time": "0:33:40", "throughput": 11087.54, "total_tokens": 26809792} +{"current_steps": 8515, "total_steps": 15621, "loss": 0.4261, "lr": 1.017430062470957e-06, "epoch": 0.5450995454836438, "percentage": 54.51, "elapsed_time": "0:40:18", "remaining_time": "0:33:38", "throughput": 11090.83, "total_tokens": 26825024} +{"current_steps": 8520, "total_steps": 15621, "loss": 0.3494, "lr": 1.016312853288574e-06, "epoch": 0.5454196274246207, "percentage": 54.54, "elapsed_time": "0:40:19", "remaining_time": "0:33:36", "throughput": 11094.5, "total_tokens": 26841536} +{"current_steps": 8525, "total_steps": 15621, "loss": 0.3875, "lr": 1.0151956237394027e-06, "epoch": 0.5457397093655976, "percentage": 54.57, "elapsed_time": "0:40:20", "remaining_time": "0:33:34", "throughput": 11098.01, "total_tokens": 26857600} +{"current_steps": 8530, "total_steps": 15621, "loss": 0.3999, "lr": 1.0140783752183164e-06, "epoch": 0.5460597913065744, "percentage": 54.61, "elapsed_time": "0:40:20", "remaining_time": "0:33:32", "throughput": 11101.7, "total_tokens": 26874176} +{"current_steps": 8535, "total_steps": 15621, "loss": 0.4338, "lr": 1.0129611091202138e-06, "epoch": 0.5463798732475513, "percentage": 54.64, "elapsed_time": "0:40:21", "remaining_time": "0:33:30", "throughput": 11105.21, "total_tokens": 26890176} +{"current_steps": 8540, "total_steps": 15621, "loss": 0.2926, "lr": 1.0118438268400135e-06, "epoch": 0.5466999551885282, "percentage": 54.67, "elapsed_time": "0:40:22", "remaining_time": "0:33:28", "throughput": 11108.54, "total_tokens": 26905728} +{"current_steps": 8545, "total_steps": 15621, "loss": 0.4599, "lr": 1.0107265297726568e-06, "epoch": 0.5470200371295052, "percentage": 54.7, "elapsed_time": "0:40:22", "remaining_time": "0:33:26", "throughput": 11111.87, "total_tokens": 26921280} +{"current_steps": 8550, "total_steps": 15621, "loss": 0.4048, "lr": 1.009609219313102e-06, "epoch": 0.5473401190704821, "percentage": 54.73, "elapsed_time": "0:40:23", "remaining_time": "0:33:24", "throughput": 11115.17, "total_tokens": 26936704} +{"current_steps": 8555, "total_steps": 15621, "loss": 0.3919, "lr": 1.0084918968563236e-06, "epoch": 0.547660201011459, "percentage": 54.77, "elapsed_time": "0:40:24", "remaining_time": "0:33:22", "throughput": 11118.6, "total_tokens": 26952448} +{"current_steps": 8560, "total_steps": 15621, "loss": 0.3917, "lr": 1.0073745637973124e-06, "epoch": 0.5479802829524358, "percentage": 54.8, "elapsed_time": "0:40:24", "remaining_time": "0:33:20", "throughput": 11121.85, "total_tokens": 26967680} +{"current_steps": 8565, "total_steps": 15621, "loss": 0.3606, "lr": 1.0062572215310718e-06, "epoch": 0.5483003648934127, "percentage": 54.83, "elapsed_time": "0:40:25", "remaining_time": "0:33:18", "throughput": 11124.96, "total_tokens": 26982400} +{"current_steps": 8570, "total_steps": 15621, "loss": 0.3227, "lr": 1.0051398714526165e-06, "epoch": 0.5486204468343896, "percentage": 54.86, "elapsed_time": "0:40:26", "remaining_time": "0:33:16", "throughput": 11128.45, "total_tokens": 26998400} +{"current_steps": 8575, "total_steps": 15621, "loss": 0.3731, "lr": 1.0040225149569712e-06, "epoch": 0.5489405287753665, "percentage": 54.89, "elapsed_time": "0:40:26", "remaining_time": "0:33:14", "throughput": 11132.33, "total_tokens": 27015936} +{"current_steps": 8580, "total_steps": 15621, "loss": 0.3339, "lr": 1.0029051534391693e-06, "epoch": 0.5492606107163434, "percentage": 54.93, "elapsed_time": "0:40:27", "remaining_time": "0:33:12", "throughput": 11135.43, "total_tokens": 27030528} +{"current_steps": 8585, "total_steps": 15621, "loss": 0.3793, "lr": 1.001787788294249e-06, "epoch": 0.5495806926573202, "percentage": 54.96, "elapsed_time": "0:40:28", "remaining_time": "0:33:09", "throughput": 11138.77, "total_tokens": 27046080} +{"current_steps": 8590, "total_steps": 15621, "loss": 0.4226, "lr": 1.0006704209172537e-06, "epoch": 0.5499007745982971, "percentage": 54.99, "elapsed_time": "0:40:28", "remaining_time": "0:33:07", "throughput": 11142.07, "total_tokens": 27061504} +{"current_steps": 8595, "total_steps": 15621, "loss": 0.4382, "lr": 9.995530527032301e-07, "epoch": 0.5502208565392741, "percentage": 55.02, "elapsed_time": "0:40:29", "remaining_time": "0:33:05", "throughput": 11145.41, "total_tokens": 27077056} +{"current_steps": 8600, "total_steps": 15621, "loss": 0.3435, "lr": 9.984356850472257e-07, "epoch": 0.550540938480251, "percentage": 55.05, "elapsed_time": "0:40:30", "remaining_time": "0:33:03", "throughput": 11149.46, "total_tokens": 27095168} +{"current_steps": 8602, "total_steps": 15621, "eval_loss": 0.40140706300735474, "epoch": 0.5506689712566417, "percentage": 55.07, "elapsed_time": "0:41:21", "remaining_time": "0:33:44", "throughput": 10923.08, "total_tokens": 27101056} +{"current_steps": 8605, "total_steps": 15621, "loss": 0.3753, "lr": 9.97318319344287e-07, "epoch": 0.5508610204212279, "percentage": 55.09, "elapsed_time": "0:44:17", "remaining_time": "0:36:06", "throughput": 10202.08, "total_tokens": 27110144} +{"current_steps": 8610, "total_steps": 15621, "loss": 0.5273, "lr": 9.962009569894577e-07, "epoch": 0.5511811023622047, "percentage": 55.12, "elapsed_time": "0:44:17", "remaining_time": "0:36:04", "throughput": 10205.15, "total_tokens": 27124864} +{"current_steps": 8615, "total_steps": 15621, "loss": 0.3813, "lr": 9.95083599377778e-07, "epoch": 0.5515011843031816, "percentage": 55.15, "elapsed_time": "0:44:18", "remaining_time": "0:36:02", "throughput": 10208.35, "total_tokens": 27140160} +{"current_steps": 8620, "total_steps": 15621, "loss": 0.3966, "lr": 9.939662479042828e-07, "epoch": 0.5518212662441585, "percentage": 55.18, "elapsed_time": "0:44:19", "remaining_time": "0:35:59", "throughput": 10211.61, "total_tokens": 27155712} +{"current_steps": 8625, "total_steps": 15621, "loss": 0.4682, "lr": 9.92848903963998e-07, "epoch": 0.5521413481851354, "percentage": 55.21, "elapsed_time": "0:44:19", "remaining_time": "0:35:57", "throughput": 10214.99, "total_tokens": 27171520} +{"current_steps": 8630, "total_steps": 15621, "loss": 0.441, "lr": 9.9173156895194e-07, "epoch": 0.5524614301261123, "percentage": 55.25, "elapsed_time": "0:44:20", "remaining_time": "0:35:55", "throughput": 10218.2, "total_tokens": 27186752} +{"current_steps": 8635, "total_steps": 15621, "loss": 0.3889, "lr": 9.906142442631154e-07, "epoch": 0.5527815120670891, "percentage": 55.28, "elapsed_time": "0:44:21", "remaining_time": "0:35:53", "throughput": 10221.32, "total_tokens": 27201664} +{"current_steps": 8640, "total_steps": 15621, "loss": 0.3914, "lr": 9.894969312925171e-07, "epoch": 0.553101594008066, "percentage": 55.31, "elapsed_time": "0:44:21", "remaining_time": "0:35:50", "throughput": 10225.03, "total_tokens": 27218880} +{"current_steps": 8645, "total_steps": 15621, "loss": 0.3477, "lr": 9.883796314351234e-07, "epoch": 0.5534216759490429, "percentage": 55.34, "elapsed_time": "0:44:22", "remaining_time": "0:35:48", "throughput": 10228.61, "total_tokens": 27235648} +{"current_steps": 8650, "total_steps": 15621, "loss": 0.3945, "lr": 9.872623460858966e-07, "epoch": 0.5537417578900199, "percentage": 55.37, "elapsed_time": "0:44:23", "remaining_time": "0:35:46", "throughput": 10231.79, "total_tokens": 27250880} +{"current_steps": 8655, "total_steps": 15621, "loss": 0.3152, "lr": 9.861450766397799e-07, "epoch": 0.5540618398309968, "percentage": 55.41, "elapsed_time": "0:44:24", "remaining_time": "0:35:44", "throughput": 10235.18, "total_tokens": 27266880} +{"current_steps": 8660, "total_steps": 15621, "loss": 0.4157, "lr": 9.850278244916976e-07, "epoch": 0.5543819217719737, "percentage": 55.44, "elapsed_time": "0:44:24", "remaining_time": "0:35:41", "throughput": 10238.54, "total_tokens": 27282816} +{"current_steps": 8665, "total_steps": 15621, "loss": 0.4323, "lr": 9.839105910365524e-07, "epoch": 0.5547020037129505, "percentage": 55.47, "elapsed_time": "0:44:25", "remaining_time": "0:35:39", "throughput": 10241.84, "total_tokens": 27298496} +{"current_steps": 8670, "total_steps": 15621, "loss": 0.3436, "lr": 9.827933776692235e-07, "epoch": 0.5550220856539274, "percentage": 55.5, "elapsed_time": "0:44:26", "remaining_time": "0:35:37", "throughput": 10245.06, "total_tokens": 27313856} +{"current_steps": 8675, "total_steps": 15621, "loss": 0.3362, "lr": 9.81676185784564e-07, "epoch": 0.5553421675949043, "percentage": 55.53, "elapsed_time": "0:44:26", "remaining_time": "0:35:35", "throughput": 10248.09, "total_tokens": 27328448} +{"current_steps": 8680, "total_steps": 15621, "loss": 0.4001, "lr": 9.805590167774021e-07, "epoch": 0.5556622495358812, "percentage": 55.57, "elapsed_time": "0:44:27", "remaining_time": "0:35:32", "throughput": 10251.33, "total_tokens": 27343872} +{"current_steps": 8685, "total_steps": 15621, "loss": 0.5593, "lr": 9.79441872042536e-07, "epoch": 0.555982331476858, "percentage": 55.6, "elapsed_time": "0:44:27", "remaining_time": "0:35:30", "throughput": 10254.41, "total_tokens": 27358720} +{"current_steps": 8690, "total_steps": 15621, "loss": 0.3818, "lr": 9.783247529747338e-07, "epoch": 0.5563024134178349, "percentage": 55.63, "elapsed_time": "0:44:28", "remaining_time": "0:35:28", "throughput": 10257.42, "total_tokens": 27373312} +{"current_steps": 8695, "total_steps": 15621, "loss": 0.358, "lr": 9.772076609687323e-07, "epoch": 0.5566224953588118, "percentage": 55.66, "elapsed_time": "0:44:29", "remaining_time": "0:35:26", "throughput": 10260.59, "total_tokens": 27388544} +{"current_steps": 8700, "total_steps": 15621, "loss": 0.3191, "lr": 9.760905974192334e-07, "epoch": 0.5569425772997888, "percentage": 55.69, "elapsed_time": "0:44:29", "remaining_time": "0:35:24", "throughput": 10264.11, "total_tokens": 27405120} +{"current_steps": 8705, "total_steps": 15621, "loss": 0.4284, "lr": 9.749735637209044e-07, "epoch": 0.5572626592407657, "percentage": 55.73, "elapsed_time": "0:44:30", "remaining_time": "0:35:21", "throughput": 10267.32, "total_tokens": 27420544} +{"current_steps": 8710, "total_steps": 15621, "loss": 0.3233, "lr": 9.738565612683754e-07, "epoch": 0.5575827411817426, "percentage": 55.76, "elapsed_time": "0:44:31", "remaining_time": "0:35:19", "throughput": 10270.43, "total_tokens": 27435456} +{"current_steps": 8715, "total_steps": 15621, "loss": 0.3406, "lr": 9.727395914562363e-07, "epoch": 0.5579028231227194, "percentage": 55.79, "elapsed_time": "0:44:32", "remaining_time": "0:35:17", "throughput": 10273.94, "total_tokens": 27452032} +{"current_steps": 8720, "total_steps": 15621, "loss": 0.4084, "lr": 9.716226556790372e-07, "epoch": 0.5582229050636963, "percentage": 55.82, "elapsed_time": "0:44:32", "remaining_time": "0:35:15", "throughput": 10277.2, "total_tokens": 27467520} +{"current_steps": 8725, "total_steps": 15621, "loss": 0.3149, "lr": 9.705057553312855e-07, "epoch": 0.5585429870046732, "percentage": 55.85, "elapsed_time": "0:44:33", "remaining_time": "0:35:12", "throughput": 10280.38, "total_tokens": 27482816} +{"current_steps": 8730, "total_steps": 15621, "loss": 0.3853, "lr": 9.693888918074452e-07, "epoch": 0.5588630689456501, "percentage": 55.89, "elapsed_time": "0:44:33", "remaining_time": "0:35:10", "throughput": 10283.43, "total_tokens": 27497600} +{"current_steps": 8735, "total_steps": 15621, "loss": 0.4952, "lr": 9.682720665019325e-07, "epoch": 0.559183150886627, "percentage": 55.92, "elapsed_time": "0:44:34", "remaining_time": "0:35:08", "throughput": 10286.69, "total_tokens": 27513344} +{"current_steps": 8740, "total_steps": 15621, "loss": 0.4144, "lr": 9.671552808091172e-07, "epoch": 0.5595032328276038, "percentage": 55.95, "elapsed_time": "0:44:35", "remaining_time": "0:35:06", "throughput": 10290.29, "total_tokens": 27530304} +{"current_steps": 8745, "total_steps": 15621, "loss": 0.3504, "lr": 9.660385361233195e-07, "epoch": 0.5598233147685807, "percentage": 55.98, "elapsed_time": "0:44:36", "remaining_time": "0:35:04", "throughput": 10293.46, "total_tokens": 27545664} +{"current_steps": 8750, "total_steps": 15621, "loss": 0.3053, "lr": 9.649218338388084e-07, "epoch": 0.5601433967095576, "percentage": 56.01, "elapsed_time": "0:44:36", "remaining_time": "0:35:01", "throughput": 10296.57, "total_tokens": 27560704} +{"current_steps": 8755, "total_steps": 15621, "loss": 0.4472, "lr": 9.638051753497994e-07, "epoch": 0.5604634786505346, "percentage": 56.05, "elapsed_time": "0:44:37", "remaining_time": "0:34:59", "throughput": 10300.09, "total_tokens": 27577472} +{"current_steps": 8760, "total_steps": 15621, "loss": 0.3676, "lr": 9.62688562050454e-07, "epoch": 0.5607835605915115, "percentage": 56.08, "elapsed_time": "0:44:38", "remaining_time": "0:34:57", "throughput": 10303.32, "total_tokens": 27592960} +{"current_steps": 8765, "total_steps": 15621, "loss": 0.4074, "lr": 9.615719953348772e-07, "epoch": 0.5611036425324883, "percentage": 56.11, "elapsed_time": "0:44:38", "remaining_time": "0:34:55", "throughput": 10307.02, "total_tokens": 27610304} +{"current_steps": 8770, "total_steps": 15621, "loss": 0.568, "lr": 9.604554765971148e-07, "epoch": 0.5614237244734652, "percentage": 56.14, "elapsed_time": "0:44:39", "remaining_time": "0:34:53", "throughput": 10310.89, "total_tokens": 27628288} +{"current_steps": 8775, "total_steps": 15621, "loss": 0.4119, "lr": 9.593390072311549e-07, "epoch": 0.5617438064144421, "percentage": 56.17, "elapsed_time": "0:44:40", "remaining_time": "0:34:51", "throughput": 10314.14, "total_tokens": 27643904} +{"current_steps": 8780, "total_steps": 15621, "loss": 0.3703, "lr": 9.582225886309216e-07, "epoch": 0.562063888355419, "percentage": 56.21, "elapsed_time": "0:44:40", "remaining_time": "0:34:48", "throughput": 10317.55, "total_tokens": 27660224} +{"current_steps": 8785, "total_steps": 15621, "loss": 0.3098, "lr": 9.571062221902767e-07, "epoch": 0.5623839702963959, "percentage": 56.24, "elapsed_time": "0:44:41", "remaining_time": "0:34:46", "throughput": 10320.64, "total_tokens": 27675136} +{"current_steps": 8790, "total_steps": 15621, "loss": 0.3557, "lr": 9.559899093030175e-07, "epoch": 0.5627040522373727, "percentage": 56.27, "elapsed_time": "0:44:42", "remaining_time": "0:34:44", "throughput": 10323.73, "total_tokens": 27690176} +{"current_steps": 8795, "total_steps": 15621, "loss": 0.3065, "lr": 9.54873651362873e-07, "epoch": 0.5630241341783496, "percentage": 56.3, "elapsed_time": "0:44:42", "remaining_time": "0:34:42", "throughput": 10326.63, "total_tokens": 27704512} +{"current_steps": 8800, "total_steps": 15621, "loss": 0.4565, "lr": 9.537574497635043e-07, "epoch": 0.5633442161193265, "percentage": 56.33, "elapsed_time": "0:44:43", "remaining_time": "0:34:40", "throughput": 10329.95, "total_tokens": 27720448} +{"current_steps": 8805, "total_steps": 15621, "loss": 0.5121, "lr": 9.52641305898503e-07, "epoch": 0.5636642980603035, "percentage": 56.37, "elapsed_time": "0:44:44", "remaining_time": "0:34:37", "throughput": 10333.13, "total_tokens": 27735808} +{"current_steps": 8810, "total_steps": 15621, "loss": 0.3203, "lr": 9.515252211613873e-07, "epoch": 0.5639843800012804, "percentage": 56.4, "elapsed_time": "0:44:44", "remaining_time": "0:34:35", "throughput": 10336.11, "total_tokens": 27750464} +{"current_steps": 8815, "total_steps": 15621, "loss": 0.4539, "lr": 9.504091969456021e-07, "epoch": 0.5643044619422573, "percentage": 56.43, "elapsed_time": "0:44:45", "remaining_time": "0:34:33", "throughput": 10338.89, "total_tokens": 27764352} +{"current_steps": 8820, "total_steps": 15621, "loss": 0.3435, "lr": 9.492932346445165e-07, "epoch": 0.5646245438832341, "percentage": 56.46, "elapsed_time": "0:44:46", "remaining_time": "0:34:31", "throughput": 10342.05, "total_tokens": 27779840} +{"current_steps": 8825, "total_steps": 15621, "loss": 0.2767, "lr": 9.48177335651423e-07, "epoch": 0.564944625824211, "percentage": 56.49, "elapsed_time": "0:44:46", "remaining_time": "0:34:29", "throughput": 10345.5, "total_tokens": 27796352} +{"current_steps": 8830, "total_steps": 15621, "loss": 0.343, "lr": 9.470615013595346e-07, "epoch": 0.5652647077651879, "percentage": 56.53, "elapsed_time": "0:44:47", "remaining_time": "0:34:26", "throughput": 10348.37, "total_tokens": 27810624} +{"current_steps": 8835, "total_steps": 15621, "loss": 0.4395, "lr": 9.459457331619829e-07, "epoch": 0.5655847897061648, "percentage": 56.56, "elapsed_time": "0:44:48", "remaining_time": "0:34:24", "throughput": 10351.31, "total_tokens": 27825152} +{"current_steps": 8840, "total_steps": 15621, "loss": 0.4142, "lr": 9.448300324518182e-07, "epoch": 0.5659048716471416, "percentage": 56.59, "elapsed_time": "0:44:48", "remaining_time": "0:34:22", "throughput": 10354.44, "total_tokens": 27840384} +{"current_steps": 8845, "total_steps": 15621, "loss": 0.3014, "lr": 9.437144006220058e-07, "epoch": 0.5662249535881185, "percentage": 56.62, "elapsed_time": "0:44:49", "remaining_time": "0:34:20", "throughput": 10357.8, "total_tokens": 27856640} +{"current_steps": 8850, "total_steps": 15621, "loss": 0.2097, "lr": 9.425988390654249e-07, "epoch": 0.5665450355290954, "percentage": 56.65, "elapsed_time": "0:44:50", "remaining_time": "0:34:18", "throughput": 10361.14, "total_tokens": 27872768} +{"current_steps": 8855, "total_steps": 15621, "loss": 0.5239, "lr": 9.414833491748677e-07, "epoch": 0.5668651174700723, "percentage": 56.69, "elapsed_time": "0:44:50", "remaining_time": "0:34:15", "throughput": 10364.12, "total_tokens": 27887488} +{"current_steps": 8860, "total_steps": 15621, "loss": 0.2943, "lr": 9.40367932343036e-07, "epoch": 0.5671851994110493, "percentage": 56.72, "elapsed_time": "0:44:51", "remaining_time": "0:34:13", "throughput": 10367.21, "total_tokens": 27902720} +{"current_steps": 8865, "total_steps": 15621, "loss": 0.3817, "lr": 9.392525899625407e-07, "epoch": 0.5675052813520262, "percentage": 56.75, "elapsed_time": "0:44:52", "remaining_time": "0:34:11", "throughput": 10370.35, "total_tokens": 27918080} +{"current_steps": 8870, "total_steps": 15621, "loss": 0.3887, "lr": 9.381373234259004e-07, "epoch": 0.567825363293003, "percentage": 56.78, "elapsed_time": "0:44:52", "remaining_time": "0:34:09", "throughput": 10373.61, "total_tokens": 27933760} +{"current_steps": 8875, "total_steps": 15621, "loss": 0.3858, "lr": 9.370221341255382e-07, "epoch": 0.5681454452339799, "percentage": 56.81, "elapsed_time": "0:44:53", "remaining_time": "0:34:07", "throughput": 10376.7, "total_tokens": 27948992} +{"current_steps": 8880, "total_steps": 15621, "loss": 0.3428, "lr": 9.359070234537807e-07, "epoch": 0.5684655271749568, "percentage": 56.85, "elapsed_time": "0:44:54", "remaining_time": "0:34:05", "throughput": 10380.44, "total_tokens": 27966848} +{"current_steps": 8885, "total_steps": 15621, "loss": 0.3816, "lr": 9.34791992802857e-07, "epoch": 0.5687856091159337, "percentage": 56.88, "elapsed_time": "0:44:54", "remaining_time": "0:34:03", "throughput": 10383.42, "total_tokens": 27981696} +{"current_steps": 8890, "total_steps": 15621, "loss": 0.2578, "lr": 9.336770435648963e-07, "epoch": 0.5691056910569106, "percentage": 56.91, "elapsed_time": "0:44:55", "remaining_time": "0:34:00", "throughput": 10386.64, "total_tokens": 27997376} +{"current_steps": 8895, "total_steps": 15621, "loss": 0.4013, "lr": 9.325621771319246e-07, "epoch": 0.5694257729978874, "percentage": 56.94, "elapsed_time": "0:44:56", "remaining_time": "0:33:58", "throughput": 10390.1, "total_tokens": 28014016} +{"current_steps": 8900, "total_steps": 15621, "loss": 0.4245, "lr": 9.314473948958673e-07, "epoch": 0.5697458549388643, "percentage": 56.97, "elapsed_time": "0:44:56", "remaining_time": "0:33:56", "throughput": 10393.49, "total_tokens": 28030400} +{"current_steps": 8905, "total_steps": 15621, "loss": 0.3464, "lr": 9.303326982485422e-07, "epoch": 0.5700659368798412, "percentage": 57.01, "elapsed_time": "0:44:57", "remaining_time": "0:33:54", "throughput": 10396.99, "total_tokens": 28047104} +{"current_steps": 8910, "total_steps": 15621, "loss": 0.3751, "lr": 9.29218088581661e-07, "epoch": 0.5703860188208181, "percentage": 57.04, "elapsed_time": "0:44:58", "remaining_time": "0:33:52", "throughput": 10400.31, "total_tokens": 28063168} +{"current_steps": 8915, "total_steps": 15621, "loss": 0.3567, "lr": 9.281035672868278e-07, "epoch": 0.5707061007617951, "percentage": 57.07, "elapsed_time": "0:44:58", "remaining_time": "0:33:50", "throughput": 10403.57, "total_tokens": 28079104} +{"current_steps": 8920, "total_steps": 15621, "loss": 0.4098, "lr": 9.269891357555348e-07, "epoch": 0.571026182702772, "percentage": 57.1, "elapsed_time": "0:44:59", "remaining_time": "0:33:48", "throughput": 10406.77, "total_tokens": 28094720} +{"current_steps": 8925, "total_steps": 15621, "loss": 0.2775, "lr": 9.25874795379163e-07, "epoch": 0.5713462646437488, "percentage": 57.13, "elapsed_time": "0:45:00", "remaining_time": "0:33:45", "throughput": 10410.07, "total_tokens": 28110848} +{"current_steps": 8930, "total_steps": 15621, "loss": 0.4246, "lr": 9.247605475489793e-07, "epoch": 0.5716663465847257, "percentage": 57.17, "elapsed_time": "0:45:01", "remaining_time": "0:33:43", "throughput": 10413.41, "total_tokens": 28127040} +{"current_steps": 8935, "total_steps": 15621, "loss": 0.3106, "lr": 9.236463936561358e-07, "epoch": 0.5719864285257026, "percentage": 57.2, "elapsed_time": "0:45:01", "remaining_time": "0:33:41", "throughput": 10416.8, "total_tokens": 28143424} +{"current_steps": 8940, "total_steps": 15621, "loss": 0.5312, "lr": 9.225323350916661e-07, "epoch": 0.5723065104666795, "percentage": 57.23, "elapsed_time": "0:45:02", "remaining_time": "0:33:39", "throughput": 10419.86, "total_tokens": 28158528} +{"current_steps": 8945, "total_steps": 15621, "loss": 0.3963, "lr": 9.214183732464855e-07, "epoch": 0.5726265924076563, "percentage": 57.26, "elapsed_time": "0:45:03", "remaining_time": "0:33:37", "throughput": 10422.97, "total_tokens": 28173888} +{"current_steps": 8950, "total_steps": 15621, "loss": 0.3663, "lr": 9.203045095113886e-07, "epoch": 0.5729466743486332, "percentage": 57.29, "elapsed_time": "0:45:03", "remaining_time": "0:33:35", "throughput": 10426.73, "total_tokens": 28191872} +{"current_steps": 8955, "total_steps": 15621, "loss": 0.4394, "lr": 9.191907452770476e-07, "epoch": 0.5732667562896101, "percentage": 57.33, "elapsed_time": "0:45:04", "remaining_time": "0:33:33", "throughput": 10429.77, "total_tokens": 28206912} +{"current_steps": 8960, "total_steps": 15621, "loss": 0.4103, "lr": 9.180770819340095e-07, "epoch": 0.573586838230587, "percentage": 57.36, "elapsed_time": "0:45:05", "remaining_time": "0:33:31", "throughput": 10432.91, "total_tokens": 28222336} +{"current_steps": 8965, "total_steps": 15621, "loss": 0.3816, "lr": 9.169635208726967e-07, "epoch": 0.573906920171564, "percentage": 57.39, "elapsed_time": "0:45:05", "remaining_time": "0:33:28", "throughput": 10436.12, "total_tokens": 28238144} +{"current_steps": 8970, "total_steps": 15621, "loss": 0.3919, "lr": 9.15850063483403e-07, "epoch": 0.5742270021125409, "percentage": 57.42, "elapsed_time": "0:45:06", "remaining_time": "0:33:26", "throughput": 10439.21, "total_tokens": 28253376} +{"current_steps": 8975, "total_steps": 15621, "loss": 0.3549, "lr": 9.147367111562928e-07, "epoch": 0.5745470840535177, "percentage": 57.45, "elapsed_time": "0:45:07", "remaining_time": "0:33:24", "throughput": 10442.49, "total_tokens": 28269248} +{"current_steps": 8980, "total_steps": 15621, "loss": 0.4151, "lr": 9.136234652814005e-07, "epoch": 0.5748671659944946, "percentage": 57.49, "elapsed_time": "0:45:07", "remaining_time": "0:33:22", "throughput": 10445.83, "total_tokens": 28285440} +{"current_steps": 8985, "total_steps": 15621, "loss": 0.3061, "lr": 9.125103272486255e-07, "epoch": 0.5751872479354715, "percentage": 57.52, "elapsed_time": "0:45:08", "remaining_time": "0:33:20", "throughput": 10448.92, "total_tokens": 28300736} +{"current_steps": 8990, "total_steps": 15621, "loss": 0.3626, "lr": 9.11397298447734e-07, "epoch": 0.5755073298764484, "percentage": 57.55, "elapsed_time": "0:45:09", "remaining_time": "0:33:18", "throughput": 10451.94, "total_tokens": 28315712} +{"current_steps": 8995, "total_steps": 15621, "loss": 0.342, "lr": 9.10284380268356e-07, "epoch": 0.5758274118174252, "percentage": 57.58, "elapsed_time": "0:45:09", "remaining_time": "0:33:16", "throughput": 10455.28, "total_tokens": 28332032} +{"current_steps": 9000, "total_steps": 15621, "loss": 0.4546, "lr": 9.091715740999828e-07, "epoch": 0.5761474937584021, "percentage": 57.61, "elapsed_time": "0:45:10", "remaining_time": "0:33:14", "throughput": 10458.53, "total_tokens": 28347968} +{"current_steps": 9005, "total_steps": 15621, "loss": 0.39, "lr": 9.080588813319654e-07, "epoch": 0.576467575699379, "percentage": 57.65, "elapsed_time": "0:45:11", "remaining_time": "0:33:11", "throughput": 10461.55, "total_tokens": 28362944} +{"current_steps": 9010, "total_steps": 15621, "loss": 0.2894, "lr": 9.069463033535143e-07, "epoch": 0.5767876576403559, "percentage": 57.68, "elapsed_time": "0:45:11", "remaining_time": "0:33:09", "throughput": 10464.75, "total_tokens": 28378624} +{"current_steps": 9015, "total_steps": 15621, "loss": 0.3832, "lr": 9.058338415536962e-07, "epoch": 0.5771077395813328, "percentage": 57.71, "elapsed_time": "0:45:12", "remaining_time": "0:33:07", "throughput": 10467.87, "total_tokens": 28394048} +{"current_steps": 9020, "total_steps": 15621, "loss": 0.3796, "lr": 9.04721497321432e-07, "epoch": 0.5774278215223098, "percentage": 57.74, "elapsed_time": "0:45:13", "remaining_time": "0:33:05", "throughput": 10471.03, "total_tokens": 28409664} +{"current_steps": 9025, "total_steps": 15621, "loss": 0.3794, "lr": 9.036092720454977e-07, "epoch": 0.5777479034632866, "percentage": 57.77, "elapsed_time": "0:45:13", "remaining_time": "0:33:03", "throughput": 10474.08, "total_tokens": 28424768} +{"current_steps": 9030, "total_steps": 15621, "loss": 0.3439, "lr": 9.024971671145189e-07, "epoch": 0.5780679854042635, "percentage": 57.81, "elapsed_time": "0:45:14", "remaining_time": "0:33:01", "throughput": 10477.01, "total_tokens": 28439424} +{"current_steps": 9035, "total_steps": 15621, "loss": 0.443, "lr": 9.013851839169718e-07, "epoch": 0.5783880673452404, "percentage": 57.84, "elapsed_time": "0:45:15", "remaining_time": "0:32:59", "throughput": 10480.46, "total_tokens": 28456064} +{"current_steps": 9040, "total_steps": 15621, "loss": 0.3457, "lr": 9.002733238411801e-07, "epoch": 0.5787081492862173, "percentage": 57.87, "elapsed_time": "0:45:15", "remaining_time": "0:32:57", "throughput": 10483.9, "total_tokens": 28472768} +{"current_steps": 9045, "total_steps": 15621, "loss": 0.3528, "lr": 8.991615882753147e-07, "epoch": 0.5790282312271942, "percentage": 57.9, "elapsed_time": "0:45:16", "remaining_time": "0:32:55", "throughput": 10487.13, "total_tokens": 28488704} +{"current_steps": 9050, "total_steps": 15621, "loss": 0.4516, "lr": 8.980499786073904e-07, "epoch": 0.579348313168171, "percentage": 57.93, "elapsed_time": "0:45:17", "remaining_time": "0:32:52", "throughput": 10490.02, "total_tokens": 28503808} +{"current_steps": 9055, "total_steps": 15621, "loss": 0.4616, "lr": 8.969384962252645e-07, "epoch": 0.5796683951091479, "percentage": 57.97, "elapsed_time": "0:45:17", "remaining_time": "0:32:50", "throughput": 10493.41, "total_tokens": 28520320} +{"current_steps": 9060, "total_steps": 15621, "loss": 0.4395, "lr": 8.958271425166366e-07, "epoch": 0.5799884770501248, "percentage": 58.0, "elapsed_time": "0:45:18", "remaining_time": "0:32:48", "throughput": 10496.51, "total_tokens": 28535680} +{"current_steps": 9065, "total_steps": 15621, "loss": 0.3943, "lr": 8.947159188690442e-07, "epoch": 0.5803085589911017, "percentage": 58.03, "elapsed_time": "0:45:19", "remaining_time": "0:32:46", "throughput": 10499.7, "total_tokens": 28551488} +{"current_steps": 9070, "total_steps": 15621, "loss": 0.4633, "lr": 8.93604826669863e-07, "epoch": 0.5806286409320787, "percentage": 58.06, "elapsed_time": "0:45:19", "remaining_time": "0:32:44", "throughput": 10502.84, "total_tokens": 28567040} +{"current_steps": 9075, "total_steps": 15621, "loss": 0.389, "lr": 8.924938673063052e-07, "epoch": 0.5809487228730555, "percentage": 58.09, "elapsed_time": "0:45:20", "remaining_time": "0:32:42", "throughput": 10505.69, "total_tokens": 28581568} +{"current_steps": 9080, "total_steps": 15621, "loss": 0.3616, "lr": 8.913830421654166e-07, "epoch": 0.5812688048140324, "percentage": 58.13, "elapsed_time": "0:45:21", "remaining_time": "0:32:40", "throughput": 10508.82, "total_tokens": 28596992} +{"current_steps": 9085, "total_steps": 15621, "loss": 0.4752, "lr": 8.902723526340746e-07, "epoch": 0.5815888867550093, "percentage": 58.16, "elapsed_time": "0:45:21", "remaining_time": "0:32:38", "throughput": 10512.33, "total_tokens": 28613952} +{"current_steps": 9090, "total_steps": 15621, "loss": 0.4343, "lr": 8.89161800098989e-07, "epoch": 0.5819089686959862, "percentage": 58.19, "elapsed_time": "0:45:22", "remaining_time": "0:32:36", "throughput": 10515.28, "total_tokens": 28628736} +{"current_steps": 9095, "total_steps": 15621, "loss": 0.3683, "lr": 8.880513859466974e-07, "epoch": 0.5822290506369631, "percentage": 58.22, "elapsed_time": "0:45:23", "remaining_time": "0:32:34", "throughput": 10518.56, "total_tokens": 28644928} +{"current_steps": 9100, "total_steps": 15621, "loss": 0.2861, "lr": 8.869411115635645e-07, "epoch": 0.5825491325779399, "percentage": 58.25, "elapsed_time": "0:45:23", "remaining_time": "0:32:31", "throughput": 10521.86, "total_tokens": 28661184} +{"current_steps": 9105, "total_steps": 15621, "loss": 0.2823, "lr": 8.858309783357816e-07, "epoch": 0.5828692145189168, "percentage": 58.29, "elapsed_time": "0:45:24", "remaining_time": "0:32:29", "throughput": 10524.76, "total_tokens": 28675776} +{"current_steps": 9110, "total_steps": 15621, "loss": 0.4335, "lr": 8.847209876493629e-07, "epoch": 0.5831892964598937, "percentage": 58.32, "elapsed_time": "0:45:25", "remaining_time": "0:32:27", "throughput": 10528.08, "total_tokens": 28692160} +{"current_steps": 9115, "total_steps": 15621, "loss": 0.2627, "lr": 8.836111408901441e-07, "epoch": 0.5835093784008706, "percentage": 58.35, "elapsed_time": "0:45:25", "remaining_time": "0:32:25", "throughput": 10531.1, "total_tokens": 28707328} +{"current_steps": 9120, "total_steps": 15621, "loss": 0.4159, "lr": 8.825014394437828e-07, "epoch": 0.5838294603418475, "percentage": 58.38, "elapsed_time": "0:45:26", "remaining_time": "0:32:23", "throughput": 10534.15, "total_tokens": 28722624} +{"current_steps": 9125, "total_steps": 15621, "loss": 0.4013, "lr": 8.813918846957542e-07, "epoch": 0.5841495422828245, "percentage": 58.41, "elapsed_time": "0:45:27", "remaining_time": "0:32:21", "throughput": 10537.18, "total_tokens": 28737856} +{"current_steps": 9130, "total_steps": 15621, "loss": 0.4447, "lr": 8.802824780313499e-07, "epoch": 0.5844696242238013, "percentage": 58.45, "elapsed_time": "0:45:27", "remaining_time": "0:32:19", "throughput": 10540.08, "total_tokens": 28752448} +{"current_steps": 9135, "total_steps": 15621, "loss": 0.3924, "lr": 8.791732208356771e-07, "epoch": 0.5847897061647782, "percentage": 58.48, "elapsed_time": "0:45:28", "remaining_time": "0:32:17", "throughput": 10543.12, "total_tokens": 28767616} +{"current_steps": 9140, "total_steps": 15621, "loss": 0.4676, "lr": 8.780641144936573e-07, "epoch": 0.5851097881057551, "percentage": 58.51, "elapsed_time": "0:45:29", "remaining_time": "0:32:15", "throughput": 10546.04, "total_tokens": 28782400} +{"current_steps": 9145, "total_steps": 15621, "loss": 0.446, "lr": 8.76955160390022e-07, "epoch": 0.585429870046732, "percentage": 58.54, "elapsed_time": "0:45:29", "remaining_time": "0:32:13", "throughput": 10549.25, "total_tokens": 28798336} +{"current_steps": 9150, "total_steps": 15621, "loss": 0.2893, "lr": 8.758463599093136e-07, "epoch": 0.5857499519877089, "percentage": 58.57, "elapsed_time": "0:45:30", "remaining_time": "0:32:11", "throughput": 10552.48, "total_tokens": 28814336} +{"current_steps": 9155, "total_steps": 15621, "loss": 0.5245, "lr": 8.747377144358825e-07, "epoch": 0.5860700339286857, "percentage": 58.61, "elapsed_time": "0:45:31", "remaining_time": "0:32:09", "throughput": 10555.81, "total_tokens": 28830656} +{"current_steps": 9160, "total_steps": 15621, "loss": 0.4169, "lr": 8.736292253538861e-07, "epoch": 0.5863901158696626, "percentage": 58.64, "elapsed_time": "0:45:31", "remaining_time": "0:32:06", "throughput": 10559.02, "total_tokens": 28846656} +{"current_steps": 9165, "total_steps": 15621, "loss": 0.3115, "lr": 8.725208940472851e-07, "epoch": 0.5867101978106395, "percentage": 58.67, "elapsed_time": "0:45:32", "remaining_time": "0:32:04", "throughput": 10562.31, "total_tokens": 28862848} +{"current_steps": 9170, "total_steps": 15621, "loss": 0.4071, "lr": 8.714127218998448e-07, "epoch": 0.5870302797516164, "percentage": 58.7, "elapsed_time": "0:45:33", "remaining_time": "0:32:02", "throughput": 10565.42, "total_tokens": 28878400} +{"current_steps": 9175, "total_steps": 15621, "loss": 0.5141, "lr": 8.70304710295131e-07, "epoch": 0.5873503616925934, "percentage": 58.74, "elapsed_time": "0:45:33", "remaining_time": "0:32:00", "throughput": 10568.43, "total_tokens": 28893568} +{"current_steps": 9180, "total_steps": 15621, "loss": 0.3766, "lr": 8.691968606165092e-07, "epoch": 0.5876704436335702, "percentage": 58.77, "elapsed_time": "0:45:34", "remaining_time": "0:31:58", "throughput": 10571.71, "total_tokens": 28909824} +{"current_steps": 9185, "total_steps": 15621, "loss": 0.3189, "lr": 8.680891742471429e-07, "epoch": 0.5879905255745471, "percentage": 58.8, "elapsed_time": "0:45:35", "remaining_time": "0:31:56", "throughput": 10574.87, "total_tokens": 28925568} +{"current_steps": 9190, "total_steps": 15621, "loss": 0.3236, "lr": 8.669816525699912e-07, "epoch": 0.588310607515524, "percentage": 58.83, "elapsed_time": "0:45:35", "remaining_time": "0:31:54", "throughput": 10577.95, "total_tokens": 28941056} +{"current_steps": 9195, "total_steps": 15621, "loss": 0.4153, "lr": 8.658742969678079e-07, "epoch": 0.5886306894565009, "percentage": 58.86, "elapsed_time": "0:45:36", "remaining_time": "0:31:52", "throughput": 10580.75, "total_tokens": 28955456} +{"current_steps": 9200, "total_steps": 15621, "loss": 0.2925, "lr": 8.647671088231398e-07, "epoch": 0.5889507713974778, "percentage": 58.9, "elapsed_time": "0:45:37", "remaining_time": "0:31:50", "throughput": 10583.85, "total_tokens": 28971136} +{"current_steps": 9205, "total_steps": 15621, "loss": 0.4144, "lr": 8.636600895183245e-07, "epoch": 0.5892708533384546, "percentage": 58.93, "elapsed_time": "0:45:38", "remaining_time": "0:31:48", "throughput": 10587.39, "total_tokens": 28988480} +{"current_steps": 9210, "total_steps": 15621, "loss": 0.3702, "lr": 8.625532404354877e-07, "epoch": 0.5895909352794315, "percentage": 58.96, "elapsed_time": "0:45:38", "remaining_time": "0:31:46", "throughput": 10590.61, "total_tokens": 29004544} +{"current_steps": 9215, "total_steps": 15621, "loss": 0.3944, "lr": 8.614465629565443e-07, "epoch": 0.5899110172204084, "percentage": 58.99, "elapsed_time": "0:45:39", "remaining_time": "0:31:44", "throughput": 10593.52, "total_tokens": 29019328} +{"current_steps": 9220, "total_steps": 15621, "loss": 0.3414, "lr": 8.603400584631939e-07, "epoch": 0.5902310991613853, "percentage": 59.02, "elapsed_time": "0:45:40", "remaining_time": "0:31:42", "throughput": 10596.58, "total_tokens": 29034752} +{"current_steps": 9225, "total_steps": 15621, "loss": 0.4473, "lr": 8.592337283369198e-07, "epoch": 0.5905511811023622, "percentage": 59.06, "elapsed_time": "0:45:40", "remaining_time": "0:31:40", "throughput": 10599.77, "total_tokens": 29050816} +{"current_steps": 9230, "total_steps": 15621, "loss": 0.2833, "lr": 8.581275739589893e-07, "epoch": 0.5908712630433391, "percentage": 59.09, "elapsed_time": "0:45:41", "remaining_time": "0:31:38", "throughput": 10602.75, "total_tokens": 29065920} +{"current_steps": 9235, "total_steps": 15621, "loss": 0.509, "lr": 8.570215967104481e-07, "epoch": 0.591191344984316, "percentage": 59.12, "elapsed_time": "0:45:42", "remaining_time": "0:31:36", "throughput": 10605.74, "total_tokens": 29080960} +{"current_steps": 9240, "total_steps": 15621, "loss": 0.4754, "lr": 8.559157979721225e-07, "epoch": 0.5915114269252929, "percentage": 59.15, "elapsed_time": "0:45:42", "remaining_time": "0:31:34", "throughput": 10608.89, "total_tokens": 29096768} +{"current_steps": 9245, "total_steps": 15621, "loss": 0.5592, "lr": 8.548101791246145e-07, "epoch": 0.5918315088662698, "percentage": 59.18, "elapsed_time": "0:45:43", "remaining_time": "0:31:32", "throughput": 10612.02, "total_tokens": 29112448} +{"current_steps": 9250, "total_steps": 15621, "loss": 0.3436, "lr": 8.537047415483028e-07, "epoch": 0.5921515908072467, "percentage": 59.22, "elapsed_time": "0:45:44", "remaining_time": "0:31:29", "throughput": 10615.04, "total_tokens": 29127808} +{"current_steps": 9255, "total_steps": 15621, "loss": 0.2783, "lr": 8.525994866233388e-07, "epoch": 0.5924716727482235, "percentage": 59.25, "elapsed_time": "0:45:44", "remaining_time": "0:31:27", "throughput": 10618.03, "total_tokens": 29142912} +{"current_steps": 9260, "total_steps": 15621, "loss": 0.3963, "lr": 8.514944157296464e-07, "epoch": 0.5927917546892004, "percentage": 59.28, "elapsed_time": "0:45:45", "remaining_time": "0:31:25", "throughput": 10621.27, "total_tokens": 29159168} +{"current_steps": 9265, "total_steps": 15621, "loss": 0.3875, "lr": 8.503895302469199e-07, "epoch": 0.5931118366301773, "percentage": 59.31, "elapsed_time": "0:45:46", "remaining_time": "0:31:23", "throughput": 10624.54, "total_tokens": 29175488} +{"current_steps": 9270, "total_steps": 15621, "loss": 0.4151, "lr": 8.492848315546214e-07, "epoch": 0.5934319185711542, "percentage": 59.34, "elapsed_time": "0:45:46", "remaining_time": "0:31:21", "throughput": 10627.62, "total_tokens": 29191104} +{"current_steps": 9275, "total_steps": 15621, "loss": 0.4485, "lr": 8.4818032103198e-07, "epoch": 0.5937520005121311, "percentage": 59.38, "elapsed_time": "0:45:47", "remaining_time": "0:31:19", "throughput": 10630.59, "total_tokens": 29206208} +{"current_steps": 9280, "total_steps": 15621, "loss": 0.4186, "lr": 8.470760000579906e-07, "epoch": 0.5940720824531079, "percentage": 59.41, "elapsed_time": "0:45:48", "remaining_time": "0:31:17", "throughput": 10633.52, "total_tokens": 29221312} +{"current_steps": 9285, "total_steps": 15621, "loss": 0.5047, "lr": 8.459718700114108e-07, "epoch": 0.5943921643940849, "percentage": 59.44, "elapsed_time": "0:45:48", "remaining_time": "0:31:15", "throughput": 10636.58, "total_tokens": 29236800} +{"current_steps": 9290, "total_steps": 15621, "loss": 0.4508, "lr": 8.448679322707595e-07, "epoch": 0.5947122463350618, "percentage": 59.47, "elapsed_time": "0:45:49", "remaining_time": "0:31:13", "throughput": 10639.7, "total_tokens": 29252480} +{"current_steps": 9295, "total_steps": 15621, "loss": 0.6011, "lr": 8.437641882143163e-07, "epoch": 0.5950323282760387, "percentage": 59.5, "elapsed_time": "0:45:50", "remaining_time": "0:31:11", "throughput": 10642.51, "total_tokens": 29266944} +{"current_steps": 9300, "total_steps": 15621, "loss": 0.3106, "lr": 8.426606392201185e-07, "epoch": 0.5953524102170156, "percentage": 59.54, "elapsed_time": "0:45:50", "remaining_time": "0:31:09", "throughput": 10645.63, "total_tokens": 29282816} +{"current_steps": 9305, "total_steps": 15621, "loss": 0.3154, "lr": 8.415572866659599e-07, "epoch": 0.5956724921579925, "percentage": 59.57, "elapsed_time": "0:45:51", "remaining_time": "0:31:07", "throughput": 10648.58, "total_tokens": 29297984} +{"current_steps": 9310, "total_steps": 15621, "loss": 0.3652, "lr": 8.404541319293896e-07, "epoch": 0.5959925740989693, "percentage": 59.6, "elapsed_time": "0:45:52", "remaining_time": "0:31:05", "throughput": 10651.69, "total_tokens": 29313664} +{"current_steps": 9315, "total_steps": 15621, "loss": 0.593, "lr": 8.393511763877086e-07, "epoch": 0.5963126560399462, "percentage": 59.63, "elapsed_time": "0:45:52", "remaining_time": "0:31:03", "throughput": 10654.83, "total_tokens": 29329472} +{"current_steps": 9320, "total_steps": 15621, "loss": 0.4438, "lr": 8.3824842141797e-07, "epoch": 0.5966327379809231, "percentage": 59.66, "elapsed_time": "0:45:53", "remaining_time": "0:31:01", "throughput": 10658.17, "total_tokens": 29346048} +{"current_steps": 9325, "total_steps": 15621, "loss": 0.3806, "lr": 8.371458683969765e-07, "epoch": 0.5969528199219, "percentage": 59.7, "elapsed_time": "0:45:54", "remaining_time": "0:30:59", "throughput": 10661.23, "total_tokens": 29361664} +{"current_steps": 9330, "total_steps": 15621, "loss": 0.3848, "lr": 8.360435187012787e-07, "epoch": 0.5972729018628768, "percentage": 59.73, "elapsed_time": "0:45:54", "remaining_time": "0:30:57", "throughput": 10664.2, "total_tokens": 29376896} +{"current_steps": 9335, "total_steps": 15621, "loss": 0.3866, "lr": 8.349413737071725e-07, "epoch": 0.5975929838038538, "percentage": 59.76, "elapsed_time": "0:45:55", "remaining_time": "0:30:55", "throughput": 10667.3, "total_tokens": 29392640} +{"current_steps": 9340, "total_steps": 15621, "loss": 0.4486, "lr": 8.338394347906994e-07, "epoch": 0.5979130657448307, "percentage": 59.79, "elapsed_time": "0:45:56", "remaining_time": "0:30:53", "throughput": 10670.24, "total_tokens": 29407808} +{"current_steps": 9345, "total_steps": 15621, "loss": 0.3114, "lr": 8.327377033276431e-07, "epoch": 0.5982331476858076, "percentage": 59.82, "elapsed_time": "0:45:56", "remaining_time": "0:30:51", "throughput": 10673.09, "total_tokens": 29422528} +{"current_steps": 9350, "total_steps": 15621, "loss": 0.3484, "lr": 8.316361806935279e-07, "epoch": 0.5985532296267845, "percentage": 59.86, "elapsed_time": "0:45:57", "remaining_time": "0:30:49", "throughput": 10676.18, "total_tokens": 29438272} +{"current_steps": 9355, "total_steps": 15621, "loss": 0.4397, "lr": 8.305348682636177e-07, "epoch": 0.5988733115677614, "percentage": 59.89, "elapsed_time": "0:45:58", "remaining_time": "0:30:47", "throughput": 10679.12, "total_tokens": 29453376} +{"current_steps": 9360, "total_steps": 15621, "loss": 0.4149, "lr": 8.294337674129144e-07, "epoch": 0.5991933935087382, "percentage": 59.92, "elapsed_time": "0:45:58", "remaining_time": "0:30:45", "throughput": 10682.27, "total_tokens": 29469248} +{"current_steps": 9365, "total_steps": 15621, "loss": 0.2745, "lr": 8.283328795161554e-07, "epoch": 0.5995134754497151, "percentage": 59.95, "elapsed_time": "0:45:59", "remaining_time": "0:30:43", "throughput": 10685.57, "total_tokens": 29485888} +{"current_steps": 9370, "total_steps": 15621, "loss": 0.3205, "lr": 8.272322059478114e-07, "epoch": 0.599833557390692, "percentage": 59.98, "elapsed_time": "0:46:00", "remaining_time": "0:30:41", "throughput": 10688.46, "total_tokens": 29500864} +{"current_steps": 9375, "total_steps": 15621, "loss": 0.2427, "lr": 8.261317480820871e-07, "epoch": 0.6001536393316689, "percentage": 60.02, "elapsed_time": "0:46:00", "remaining_time": "0:30:39", "throughput": 10691.48, "total_tokens": 29516288} +{"current_steps": 9380, "total_steps": 15621, "loss": 0.4129, "lr": 8.250315072929168e-07, "epoch": 0.6004737212726458, "percentage": 60.05, "elapsed_time": "0:46:01", "remaining_time": "0:30:37", "throughput": 10694.27, "total_tokens": 29530880} +{"current_steps": 9384, "total_steps": 15621, "eval_loss": 0.39462828636169434, "epoch": 0.6007297868254273, "percentage": 60.07, "elapsed_time": "0:46:52", "remaining_time": "0:31:09", "throughput": 10504.4, "total_tokens": 29544576} +{"current_steps": 9385, "total_steps": 15621, "loss": 0.35, "lr": 8.239314849539637e-07, "epoch": 0.6007938032136226, "percentage": 60.08, "elapsed_time": "1:02:55", "remaining_time": "0:41:48", "throughput": 7825.91, "total_tokens": 29547840} +{"current_steps": 9390, "total_steps": 15621, "loss": 0.4234, "lr": 8.228316824386193e-07, "epoch": 0.6011138851545996, "percentage": 60.11, "elapsed_time": "1:02:56", "remaining_time": "0:41:45", "throughput": 7828.78, "total_tokens": 29564096} +{"current_steps": 9395, "total_steps": 15621, "loss": 0.378, "lr": 8.217321011199995e-07, "epoch": 0.6014339670955765, "percentage": 60.14, "elapsed_time": "1:02:56", "remaining_time": "0:41:42", "throughput": 7831.49, "total_tokens": 29579520} +{"current_steps": 9400, "total_steps": 15621, "loss": 0.433, "lr": 8.206327423709441e-07, "epoch": 0.6017540490365534, "percentage": 60.18, "elapsed_time": "1:02:57", "remaining_time": "0:41:40", "throughput": 7834.02, "total_tokens": 29594048} +{"current_steps": 9405, "total_steps": 15621, "loss": 0.3913, "lr": 8.195336075640163e-07, "epoch": 0.6020741309775303, "percentage": 60.21, "elapsed_time": "1:02:58", "remaining_time": "0:41:37", "throughput": 7836.9, "total_tokens": 29610368} +{"current_steps": 9410, "total_steps": 15621, "loss": 0.4248, "lr": 8.184346980714984e-07, "epoch": 0.6023942129185071, "percentage": 60.24, "elapsed_time": "1:02:58", "remaining_time": "0:41:34", "throughput": 7839.6, "total_tokens": 29625792} +{"current_steps": 9415, "total_steps": 15621, "loss": 0.3563, "lr": 8.173360152653914e-07, "epoch": 0.602714294859484, "percentage": 60.27, "elapsed_time": "1:02:59", "remaining_time": "0:41:31", "throughput": 7842.5, "total_tokens": 29642240} +{"current_steps": 9420, "total_steps": 15621, "loss": 0.3138, "lr": 8.162375605174143e-07, "epoch": 0.6030343768004609, "percentage": 60.3, "elapsed_time": "1:03:00", "remaining_time": "0:41:28", "throughput": 7845.3, "total_tokens": 29658176} +{"current_steps": 9425, "total_steps": 15621, "loss": 0.3068, "lr": 8.151393351990005e-07, "epoch": 0.6033544587414378, "percentage": 60.34, "elapsed_time": "1:03:01", "remaining_time": "0:41:25", "throughput": 7848.37, "total_tokens": 29675392} +{"current_steps": 9430, "total_steps": 15621, "loss": 0.4185, "lr": 8.140413406812971e-07, "epoch": 0.6036745406824147, "percentage": 60.37, "elapsed_time": "1:03:01", "remaining_time": "0:41:22", "throughput": 7850.92, "total_tokens": 29690048} +{"current_steps": 9435, "total_steps": 15621, "loss": 0.3111, "lr": 8.129435783351635e-07, "epoch": 0.6039946226233915, "percentage": 60.4, "elapsed_time": "1:03:02", "remaining_time": "0:41:19", "throughput": 7853.55, "total_tokens": 29705088} +{"current_steps": 9440, "total_steps": 15621, "loss": 0.4421, "lr": 8.118460495311685e-07, "epoch": 0.6043147045643685, "percentage": 60.43, "elapsed_time": "1:03:03", "remaining_time": "0:41:17", "throughput": 7856.25, "total_tokens": 29720576} +{"current_steps": 9445, "total_steps": 15621, "loss": 0.4352, "lr": 8.107487556395901e-07, "epoch": 0.6046347865053454, "percentage": 60.46, "elapsed_time": "1:03:03", "remaining_time": "0:41:14", "throughput": 7859.14, "total_tokens": 29736896} +{"current_steps": 9450, "total_steps": 15621, "loss": 0.3688, "lr": 8.096516980304115e-07, "epoch": 0.6049548684463223, "percentage": 60.5, "elapsed_time": "1:03:04", "remaining_time": "0:41:11", "throughput": 7861.91, "total_tokens": 29752768} +{"current_steps": 9455, "total_steps": 15621, "loss": 0.3448, "lr": 8.085548780733238e-07, "epoch": 0.6052749503872992, "percentage": 60.53, "elapsed_time": "1:03:05", "remaining_time": "0:41:08", "throughput": 7864.7, "total_tokens": 29768640} +{"current_steps": 9460, "total_steps": 15621, "loss": 0.3368, "lr": 8.074582971377182e-07, "epoch": 0.605595032328276, "percentage": 60.56, "elapsed_time": "1:03:05", "remaining_time": "0:41:05", "throughput": 7867.81, "total_tokens": 29786240} +{"current_steps": 9465, "total_steps": 15621, "loss": 0.4407, "lr": 8.063619565926892e-07, "epoch": 0.6059151142692529, "percentage": 60.59, "elapsed_time": "1:03:06", "remaining_time": "0:41:02", "throughput": 7870.6, "total_tokens": 29802176} +{"current_steps": 9470, "total_steps": 15621, "loss": 0.3992, "lr": 8.052658578070313e-07, "epoch": 0.6062351962102298, "percentage": 60.62, "elapsed_time": "1:03:07", "remaining_time": "0:40:59", "throughput": 7873.29, "total_tokens": 29817600} +{"current_steps": 9475, "total_steps": 15621, "loss": 0.3233, "lr": 8.041700021492362e-07, "epoch": 0.6065552781512067, "percentage": 60.66, "elapsed_time": "1:03:07", "remaining_time": "0:40:57", "throughput": 7875.97, "total_tokens": 29832960} +{"current_steps": 9480, "total_steps": 15621, "loss": 0.2929, "lr": 8.030743909874924e-07, "epoch": 0.6068753600921836, "percentage": 60.69, "elapsed_time": "1:03:08", "remaining_time": "0:40:54", "throughput": 7878.66, "total_tokens": 29848448} +{"current_steps": 9485, "total_steps": 15621, "loss": 0.3299, "lr": 8.019790256896839e-07, "epoch": 0.6071954420331604, "percentage": 60.72, "elapsed_time": "1:03:09", "remaining_time": "0:40:51", "throughput": 7881.23, "total_tokens": 29863296} +{"current_steps": 9490, "total_steps": 15621, "loss": 0.3934, "lr": 8.008839076233871e-07, "epoch": 0.6075155239741373, "percentage": 60.75, "elapsed_time": "1:03:09", "remaining_time": "0:40:48", "throughput": 7884.2, "total_tokens": 29880128} +{"current_steps": 9495, "total_steps": 15621, "loss": 0.3564, "lr": 7.997890381558691e-07, "epoch": 0.6078356059151143, "percentage": 60.78, "elapsed_time": "1:03:10", "remaining_time": "0:40:45", "throughput": 7886.85, "total_tokens": 29895296} +{"current_steps": 9500, "total_steps": 15621, "loss": 0.434, "lr": 7.986944186540878e-07, "epoch": 0.6081556878560912, "percentage": 60.82, "elapsed_time": "1:03:11", "remaining_time": "0:40:42", "throughput": 7889.65, "total_tokens": 29911296} +{"current_steps": 9505, "total_steps": 15621, "loss": 0.4603, "lr": 7.976000504846885e-07, "epoch": 0.6084757697970681, "percentage": 60.85, "elapsed_time": "1:03:11", "remaining_time": "0:40:39", "throughput": 7892.38, "total_tokens": 29926912} +{"current_steps": 9510, "total_steps": 15621, "loss": 0.4725, "lr": 7.965059350140024e-07, "epoch": 0.608795851738045, "percentage": 60.88, "elapsed_time": "1:03:12", "remaining_time": "0:40:37", "throughput": 7895.06, "total_tokens": 29942272} +{"current_steps": 9515, "total_steps": 15621, "loss": 0.4093, "lr": 7.954120736080461e-07, "epoch": 0.6091159336790218, "percentage": 60.91, "elapsed_time": "1:03:13", "remaining_time": "0:40:34", "throughput": 7897.79, "total_tokens": 29958016} +{"current_steps": 9520, "total_steps": 15621, "loss": 0.5561, "lr": 7.943184676325178e-07, "epoch": 0.6094360156199987, "percentage": 60.94, "elapsed_time": "1:03:13", "remaining_time": "0:40:31", "throughput": 7900.73, "total_tokens": 29974720} +{"current_steps": 9525, "total_steps": 15621, "loss": 0.4295, "lr": 7.932251184527974e-07, "epoch": 0.6097560975609756, "percentage": 60.98, "elapsed_time": "1:03:14", "remaining_time": "0:40:28", "throughput": 7903.71, "total_tokens": 29991680} +{"current_steps": 9530, "total_steps": 15621, "loss": 0.2678, "lr": 7.921320274339446e-07, "epoch": 0.6100761795019525, "percentage": 61.01, "elapsed_time": "1:03:15", "remaining_time": "0:40:25", "throughput": 7906.41, "total_tokens": 30007168} +{"current_steps": 9535, "total_steps": 15621, "loss": 0.34, "lr": 7.910391959406966e-07, "epoch": 0.6103962614429294, "percentage": 61.04, "elapsed_time": "1:03:15", "remaining_time": "0:40:22", "throughput": 7909.09, "total_tokens": 30022656} +{"current_steps": 9540, "total_steps": 15621, "loss": 0.3896, "lr": 7.899466253374653e-07, "epoch": 0.6107163433839062, "percentage": 61.07, "elapsed_time": "1:03:16", "remaining_time": "0:40:20", "throughput": 7911.78, "total_tokens": 30038144} +{"current_steps": 9545, "total_steps": 15621, "loss": 0.3321, "lr": 7.88854316988339e-07, "epoch": 0.6110364253248832, "percentage": 61.1, "elapsed_time": "1:03:17", "remaining_time": "0:40:17", "throughput": 7914.85, "total_tokens": 30055488} +{"current_steps": 9550, "total_steps": 15621, "loss": 0.3085, "lr": 7.877622722570771e-07, "epoch": 0.6113565072658601, "percentage": 61.14, "elapsed_time": "1:03:18", "remaining_time": "0:40:14", "throughput": 7917.55, "total_tokens": 30071040} +{"current_steps": 9555, "total_steps": 15621, "loss": 0.4224, "lr": 7.866704925071101e-07, "epoch": 0.611676589206837, "percentage": 61.17, "elapsed_time": "1:03:18", "remaining_time": "0:40:11", "throughput": 7920.53, "total_tokens": 30088000} +{"current_steps": 9560, "total_steps": 15621, "loss": 0.4359, "lr": 7.855789791015377e-07, "epoch": 0.6119966711478139, "percentage": 61.2, "elapsed_time": "1:03:19", "remaining_time": "0:40:08", "throughput": 7923.12, "total_tokens": 30103040} +{"current_steps": 9565, "total_steps": 15621, "loss": 0.3887, "lr": 7.844877334031277e-07, "epoch": 0.6123167530887907, "percentage": 61.23, "elapsed_time": "1:03:20", "remaining_time": "0:40:05", "throughput": 7925.67, "total_tokens": 30117760} +{"current_steps": 9570, "total_steps": 15621, "loss": 0.4969, "lr": 7.833967567743131e-07, "epoch": 0.6126368350297676, "percentage": 61.26, "elapsed_time": "1:03:20", "remaining_time": "0:40:03", "throughput": 7928.48, "total_tokens": 30133888} +{"current_steps": 9575, "total_steps": 15621, "loss": 0.3596, "lr": 7.823060505771903e-07, "epoch": 0.6129569169707445, "percentage": 61.3, "elapsed_time": "1:03:21", "remaining_time": "0:40:00", "throughput": 7931.14, "total_tokens": 30149312} +{"current_steps": 9580, "total_steps": 15621, "loss": 0.4176, "lr": 7.812156161735199e-07, "epoch": 0.6132769989117214, "percentage": 61.33, "elapsed_time": "1:03:22", "remaining_time": "0:39:57", "throughput": 7933.64, "total_tokens": 30163840} +{"current_steps": 9585, "total_steps": 15621, "loss": 0.5474, "lr": 7.801254549247215e-07, "epoch": 0.6135970808526983, "percentage": 61.36, "elapsed_time": "1:03:22", "remaining_time": "0:39:54", "throughput": 7936.58, "total_tokens": 30180544} +{"current_steps": 9590, "total_steps": 15621, "loss": 0.338, "lr": 7.790355681918739e-07, "epoch": 0.6139171627936751, "percentage": 61.39, "elapsed_time": "1:03:23", "remaining_time": "0:39:51", "throughput": 7939.46, "total_tokens": 30197120} +{"current_steps": 9595, "total_steps": 15621, "loss": 0.4222, "lr": 7.779459573357144e-07, "epoch": 0.614237244734652, "percentage": 61.42, "elapsed_time": "1:03:24", "remaining_time": "0:39:49", "throughput": 7942.28, "total_tokens": 30213376} +{"current_steps": 9600, "total_steps": 15621, "loss": 0.4138, "lr": 7.768566237166338e-07, "epoch": 0.614557326675629, "percentage": 61.46, "elapsed_time": "1:03:24", "remaining_time": "0:39:46", "throughput": 7945.02, "total_tokens": 30229120} +{"current_steps": 9605, "total_steps": 15621, "loss": 0.5188, "lr": 7.757675686946786e-07, "epoch": 0.6148774086166059, "percentage": 61.49, "elapsed_time": "1:03:25", "remaining_time": "0:39:43", "throughput": 7947.69, "total_tokens": 30244544} +{"current_steps": 9610, "total_steps": 15621, "loss": 0.4258, "lr": 7.746787936295468e-07, "epoch": 0.6151974905575828, "percentage": 61.52, "elapsed_time": "1:03:26", "remaining_time": "0:39:40", "throughput": 7950.54, "total_tokens": 30260864} +{"current_steps": 9615, "total_steps": 15621, "loss": 0.3681, "lr": 7.735902998805868e-07, "epoch": 0.6155175724985597, "percentage": 61.55, "elapsed_time": "1:03:26", "remaining_time": "0:39:37", "throughput": 7953.03, "total_tokens": 30275456} +{"current_steps": 9620, "total_steps": 15621, "loss": 0.4284, "lr": 7.725020888067955e-07, "epoch": 0.6158376544395365, "percentage": 61.58, "elapsed_time": "1:03:27", "remaining_time": "0:39:35", "throughput": 7955.71, "total_tokens": 30291008} +{"current_steps": 9625, "total_steps": 15621, "loss": 0.4779, "lr": 7.714141617668176e-07, "epoch": 0.6161577363805134, "percentage": 61.62, "elapsed_time": "1:03:28", "remaining_time": "0:39:32", "throughput": 7958.45, "total_tokens": 30306816} +{"current_steps": 9630, "total_steps": 15621, "loss": 0.3342, "lr": 7.703265201189426e-07, "epoch": 0.6164778183214903, "percentage": 61.65, "elapsed_time": "1:03:28", "remaining_time": "0:39:29", "throughput": 7961.09, "total_tokens": 30322240} +{"current_steps": 9635, "total_steps": 15621, "loss": 0.3333, "lr": 7.692391652211036e-07, "epoch": 0.6167979002624672, "percentage": 61.68, "elapsed_time": "1:03:29", "remaining_time": "0:39:26", "throughput": 7963.81, "total_tokens": 30338048} +{"current_steps": 9640, "total_steps": 15621, "loss": 0.3256, "lr": 7.681520984308769e-07, "epoch": 0.617117982203444, "percentage": 61.71, "elapsed_time": "1:03:30", "remaining_time": "0:39:23", "throughput": 7966.57, "total_tokens": 30353984} +{"current_steps": 9645, "total_steps": 15621, "loss": 0.496, "lr": 7.670653211054772e-07, "epoch": 0.6174380641444209, "percentage": 61.74, "elapsed_time": "1:03:30", "remaining_time": "0:39:21", "throughput": 7969.33, "total_tokens": 30370048} +{"current_steps": 9650, "total_steps": 15621, "loss": 0.4137, "lr": 7.659788346017591e-07, "epoch": 0.6177581460853978, "percentage": 61.78, "elapsed_time": "1:03:31", "remaining_time": "0:39:18", "throughput": 7971.95, "total_tokens": 30385344} +{"current_steps": 9655, "total_steps": 15621, "loss": 0.3994, "lr": 7.648926402762133e-07, "epoch": 0.6180782280263748, "percentage": 61.81, "elapsed_time": "1:03:32", "remaining_time": "0:39:15", "throughput": 7974.55, "total_tokens": 30400576} +{"current_steps": 9660, "total_steps": 15621, "loss": 0.3861, "lr": 7.638067394849671e-07, "epoch": 0.6183983099673517, "percentage": 61.84, "elapsed_time": "1:03:32", "remaining_time": "0:39:12", "throughput": 7977.1, "total_tokens": 30415424} +{"current_steps": 9665, "total_steps": 15621, "loss": 0.3971, "lr": 7.627211335837797e-07, "epoch": 0.6187183919083286, "percentage": 61.87, "elapsed_time": "1:03:33", "remaining_time": "0:39:10", "throughput": 7979.69, "total_tokens": 30430592} +{"current_steps": 9670, "total_steps": 15621, "loss": 0.4285, "lr": 7.616358239280427e-07, "epoch": 0.6190384738493054, "percentage": 61.9, "elapsed_time": "1:03:34", "remaining_time": "0:39:07", "throughput": 7982.32, "total_tokens": 30445952} +{"current_steps": 9675, "total_steps": 15621, "loss": 0.3194, "lr": 7.605508118727787e-07, "epoch": 0.6193585557902823, "percentage": 61.94, "elapsed_time": "1:03:34", "remaining_time": "0:39:04", "throughput": 7985.01, "total_tokens": 30461568} +{"current_steps": 9680, "total_steps": 15621, "loss": 0.3642, "lr": 7.594660987726373e-07, "epoch": 0.6196786377312592, "percentage": 61.97, "elapsed_time": "1:03:35", "remaining_time": "0:39:01", "throughput": 7987.58, "total_tokens": 30476672} +{"current_steps": 9685, "total_steps": 15621, "loss": 0.3969, "lr": 7.583816859818956e-07, "epoch": 0.6199987196722361, "percentage": 62.0, "elapsed_time": "1:03:36", "remaining_time": "0:38:58", "throughput": 7990.33, "total_tokens": 30492672} +{"current_steps": 9690, "total_steps": 15621, "loss": 0.3783, "lr": 7.57297574854456e-07, "epoch": 0.620318801613213, "percentage": 62.03, "elapsed_time": "1:03:36", "remaining_time": "0:38:56", "throughput": 7992.88, "total_tokens": 30507712} +{"current_steps": 9695, "total_steps": 15621, "loss": 0.4477, "lr": 7.56213766743844e-07, "epoch": 0.6206388835541898, "percentage": 62.06, "elapsed_time": "1:03:37", "remaining_time": "0:38:53", "throughput": 7995.71, "total_tokens": 30524032} +{"current_steps": 9700, "total_steps": 15621, "loss": 0.3281, "lr": 7.551302630032064e-07, "epoch": 0.6209589654951667, "percentage": 62.1, "elapsed_time": "1:03:38", "remaining_time": "0:38:50", "throughput": 7998.4, "total_tokens": 30539776} +{"current_steps": 9705, "total_steps": 15621, "loss": 0.3758, "lr": 7.540470649853106e-07, "epoch": 0.6212790474361437, "percentage": 62.13, "elapsed_time": "1:03:38", "remaining_time": "0:38:47", "throughput": 8000.95, "total_tokens": 30554752} +{"current_steps": 9710, "total_steps": 15621, "loss": 0.3955, "lr": 7.529641740425419e-07, "epoch": 0.6215991293771206, "percentage": 62.16, "elapsed_time": "1:03:39", "remaining_time": "0:38:45", "throughput": 8003.94, "total_tokens": 30571968} +{"current_steps": 9715, "total_steps": 15621, "loss": 0.449, "lr": 7.518815915269023e-07, "epoch": 0.6219192113180975, "percentage": 62.19, "elapsed_time": "1:03:40", "remaining_time": "0:38:42", "throughput": 8006.56, "total_tokens": 30587264} +{"current_steps": 9720, "total_steps": 15621, "loss": 0.3823, "lr": 7.507993187900092e-07, "epoch": 0.6222392932590743, "percentage": 62.22, "elapsed_time": "1:03:40", "remaining_time": "0:38:39", "throughput": 8009.29, "total_tokens": 30603200} +{"current_steps": 9725, "total_steps": 15621, "loss": 0.4186, "lr": 7.497173571830926e-07, "epoch": 0.6225593752000512, "percentage": 62.26, "elapsed_time": "1:03:41", "remaining_time": "0:38:36", "throughput": 8011.78, "total_tokens": 30617856} +{"current_steps": 9730, "total_steps": 15621, "loss": 0.4631, "lr": 7.486357080569938e-07, "epoch": 0.6228794571410281, "percentage": 62.29, "elapsed_time": "1:03:42", "remaining_time": "0:38:34", "throughput": 8014.25, "total_tokens": 30632448} +{"current_steps": 9735, "total_steps": 15621, "loss": 0.3768, "lr": 7.47554372762165e-07, "epoch": 0.623199539082005, "percentage": 62.32, "elapsed_time": "1:03:42", "remaining_time": "0:38:31", "throughput": 8016.86, "total_tokens": 30647680} +{"current_steps": 9740, "total_steps": 15621, "loss": 0.4872, "lr": 7.464733526486662e-07, "epoch": 0.6235196210229819, "percentage": 62.35, "elapsed_time": "1:03:43", "remaining_time": "0:38:28", "throughput": 8019.59, "total_tokens": 30663616} +{"current_steps": 9745, "total_steps": 15621, "loss": 0.3515, "lr": 7.453926490661628e-07, "epoch": 0.6238397029639587, "percentage": 62.38, "elapsed_time": "1:03:44", "remaining_time": "0:38:25", "throughput": 8022.93, "total_tokens": 30682496} +{"current_steps": 9750, "total_steps": 15621, "loss": 0.3687, "lr": 7.443122633639267e-07, "epoch": 0.6241597849049356, "percentage": 62.42, "elapsed_time": "1:03:45", "remaining_time": "0:38:23", "throughput": 8025.51, "total_tokens": 30697664} +{"current_steps": 9755, "total_steps": 15621, "loss": 0.3856, "lr": 7.432321968908319e-07, "epoch": 0.6244798668459125, "percentage": 62.45, "elapsed_time": "1:03:45", "remaining_time": "0:38:20", "throughput": 8028.21, "total_tokens": 30713408} +{"current_steps": 9760, "total_steps": 15621, "loss": 0.3178, "lr": 7.421524509953543e-07, "epoch": 0.6247999487868895, "percentage": 62.48, "elapsed_time": "1:03:46", "remaining_time": "0:38:17", "throughput": 8031.19, "total_tokens": 30730496} +{"current_steps": 9765, "total_steps": 15621, "loss": 0.4143, "lr": 7.410730270255687e-07, "epoch": 0.6251200307278664, "percentage": 62.51, "elapsed_time": "1:03:47", "remaining_time": "0:38:15", "throughput": 8033.78, "total_tokens": 30745664} +{"current_steps": 9770, "total_steps": 15621, "loss": 0.3747, "lr": 7.399939263291493e-07, "epoch": 0.6254401126688433, "percentage": 62.54, "elapsed_time": "1:03:47", "remaining_time": "0:38:12", "throughput": 8036.39, "total_tokens": 30760960} +{"current_steps": 9775, "total_steps": 15621, "loss": 0.479, "lr": 7.389151502533657e-07, "epoch": 0.6257601946098201, "percentage": 62.58, "elapsed_time": "1:03:48", "remaining_time": "0:38:09", "throughput": 8038.91, "total_tokens": 30775872} +{"current_steps": 9780, "total_steps": 15621, "loss": 0.3696, "lr": 7.378367001450819e-07, "epoch": 0.626080276550797, "percentage": 62.61, "elapsed_time": "1:03:49", "remaining_time": "0:38:06", "throughput": 8041.58, "total_tokens": 30791424} +{"current_steps": 9785, "total_steps": 15621, "loss": 0.426, "lr": 7.367585773507567e-07, "epoch": 0.6264003584917739, "percentage": 62.64, "elapsed_time": "1:03:49", "remaining_time": "0:38:04", "throughput": 8044.38, "total_tokens": 30807680} +{"current_steps": 9790, "total_steps": 15621, "loss": 0.4515, "lr": 7.356807832164385e-07, "epoch": 0.6267204404327508, "percentage": 62.67, "elapsed_time": "1:03:50", "remaining_time": "0:38:01", "throughput": 8047.12, "total_tokens": 30823680} +{"current_steps": 9795, "total_steps": 15621, "loss": 0.4401, "lr": 7.346033190877654e-07, "epoch": 0.6270405223737276, "percentage": 62.7, "elapsed_time": "1:03:51", "remaining_time": "0:37:58", "throughput": 8049.8, "total_tokens": 30839360} +{"current_steps": 9800, "total_steps": 15621, "loss": 0.3541, "lr": 7.335261863099651e-07, "epoch": 0.6273606043147045, "percentage": 62.74, "elapsed_time": "1:03:51", "remaining_time": "0:37:55", "throughput": 8052.44, "total_tokens": 30854784} +{"current_steps": 9805, "total_steps": 15621, "loss": 0.4232, "lr": 7.324493862278498e-07, "epoch": 0.6276806862556814, "percentage": 62.77, "elapsed_time": "1:03:52", "remaining_time": "0:37:53", "throughput": 8055.15, "total_tokens": 30870592} +{"current_steps": 9810, "total_steps": 15621, "loss": 0.4636, "lr": 7.313729201858167e-07, "epoch": 0.6280007681966584, "percentage": 62.8, "elapsed_time": "1:03:53", "remaining_time": "0:37:50", "throughput": 8057.77, "total_tokens": 30885952} +{"current_steps": 9815, "total_steps": 15621, "loss": 0.3329, "lr": 7.302967895278473e-07, "epoch": 0.6283208501376353, "percentage": 62.83, "elapsed_time": "1:03:53", "remaining_time": "0:37:47", "throughput": 8060.52, "total_tokens": 30902080} +{"current_steps": 9820, "total_steps": 15621, "loss": 0.4042, "lr": 7.292209955975028e-07, "epoch": 0.6286409320786122, "percentage": 62.86, "elapsed_time": "1:03:54", "remaining_time": "0:37:45", "throughput": 8063.5, "total_tokens": 30919232} +{"current_steps": 9825, "total_steps": 15621, "loss": 0.4078, "lr": 7.281455397379244e-07, "epoch": 0.628961014019589, "percentage": 62.9, "elapsed_time": "1:03:55", "remaining_time": "0:37:42", "throughput": 8066.5, "total_tokens": 30936448} +{"current_steps": 9830, "total_steps": 15621, "loss": 0.3225, "lr": 7.270704232918316e-07, "epoch": 0.6292810959605659, "percentage": 62.93, "elapsed_time": "1:03:55", "remaining_time": "0:37:39", "throughput": 8069.2, "total_tokens": 30952256} +{"current_steps": 9835, "total_steps": 15621, "loss": 0.4216, "lr": 7.2599564760152e-07, "epoch": 0.6296011779015428, "percentage": 62.96, "elapsed_time": "1:03:56", "remaining_time": "0:37:37", "throughput": 8071.76, "total_tokens": 30967360} +{"current_steps": 9840, "total_steps": 15621, "loss": 0.3852, "lr": 7.249212140088592e-07, "epoch": 0.6299212598425197, "percentage": 62.99, "elapsed_time": "1:03:57", "remaining_time": "0:37:34", "throughput": 8074.23, "total_tokens": 30982016} +{"current_steps": 9845, "total_steps": 15621, "loss": 0.3347, "lr": 7.23847123855293e-07, "epoch": 0.6302413417834966, "percentage": 63.02, "elapsed_time": "1:03:57", "remaining_time": "0:37:31", "throughput": 8076.98, "total_tokens": 30998080} +{"current_steps": 9850, "total_steps": 15621, "loss": 0.2805, "lr": 7.227733784818349e-07, "epoch": 0.6305614237244734, "percentage": 63.06, "elapsed_time": "1:03:58", "remaining_time": "0:37:28", "throughput": 8079.54, "total_tokens": 31013184} +{"current_steps": 9855, "total_steps": 15621, "loss": 0.3804, "lr": 7.216999792290683e-07, "epoch": 0.6308815056654503, "percentage": 63.09, "elapsed_time": "1:03:59", "remaining_time": "0:37:26", "throughput": 8082.2, "total_tokens": 31028800} +{"current_steps": 9860, "total_steps": 15621, "loss": 0.49, "lr": 7.206269274371457e-07, "epoch": 0.6312015876064272, "percentage": 63.12, "elapsed_time": "1:03:59", "remaining_time": "0:37:23", "throughput": 8084.92, "total_tokens": 31044736} +{"current_steps": 9865, "total_steps": 15621, "loss": 0.3496, "lr": 7.195542244457845e-07, "epoch": 0.6315216695474042, "percentage": 63.15, "elapsed_time": "1:04:00", "remaining_time": "0:37:20", "throughput": 8087.5, "total_tokens": 31059968} +{"current_steps": 9870, "total_steps": 15621, "loss": 0.3266, "lr": 7.184818715942666e-07, "epoch": 0.6318417514883811, "percentage": 63.18, "elapsed_time": "1:04:01", "remaining_time": "0:37:18", "throughput": 8090.01, "total_tokens": 31074880} +{"current_steps": 9875, "total_steps": 15621, "loss": 0.355, "lr": 7.174098702214374e-07, "epoch": 0.6321618334293579, "percentage": 63.22, "elapsed_time": "1:04:01", "remaining_time": "0:37:15", "throughput": 8092.66, "total_tokens": 31090432} +{"current_steps": 9880, "total_steps": 15621, "loss": 0.37, "lr": 7.163382216657033e-07, "epoch": 0.6324819153703348, "percentage": 63.25, "elapsed_time": "1:04:02", "remaining_time": "0:37:12", "throughput": 8095.53, "total_tokens": 31107264} +{"current_steps": 9885, "total_steps": 15621, "loss": 0.3444, "lr": 7.152669272650302e-07, "epoch": 0.6328019973113117, "percentage": 63.28, "elapsed_time": "1:04:03", "remaining_time": "0:37:10", "throughput": 8098.42, "total_tokens": 31124096} +{"current_steps": 9890, "total_steps": 15621, "loss": 0.3869, "lr": 7.141959883569411e-07, "epoch": 0.6331220792522886, "percentage": 63.31, "elapsed_time": "1:04:03", "remaining_time": "0:37:07", "throughput": 8100.88, "total_tokens": 31138752} +{"current_steps": 9895, "total_steps": 15621, "loss": 0.4701, "lr": 7.131254062785165e-07, "epoch": 0.6334421611932655, "percentage": 63.34, "elapsed_time": "1:04:04", "remaining_time": "0:37:04", "throughput": 8103.47, "total_tokens": 31154048} +{"current_steps": 9900, "total_steps": 15621, "loss": 0.5118, "lr": 7.120551823663907e-07, "epoch": 0.6337622431342423, "percentage": 63.38, "elapsed_time": "1:04:05", "remaining_time": "0:37:02", "throughput": 8106.26, "total_tokens": 31170304} +{"current_steps": 9905, "total_steps": 15621, "loss": 0.2817, "lr": 7.109853179567499e-07, "epoch": 0.6340823250752192, "percentage": 63.41, "elapsed_time": "1:04:05", "remaining_time": "0:36:59", "throughput": 8108.99, "total_tokens": 31186368} +{"current_steps": 9910, "total_steps": 15621, "loss": 0.4235, "lr": 7.099158143853337e-07, "epoch": 0.6344024070161961, "percentage": 63.44, "elapsed_time": "1:04:06", "remaining_time": "0:36:56", "throughput": 8111.58, "total_tokens": 31201664} +{"current_steps": 9915, "total_steps": 15621, "loss": 0.3891, "lr": 7.088466729874289e-07, "epoch": 0.634722488957173, "percentage": 63.47, "elapsed_time": "1:04:07", "remaining_time": "0:36:54", "throughput": 8114.22, "total_tokens": 31217216} +{"current_steps": 9920, "total_steps": 15621, "loss": 0.3784, "lr": 7.077778950978713e-07, "epoch": 0.63504257089815, "percentage": 63.5, "elapsed_time": "1:04:07", "remaining_time": "0:36:51", "throughput": 8117.05, "total_tokens": 31233728} +{"current_steps": 9925, "total_steps": 15621, "loss": 0.4682, "lr": 7.06709482051043e-07, "epoch": 0.6353626528391269, "percentage": 63.54, "elapsed_time": "1:04:08", "remaining_time": "0:36:48", "throughput": 8119.76, "total_tokens": 31249664} +{"current_steps": 9930, "total_steps": 15621, "loss": 0.3033, "lr": 7.056414351808698e-07, "epoch": 0.6356827347801037, "percentage": 63.57, "elapsed_time": "1:04:09", "remaining_time": "0:36:46", "throughput": 8122.43, "total_tokens": 31265408} +{"current_steps": 9935, "total_steps": 15621, "loss": 0.3517, "lr": 7.045737558208206e-07, "epoch": 0.6360028167210806, "percentage": 63.6, "elapsed_time": "1:04:09", "remaining_time": "0:36:43", "throughput": 8125.08, "total_tokens": 31281088} +{"current_steps": 9940, "total_steps": 15621, "loss": 0.4014, "lr": 7.035064453039064e-07, "epoch": 0.6363228986620575, "percentage": 63.63, "elapsed_time": "1:04:10", "remaining_time": "0:36:40", "throughput": 8127.68, "total_tokens": 31296512} +{"current_steps": 9945, "total_steps": 15621, "loss": 0.3772, "lr": 7.024395049626766e-07, "epoch": 0.6366429806030344, "percentage": 63.66, "elapsed_time": "1:04:11", "remaining_time": "0:36:38", "throughput": 8130.31, "total_tokens": 31312000} +{"current_steps": 9950, "total_steps": 15621, "loss": 0.3408, "lr": 7.013729361292182e-07, "epoch": 0.6369630625440112, "percentage": 63.7, "elapsed_time": "1:04:11", "remaining_time": "0:36:35", "throughput": 8132.91, "total_tokens": 31327488} +{"current_steps": 9955, "total_steps": 15621, "loss": 0.3065, "lr": 7.003067401351554e-07, "epoch": 0.6372831444849881, "percentage": 63.73, "elapsed_time": "1:04:12", "remaining_time": "0:36:32", "throughput": 8135.71, "total_tokens": 31343936} +{"current_steps": 9960, "total_steps": 15621, "loss": 0.406, "lr": 6.992409183116465e-07, "epoch": 0.637603226425965, "percentage": 63.76, "elapsed_time": "1:04:13", "remaining_time": "0:36:30", "throughput": 8138.29, "total_tokens": 31359232} +{"current_steps": 9965, "total_steps": 15621, "loss": 0.3724, "lr": 6.981754719893826e-07, "epoch": 0.6379233083669419, "percentage": 63.79, "elapsed_time": "1:04:13", "remaining_time": "0:36:27", "throughput": 8141.08, "total_tokens": 31375616} +{"current_steps": 9970, "total_steps": 15621, "loss": 0.4679, "lr": 6.971104024985852e-07, "epoch": 0.6382433903079189, "percentage": 63.82, "elapsed_time": "1:04:14", "remaining_time": "0:36:24", "throughput": 8143.81, "total_tokens": 31391680} +{"current_steps": 9975, "total_steps": 15621, "loss": 0.3809, "lr": 6.960457111690068e-07, "epoch": 0.6385634722488958, "percentage": 63.86, "elapsed_time": "1:04:15", "remaining_time": "0:36:22", "throughput": 8146.47, "total_tokens": 31407424} +{"current_steps": 9980, "total_steps": 15621, "loss": 0.3787, "lr": 6.94981399329927e-07, "epoch": 0.6388835541898726, "percentage": 63.89, "elapsed_time": "1:04:16", "remaining_time": "0:36:19", "throughput": 8149.07, "total_tokens": 31422912} +{"current_steps": 9985, "total_steps": 15621, "loss": 0.3921, "lr": 6.939174683101509e-07, "epoch": 0.6392036361308495, "percentage": 63.92, "elapsed_time": "1:04:16", "remaining_time": "0:36:16", "throughput": 8151.79, "total_tokens": 31438912} +{"current_steps": 9990, "total_steps": 15621, "loss": 0.2898, "lr": 6.9285391943801e-07, "epoch": 0.6395237180718264, "percentage": 63.95, "elapsed_time": "1:04:17", "remaining_time": "0:36:14", "throughput": 8154.56, "total_tokens": 31455168} +{"current_steps": 9995, "total_steps": 15621, "loss": 0.3133, "lr": 6.917907540413569e-07, "epoch": 0.6398438000128033, "percentage": 63.98, "elapsed_time": "1:04:18", "remaining_time": "0:36:11", "throughput": 8157.16, "total_tokens": 31470592} +{"current_steps": 10000, "total_steps": 15621, "loss": 0.3477, "lr": 6.907279734475659e-07, "epoch": 0.6401638819537802, "percentage": 64.02, "elapsed_time": "1:04:18", "remaining_time": "0:36:08", "throughput": 8159.66, "total_tokens": 31485632} +{"current_steps": 10005, "total_steps": 15621, "loss": 0.3725, "lr": 6.896655789835317e-07, "epoch": 0.640483963894757, "percentage": 64.05, "elapsed_time": "1:04:19", "remaining_time": "0:36:06", "throughput": 8162.12, "total_tokens": 31500352} +{"current_steps": 10010, "total_steps": 15621, "loss": 0.3702, "lr": 6.886035719756656e-07, "epoch": 0.6408040458357339, "percentage": 64.08, "elapsed_time": "1:04:20", "remaining_time": "0:36:03", "throughput": 8164.94, "total_tokens": 31516928} +{"current_steps": 10015, "total_steps": 15621, "loss": 0.279, "lr": 6.875419537498959e-07, "epoch": 0.6411241277767108, "percentage": 64.11, "elapsed_time": "1:04:20", "remaining_time": "0:36:01", "throughput": 8167.57, "total_tokens": 31532608} +{"current_steps": 10020, "total_steps": 15621, "loss": 0.6005, "lr": 6.864807256316658e-07, "epoch": 0.6414442097176877, "percentage": 64.14, "elapsed_time": "1:04:21", "remaining_time": "0:35:58", "throughput": 8170.29, "total_tokens": 31548608} +{"current_steps": 10025, "total_steps": 15621, "loss": 0.4117, "lr": 6.854198889459311e-07, "epoch": 0.6417642916586647, "percentage": 64.18, "elapsed_time": "1:04:22", "remaining_time": "0:35:55", "throughput": 8172.91, "total_tokens": 31564224} +{"current_steps": 10030, "total_steps": 15621, "loss": 0.2567, "lr": 6.84359445017158e-07, "epoch": 0.6420843735996415, "percentage": 64.21, "elapsed_time": "1:04:22", "remaining_time": "0:35:53", "throughput": 8175.41, "total_tokens": 31579200} +{"current_steps": 10035, "total_steps": 15621, "loss": 0.4257, "lr": 6.832993951693244e-07, "epoch": 0.6424044555406184, "percentage": 64.24, "elapsed_time": "1:04:23", "remaining_time": "0:35:50", "throughput": 8178.03, "total_tokens": 31594816} +{"current_steps": 10040, "total_steps": 15621, "loss": 0.3547, "lr": 6.822397407259144e-07, "epoch": 0.6427245374815953, "percentage": 64.27, "elapsed_time": "1:04:24", "remaining_time": "0:35:47", "throughput": 8180.66, "total_tokens": 31610432} +{"current_steps": 10045, "total_steps": 15621, "loss": 0.3794, "lr": 6.811804830099186e-07, "epoch": 0.6430446194225722, "percentage": 64.3, "elapsed_time": "1:04:24", "remaining_time": "0:35:45", "throughput": 8183.58, "total_tokens": 31627520} +{"current_steps": 10050, "total_steps": 15621, "loss": 0.3557, "lr": 6.801216233438336e-07, "epoch": 0.6433647013635491, "percentage": 64.34, "elapsed_time": "1:04:25", "remaining_time": "0:35:42", "throughput": 8186.45, "total_tokens": 31644352} +{"current_steps": 10055, "total_steps": 15621, "loss": 0.3919, "lr": 6.790631630496575e-07, "epoch": 0.6436847833045259, "percentage": 64.37, "elapsed_time": "1:04:26", "remaining_time": "0:35:40", "throughput": 8189.1, "total_tokens": 31660160} +{"current_steps": 10060, "total_steps": 15621, "loss": 0.45, "lr": 6.780051034488903e-07, "epoch": 0.6440048652455028, "percentage": 64.4, "elapsed_time": "1:04:26", "remaining_time": "0:35:37", "throughput": 8191.84, "total_tokens": 31676352} +{"current_steps": 10065, "total_steps": 15621, "loss": 0.3409, "lr": 6.769474458625323e-07, "epoch": 0.6443249471864797, "percentage": 64.43, "elapsed_time": "1:04:27", "remaining_time": "0:35:34", "throughput": 8194.5, "total_tokens": 31692160} +{"current_steps": 10070, "total_steps": 15621, "loss": 0.316, "lr": 6.758901916110813e-07, "epoch": 0.6446450291274566, "percentage": 64.46, "elapsed_time": "1:04:28", "remaining_time": "0:35:32", "throughput": 8197.1, "total_tokens": 31707712} +{"current_steps": 10075, "total_steps": 15621, "loss": 0.3278, "lr": 6.748333420145315e-07, "epoch": 0.6449651110684336, "percentage": 64.5, "elapsed_time": "1:04:28", "remaining_time": "0:35:29", "throughput": 8199.81, "total_tokens": 31723776} +{"current_steps": 10080, "total_steps": 15621, "loss": 0.4116, "lr": 6.737768983923718e-07, "epoch": 0.6452851930094105, "percentage": 64.53, "elapsed_time": "1:04:29", "remaining_time": "0:35:27", "throughput": 8202.69, "total_tokens": 31740672} +{"current_steps": 10085, "total_steps": 15621, "loss": 0.2941, "lr": 6.727208620635849e-07, "epoch": 0.6456052749503873, "percentage": 64.56, "elapsed_time": "1:04:30", "remaining_time": "0:35:24", "throughput": 8205.18, "total_tokens": 31755648} +{"current_steps": 10090, "total_steps": 15621, "loss": 0.4488, "lr": 6.716652343466446e-07, "epoch": 0.6459253568913642, "percentage": 64.59, "elapsed_time": "1:04:30", "remaining_time": "0:35:21", "throughput": 8207.68, "total_tokens": 31770624} +{"current_steps": 10095, "total_steps": 15621, "loss": 0.3044, "lr": 6.706100165595139e-07, "epoch": 0.6462454388323411, "percentage": 64.62, "elapsed_time": "1:04:31", "remaining_time": "0:35:19", "throughput": 8210.41, "total_tokens": 31786816} +{"current_steps": 10100, "total_steps": 15621, "loss": 0.3924, "lr": 6.695552100196452e-07, "epoch": 0.646565520773318, "percentage": 64.66, "elapsed_time": "1:04:32", "remaining_time": "0:35:16", "throughput": 8212.91, "total_tokens": 31801792} +{"current_steps": 10105, "total_steps": 15621, "loss": 0.5025, "lr": 6.685008160439769e-07, "epoch": 0.6468856027142948, "percentage": 64.69, "elapsed_time": "1:04:32", "remaining_time": "0:35:14", "throughput": 8215.82, "total_tokens": 31818944} +{"current_steps": 10110, "total_steps": 15621, "loss": 0.406, "lr": 6.674468359489313e-07, "epoch": 0.6472056846552717, "percentage": 64.72, "elapsed_time": "1:04:33", "remaining_time": "0:35:11", "throughput": 8218.35, "total_tokens": 31834176} +{"current_steps": 10115, "total_steps": 15621, "loss": 0.3488, "lr": 6.663932710504163e-07, "epoch": 0.6475257665962486, "percentage": 64.75, "elapsed_time": "1:04:34", "remaining_time": "0:35:08", "throughput": 8221.03, "total_tokens": 31850176} +{"current_steps": 10120, "total_steps": 15621, "loss": 0.3845, "lr": 6.653401226638192e-07, "epoch": 0.6478458485372255, "percentage": 64.78, "elapsed_time": "1:04:34", "remaining_time": "0:35:06", "throughput": 8223.62, "total_tokens": 31865600} +{"current_steps": 10125, "total_steps": 15621, "loss": 0.3985, "lr": 6.64287392104008e-07, "epoch": 0.6481659304782024, "percentage": 64.82, "elapsed_time": "1:04:35", "remaining_time": "0:35:03", "throughput": 8226.09, "total_tokens": 31880512} +{"current_steps": 10130, "total_steps": 15621, "loss": 0.4502, "lr": 6.632350806853299e-07, "epoch": 0.6484860124191794, "percentage": 64.85, "elapsed_time": "1:04:36", "remaining_time": "0:35:01", "throughput": 8228.77, "total_tokens": 31896512} +{"current_steps": 10135, "total_steps": 15621, "loss": 0.4127, "lr": 6.621831897216074e-07, "epoch": 0.6488060943601562, "percentage": 64.88, "elapsed_time": "1:04:36", "remaining_time": "0:34:58", "throughput": 8231.5, "total_tokens": 31912768} +{"current_steps": 10140, "total_steps": 15621, "loss": 0.4332, "lr": 6.611317205261387e-07, "epoch": 0.6491261763011331, "percentage": 64.91, "elapsed_time": "1:04:37", "remaining_time": "0:34:55", "throughput": 8233.93, "total_tokens": 31927488} +{"current_steps": 10145, "total_steps": 15621, "loss": 0.3464, "lr": 6.60080674411696e-07, "epoch": 0.64944625824211, "percentage": 64.94, "elapsed_time": "1:04:38", "remaining_time": "0:34:53", "throughput": 8236.47, "total_tokens": 31942784} +{"current_steps": 10150, "total_steps": 15621, "loss": 0.3139, "lr": 6.590300526905225e-07, "epoch": 0.6497663401830869, "percentage": 64.98, "elapsed_time": "1:04:38", "remaining_time": "0:34:50", "throughput": 8239.1, "total_tokens": 31958528} +{"current_steps": 10155, "total_steps": 15621, "loss": 0.4675, "lr": 6.579798566743313e-07, "epoch": 0.6500864221240638, "percentage": 65.01, "elapsed_time": "1:04:39", "remaining_time": "0:34:48", "throughput": 8241.66, "total_tokens": 31974016} +{"current_steps": 10160, "total_steps": 15621, "loss": 0.3272, "lr": 6.569300876743049e-07, "epoch": 0.6504065040650406, "percentage": 65.04, "elapsed_time": "1:04:40", "remaining_time": "0:34:45", "throughput": 8244.48, "total_tokens": 31990720} +{"current_steps": 10165, "total_steps": 15621, "loss": 0.324, "lr": 6.558807470010923e-07, "epoch": 0.6507265860060175, "percentage": 65.07, "elapsed_time": "1:04:40", "remaining_time": "0:34:43", "throughput": 8247.26, "total_tokens": 32007168} +{"current_steps": 10166, "total_steps": 15621, "eval_loss": 0.38159435987472534, "epoch": 0.6507906023942129, "percentage": 65.08, "elapsed_time": "1:05:31", "remaining_time": "0:35:09", "throughput": 8141.57, "total_tokens": 32010176} +{"current_steps": 10170, "total_steps": 15621, "loss": 0.355, "lr": 6.548318359648071e-07, "epoch": 0.6510466679469944, "percentage": 65.1, "elapsed_time": "1:09:19", "remaining_time": "0:37:09", "throughput": 7697.71, "total_tokens": 32022208} +{"current_steps": 10175, "total_steps": 15621, "loss": 0.4036, "lr": 6.537833558750279e-07, "epoch": 0.6513667498879713, "percentage": 65.14, "elapsed_time": "1:09:20", "remaining_time": "0:37:06", "throughput": 7700.22, "total_tokens": 32037760} +{"current_steps": 10180, "total_steps": 15621, "loss": 0.3108, "lr": 6.527353080407938e-07, "epoch": 0.6516868318289483, "percentage": 65.17, "elapsed_time": "1:09:21", "remaining_time": "0:37:04", "throughput": 7702.63, "total_tokens": 32052800} +{"current_steps": 10185, "total_steps": 15621, "loss": 0.3491, "lr": 6.516876937706048e-07, "epoch": 0.6520069137699251, "percentage": 65.2, "elapsed_time": "1:09:21", "remaining_time": "0:37:01", "throughput": 7705.12, "total_tokens": 32068288} +{"current_steps": 10190, "total_steps": 15621, "loss": 0.3769, "lr": 6.506405143724196e-07, "epoch": 0.652326995710902, "percentage": 65.23, "elapsed_time": "1:09:22", "remaining_time": "0:36:58", "throughput": 7707.48, "total_tokens": 32083200} +{"current_steps": 10195, "total_steps": 15621, "loss": 0.4685, "lr": 6.495937711536546e-07, "epoch": 0.6526470776518789, "percentage": 65.26, "elapsed_time": "1:09:23", "remaining_time": "0:36:55", "throughput": 7709.93, "total_tokens": 32098432} +{"current_steps": 10200, "total_steps": 15621, "loss": 0.4177, "lr": 6.485474654211803e-07, "epoch": 0.6529671595928558, "percentage": 65.3, "elapsed_time": "1:09:23", "remaining_time": "0:36:53", "throughput": 7712.61, "total_tokens": 32114944} +{"current_steps": 10205, "total_steps": 15621, "loss": 0.3062, "lr": 6.475015984813217e-07, "epoch": 0.6532872415338327, "percentage": 65.33, "elapsed_time": "1:09:24", "remaining_time": "0:36:50", "throughput": 7715.3, "total_tokens": 32131520} +{"current_steps": 10210, "total_steps": 15621, "loss": 0.321, "lr": 6.464561716398564e-07, "epoch": 0.6536073234748095, "percentage": 65.36, "elapsed_time": "1:09:25", "remaining_time": "0:36:47", "throughput": 7717.79, "total_tokens": 32147008} +{"current_steps": 10215, "total_steps": 15621, "loss": 0.3851, "lr": 6.454111862020122e-07, "epoch": 0.6539274054157864, "percentage": 65.39, "elapsed_time": "1:09:25", "remaining_time": "0:36:44", "throughput": 7720.27, "total_tokens": 32162560} +{"current_steps": 10220, "total_steps": 15621, "loss": 0.3665, "lr": 6.443666434724649e-07, "epoch": 0.6542474873567633, "percentage": 65.42, "elapsed_time": "1:09:26", "remaining_time": "0:36:41", "throughput": 7722.56, "total_tokens": 32177024} +{"current_steps": 10225, "total_steps": 15621, "loss": 0.542, "lr": 6.43322544755339e-07, "epoch": 0.6545675692977402, "percentage": 65.46, "elapsed_time": "1:09:27", "remaining_time": "0:36:39", "throughput": 7725.13, "total_tokens": 32193024} +{"current_steps": 10230, "total_steps": 15621, "loss": 0.3447, "lr": 6.422788913542038e-07, "epoch": 0.6548876512387171, "percentage": 65.49, "elapsed_time": "1:09:27", "remaining_time": "0:36:36", "throughput": 7727.67, "total_tokens": 32208896} +{"current_steps": 10235, "total_steps": 15621, "loss": 0.338, "lr": 6.412356845720726e-07, "epoch": 0.655207733179694, "percentage": 65.52, "elapsed_time": "1:09:28", "remaining_time": "0:36:33", "throughput": 7730.31, "total_tokens": 32225280} +{"current_steps": 10240, "total_steps": 15621, "loss": 0.3601, "lr": 6.40192925711402e-07, "epoch": 0.6555278151206709, "percentage": 65.55, "elapsed_time": "1:09:29", "remaining_time": "0:36:30", "throughput": 7732.77, "total_tokens": 32240768} +{"current_steps": 10245, "total_steps": 15621, "loss": 0.3259, "lr": 6.39150616074088e-07, "epoch": 0.6558478970616478, "percentage": 65.58, "elapsed_time": "1:09:30", "remaining_time": "0:36:28", "throughput": 7735.18, "total_tokens": 32255872} +{"current_steps": 10250, "total_steps": 15621, "loss": 0.4068, "lr": 6.381087569614668e-07, "epoch": 0.6561679790026247, "percentage": 65.62, "elapsed_time": "1:09:30", "remaining_time": "0:36:25", "throughput": 7737.87, "total_tokens": 32272512} +{"current_steps": 10255, "total_steps": 15621, "loss": 0.3801, "lr": 6.370673496743116e-07, "epoch": 0.6564880609436016, "percentage": 65.65, "elapsed_time": "1:09:31", "remaining_time": "0:36:22", "throughput": 7740.03, "total_tokens": 32286272} +{"current_steps": 10260, "total_steps": 15621, "loss": 0.4224, "lr": 6.360263955128315e-07, "epoch": 0.6568081428845784, "percentage": 65.68, "elapsed_time": "1:09:32", "remaining_time": "0:36:19", "throughput": 7742.54, "total_tokens": 32301952} +{"current_steps": 10265, "total_steps": 15621, "loss": 0.3657, "lr": 6.349858957766701e-07, "epoch": 0.6571282248255553, "percentage": 65.71, "elapsed_time": "1:09:32", "remaining_time": "0:36:17", "throughput": 7745.14, "total_tokens": 32318208} +{"current_steps": 10270, "total_steps": 15621, "loss": 0.3385, "lr": 6.339458517649036e-07, "epoch": 0.6574483067665322, "percentage": 65.74, "elapsed_time": "1:09:33", "remaining_time": "0:36:14", "throughput": 7747.58, "total_tokens": 32333504} +{"current_steps": 10275, "total_steps": 15621, "loss": 0.3685, "lr": 6.329062647760395e-07, "epoch": 0.6577683887075091, "percentage": 65.78, "elapsed_time": "1:09:34", "remaining_time": "0:36:11", "throughput": 7750.29, "total_tokens": 32350208} +{"current_steps": 10280, "total_steps": 15621, "loss": 0.3259, "lr": 6.318671361080137e-07, "epoch": 0.658088470648486, "percentage": 65.81, "elapsed_time": "1:09:34", "remaining_time": "0:36:08", "throughput": 7752.71, "total_tokens": 32365376} +{"current_steps": 10285, "total_steps": 15621, "loss": 0.3411, "lr": 6.308284670581906e-07, "epoch": 0.6584085525894628, "percentage": 65.84, "elapsed_time": "1:09:35", "remaining_time": "0:36:06", "throughput": 7755.25, "total_tokens": 32381248} +{"current_steps": 10290, "total_steps": 15621, "loss": 0.47, "lr": 6.297902589233612e-07, "epoch": 0.6587286345304398, "percentage": 65.87, "elapsed_time": "1:09:36", "remaining_time": "0:36:03", "throughput": 7757.58, "total_tokens": 32395968} +{"current_steps": 10295, "total_steps": 15621, "loss": 0.3728, "lr": 6.287525129997404e-07, "epoch": 0.6590487164714167, "percentage": 65.9, "elapsed_time": "1:09:36", "remaining_time": "0:36:00", "throughput": 7760.06, "total_tokens": 32411456} +{"current_steps": 10300, "total_steps": 15621, "loss": 0.4016, "lr": 6.277152305829656e-07, "epoch": 0.6593687984123936, "percentage": 65.94, "elapsed_time": "1:09:37", "remaining_time": "0:35:58", "throughput": 7762.52, "total_tokens": 32426880} +{"current_steps": 10305, "total_steps": 15621, "loss": 0.326, "lr": 6.266784129680968e-07, "epoch": 0.6596888803533705, "percentage": 65.97, "elapsed_time": "1:09:38", "remaining_time": "0:35:55", "throughput": 7764.99, "total_tokens": 32442368} +{"current_steps": 10310, "total_steps": 15621, "loss": 0.3979, "lr": 6.256420614496129e-07, "epoch": 0.6600089622943474, "percentage": 66.0, "elapsed_time": "1:09:38", "remaining_time": "0:35:52", "throughput": 7767.47, "total_tokens": 32457920} +{"current_steps": 10315, "total_steps": 15621, "loss": 0.4182, "lr": 6.246061773214102e-07, "epoch": 0.6603290442353242, "percentage": 66.03, "elapsed_time": "1:09:39", "remaining_time": "0:35:49", "throughput": 7769.97, "total_tokens": 32473536} +{"current_steps": 10320, "total_steps": 15621, "loss": 0.4073, "lr": 6.235707618768032e-07, "epoch": 0.6606491261763011, "percentage": 66.06, "elapsed_time": "1:09:40", "remaining_time": "0:35:47", "throughput": 7772.65, "total_tokens": 32490240} +{"current_steps": 10325, "total_steps": 15621, "loss": 0.344, "lr": 6.225358164085196e-07, "epoch": 0.660969208117278, "percentage": 66.1, "elapsed_time": "1:09:40", "remaining_time": "0:35:44", "throughput": 7775.12, "total_tokens": 32505728} +{"current_steps": 10330, "total_steps": 15621, "loss": 0.3463, "lr": 6.21501342208701e-07, "epoch": 0.6612892900582549, "percentage": 66.13, "elapsed_time": "1:09:41", "remaining_time": "0:35:41", "throughput": 7777.52, "total_tokens": 32520960} +{"current_steps": 10335, "total_steps": 15621, "loss": 0.3945, "lr": 6.204673405689007e-07, "epoch": 0.6616093719992318, "percentage": 66.16, "elapsed_time": "1:09:42", "remaining_time": "0:35:38", "throughput": 7779.9, "total_tokens": 32535872} +{"current_steps": 10340, "total_steps": 15621, "loss": 0.3129, "lr": 6.194338127800823e-07, "epoch": 0.6619294539402087, "percentage": 66.19, "elapsed_time": "1:09:42", "remaining_time": "0:35:36", "throughput": 7782.56, "total_tokens": 32552448} +{"current_steps": 10345, "total_steps": 15621, "loss": 0.3936, "lr": 6.184007601326165e-07, "epoch": 0.6622495358811856, "percentage": 66.22, "elapsed_time": "1:09:43", "remaining_time": "0:35:33", "throughput": 7784.9, "total_tokens": 32567232} +{"current_steps": 10350, "total_steps": 15621, "loss": 0.37, "lr": 6.173681839162824e-07, "epoch": 0.6625696178221625, "percentage": 66.26, "elapsed_time": "1:09:44", "remaining_time": "0:35:30", "throughput": 7787.5, "total_tokens": 32583360} +{"current_steps": 10355, "total_steps": 15621, "loss": 0.3328, "lr": 6.163360854202635e-07, "epoch": 0.6628896997631394, "percentage": 66.29, "elapsed_time": "1:09:44", "remaining_time": "0:35:28", "throughput": 7789.93, "total_tokens": 32598656} +{"current_steps": 10360, "total_steps": 15621, "loss": 0.3189, "lr": 6.153044659331461e-07, "epoch": 0.6632097817041163, "percentage": 66.32, "elapsed_time": "1:09:45", "remaining_time": "0:35:25", "throughput": 7792.39, "total_tokens": 32614144} +{"current_steps": 10365, "total_steps": 15621, "loss": 0.3708, "lr": 6.142733267429203e-07, "epoch": 0.6635298636450931, "percentage": 66.35, "elapsed_time": "1:09:46", "remaining_time": "0:35:22", "throughput": 7794.75, "total_tokens": 32629120} +{"current_steps": 10370, "total_steps": 15621, "loss": 0.4218, "lr": 6.132426691369748e-07, "epoch": 0.66384994558607, "percentage": 66.38, "elapsed_time": "1:09:46", "remaining_time": "0:35:20", "throughput": 7797.46, "total_tokens": 32645952} +{"current_steps": 10375, "total_steps": 15621, "loss": 0.3955, "lr": 6.122124944020977e-07, "epoch": 0.6641700275270469, "percentage": 66.42, "elapsed_time": "1:09:47", "remaining_time": "0:35:17", "throughput": 7799.97, "total_tokens": 32661696} +{"current_steps": 10380, "total_steps": 15621, "loss": 0.3779, "lr": 6.111828038244749e-07, "epoch": 0.6644901094680238, "percentage": 66.45, "elapsed_time": "1:09:48", "remaining_time": "0:35:14", "throughput": 7802.51, "total_tokens": 32677760} +{"current_steps": 10385, "total_steps": 15621, "loss": 0.3063, "lr": 6.101535986896866e-07, "epoch": 0.6648101914090007, "percentage": 66.48, "elapsed_time": "1:09:48", "remaining_time": "0:35:11", "throughput": 7805.03, "total_tokens": 32693568} +{"current_steps": 10390, "total_steps": 15621, "loss": 0.2929, "lr": 6.091248802827076e-07, "epoch": 0.6651302733499775, "percentage": 66.51, "elapsed_time": "1:09:49", "remaining_time": "0:35:09", "throughput": 7807.43, "total_tokens": 32708736} +{"current_steps": 10395, "total_steps": 15621, "loss": 0.3258, "lr": 6.080966498879048e-07, "epoch": 0.6654503552909545, "percentage": 66.55, "elapsed_time": "1:09:50", "remaining_time": "0:35:06", "throughput": 7810.11, "total_tokens": 32725440} +{"current_steps": 10400, "total_steps": 15621, "loss": 0.293, "lr": 6.070689087890363e-07, "epoch": 0.6657704372319314, "percentage": 66.58, "elapsed_time": "1:09:50", "remaining_time": "0:35:03", "throughput": 7812.51, "total_tokens": 32740608} +{"current_steps": 10405, "total_steps": 15621, "loss": 0.4026, "lr": 6.060416582692487e-07, "epoch": 0.6660905191729083, "percentage": 66.61, "elapsed_time": "1:09:51", "remaining_time": "0:35:01", "throughput": 7815.02, "total_tokens": 32756416} +{"current_steps": 10410, "total_steps": 15621, "loss": 0.3334, "lr": 6.05014899611076e-07, "epoch": 0.6664106011138852, "percentage": 66.64, "elapsed_time": "1:09:52", "remaining_time": "0:34:58", "throughput": 7817.48, "total_tokens": 32771904} +{"current_steps": 10415, "total_steps": 15621, "loss": 0.3801, "lr": 6.039886340964391e-07, "epoch": 0.666730683054862, "percentage": 66.67, "elapsed_time": "1:09:52", "remaining_time": "0:34:55", "throughput": 7819.94, "total_tokens": 32787392} +{"current_steps": 10420, "total_steps": 15621, "loss": 0.3367, "lr": 6.029628630066423e-07, "epoch": 0.6670507649958389, "percentage": 66.71, "elapsed_time": "1:09:53", "remaining_time": "0:34:53", "throughput": 7822.44, "total_tokens": 32803136} +{"current_steps": 10425, "total_steps": 15621, "loss": 0.4266, "lr": 6.019375876223724e-07, "epoch": 0.6673708469368158, "percentage": 66.74, "elapsed_time": "1:09:54", "remaining_time": "0:34:50", "throughput": 7824.89, "total_tokens": 32818624} +{"current_steps": 10430, "total_steps": 15621, "loss": 0.4689, "lr": 6.009128092236982e-07, "epoch": 0.6676909288777927, "percentage": 66.77, "elapsed_time": "1:09:54", "remaining_time": "0:34:47", "throughput": 7827.31, "total_tokens": 32833920} +{"current_steps": 10435, "total_steps": 15621, "loss": 0.3876, "lr": 5.998885290900679e-07, "epoch": 0.6680110108187696, "percentage": 66.8, "elapsed_time": "1:09:55", "remaining_time": "0:34:45", "throughput": 7829.6, "total_tokens": 32848512} +{"current_steps": 10440, "total_steps": 15621, "loss": 0.3414, "lr": 5.988647485003061e-07, "epoch": 0.6683310927597464, "percentage": 66.83, "elapsed_time": "1:09:56", "remaining_time": "0:34:42", "throughput": 7832.25, "total_tokens": 32865088} +{"current_steps": 10445, "total_steps": 15621, "loss": 0.4652, "lr": 5.978414687326164e-07, "epoch": 0.6686511747007234, "percentage": 66.87, "elapsed_time": "1:09:56", "remaining_time": "0:34:39", "throughput": 7834.97, "total_tokens": 32882048} +{"current_steps": 10450, "total_steps": 15621, "loss": 0.3775, "lr": 5.968186910645745e-07, "epoch": 0.6689712566417003, "percentage": 66.9, "elapsed_time": "1:09:57", "remaining_time": "0:34:37", "throughput": 7837.6, "total_tokens": 32898624} +{"current_steps": 10455, "total_steps": 15621, "loss": 0.5049, "lr": 5.957964167731305e-07, "epoch": 0.6692913385826772, "percentage": 66.93, "elapsed_time": "1:09:58", "remaining_time": "0:34:34", "throughput": 7840.05, "total_tokens": 32914176} +{"current_steps": 10460, "total_steps": 15621, "loss": 0.4117, "lr": 5.947746471346065e-07, "epoch": 0.6696114205236541, "percentage": 66.96, "elapsed_time": "1:09:58", "remaining_time": "0:34:31", "throughput": 7842.76, "total_tokens": 32931136} +{"current_steps": 10465, "total_steps": 15621, "loss": 0.3321, "lr": 5.937533834246932e-07, "epoch": 0.669931502464631, "percentage": 66.99, "elapsed_time": "1:09:59", "remaining_time": "0:34:29", "throughput": 7845.39, "total_tokens": 32947648} +{"current_steps": 10470, "total_steps": 15621, "loss": 0.3795, "lr": 5.927326269184504e-07, "epoch": 0.6702515844056078, "percentage": 67.03, "elapsed_time": "1:10:00", "remaining_time": "0:34:26", "throughput": 7848.03, "total_tokens": 32964224} +{"current_steps": 10475, "total_steps": 15621, "loss": 0.4602, "lr": 5.917123788903049e-07, "epoch": 0.6705716663465847, "percentage": 67.06, "elapsed_time": "1:10:01", "remaining_time": "0:34:23", "throughput": 7850.91, "total_tokens": 32982080} +{"current_steps": 10480, "total_steps": 15621, "loss": 0.4674, "lr": 5.906926406140484e-07, "epoch": 0.6708917482875616, "percentage": 67.09, "elapsed_time": "1:10:01", "remaining_time": "0:34:21", "throughput": 7853.33, "total_tokens": 32997440} +{"current_steps": 10485, "total_steps": 15621, "loss": 0.424, "lr": 5.896734133628354e-07, "epoch": 0.6712118302285385, "percentage": 67.12, "elapsed_time": "1:10:02", "remaining_time": "0:34:18", "throughput": 7855.8, "total_tokens": 33013056} +{"current_steps": 10490, "total_steps": 15621, "loss": 0.3804, "lr": 5.886546984091838e-07, "epoch": 0.6715319121695154, "percentage": 67.15, "elapsed_time": "1:10:03", "remaining_time": "0:34:15", "throughput": 7858.21, "total_tokens": 33028416} +{"current_steps": 10495, "total_steps": 15621, "loss": 0.3567, "lr": 5.876364970249711e-07, "epoch": 0.6718519941104922, "percentage": 67.19, "elapsed_time": "1:10:03", "remaining_time": "0:34:13", "throughput": 7860.47, "total_tokens": 33042880} +{"current_steps": 10500, "total_steps": 15621, "loss": 0.2744, "lr": 5.866188104814336e-07, "epoch": 0.6721720760514692, "percentage": 67.22, "elapsed_time": "1:10:04", "remaining_time": "0:34:10", "throughput": 7862.88, "total_tokens": 33058240} +{"current_steps": 10505, "total_steps": 15621, "loss": 0.3833, "lr": 5.856016400491646e-07, "epoch": 0.6724921579924461, "percentage": 67.25, "elapsed_time": "1:10:05", "remaining_time": "0:34:07", "throughput": 7865.35, "total_tokens": 33073920} +{"current_steps": 10510, "total_steps": 15621, "loss": 0.3158, "lr": 5.845849869981136e-07, "epoch": 0.672812239933423, "percentage": 67.28, "elapsed_time": "1:10:05", "remaining_time": "0:34:05", "throughput": 7867.78, "total_tokens": 33089344} +{"current_steps": 10515, "total_steps": 15621, "loss": 0.3608, "lr": 5.835688525975842e-07, "epoch": 0.6731323218743999, "percentage": 67.31, "elapsed_time": "1:10:06", "remaining_time": "0:34:02", "throughput": 7870.13, "total_tokens": 33104384} +{"current_steps": 10520, "total_steps": 15621, "loss": 0.3926, "lr": 5.825532381162311e-07, "epoch": 0.6734524038153767, "percentage": 67.35, "elapsed_time": "1:10:07", "remaining_time": "0:33:59", "throughput": 7872.59, "total_tokens": 33120064} +{"current_steps": 10525, "total_steps": 15621, "loss": 0.3889, "lr": 5.815381448220619e-07, "epoch": 0.6737724857563536, "percentage": 67.38, "elapsed_time": "1:10:07", "remaining_time": "0:33:57", "throughput": 7875.12, "total_tokens": 33136128} +{"current_steps": 10530, "total_steps": 15621, "loss": 0.3599, "lr": 5.805235739824327e-07, "epoch": 0.6740925676973305, "percentage": 67.41, "elapsed_time": "1:10:08", "remaining_time": "0:33:54", "throughput": 7878.13, "total_tokens": 33154816} +{"current_steps": 10535, "total_steps": 15621, "loss": 0.5053, "lr": 5.795095268640458e-07, "epoch": 0.6744126496383074, "percentage": 67.44, "elapsed_time": "1:10:09", "remaining_time": "0:33:52", "throughput": 7880.49, "total_tokens": 33169920} +{"current_steps": 10540, "total_steps": 15621, "loss": 0.5436, "lr": 5.784960047329519e-07, "epoch": 0.6747327315792843, "percentage": 67.47, "elapsed_time": "1:10:09", "remaining_time": "0:33:49", "throughput": 7883.35, "total_tokens": 33187712} +{"current_steps": 10545, "total_steps": 15621, "loss": 0.3931, "lr": 5.774830088545452e-07, "epoch": 0.6750528135202611, "percentage": 67.51, "elapsed_time": "1:10:10", "remaining_time": "0:33:46", "throughput": 7885.72, "total_tokens": 33202880} +{"current_steps": 10550, "total_steps": 15621, "loss": 0.3059, "lr": 5.76470540493563e-07, "epoch": 0.6753728954612381, "percentage": 67.54, "elapsed_time": "1:10:11", "remaining_time": "0:33:44", "throughput": 7888.26, "total_tokens": 33218944} +{"current_steps": 10555, "total_steps": 15621, "loss": 0.4468, "lr": 5.754586009140836e-07, "epoch": 0.675692977402215, "percentage": 67.57, "elapsed_time": "1:10:11", "remaining_time": "0:33:41", "throughput": 7890.73, "total_tokens": 33234688} +{"current_steps": 10560, "total_steps": 15621, "loss": 0.3582, "lr": 5.744471913795256e-07, "epoch": 0.6760130593431919, "percentage": 67.6, "elapsed_time": "1:10:12", "remaining_time": "0:33:38", "throughput": 7893.12, "total_tokens": 33249920} +{"current_steps": 10565, "total_steps": 15621, "loss": 0.3455, "lr": 5.734363131526459e-07, "epoch": 0.6763331412841688, "percentage": 67.63, "elapsed_time": "1:10:13", "remaining_time": "0:33:36", "throughput": 7895.62, "total_tokens": 33265792} +{"current_steps": 10570, "total_steps": 15621, "loss": 0.3779, "lr": 5.724259674955377e-07, "epoch": 0.6766532232251457, "percentage": 67.67, "elapsed_time": "1:10:13", "remaining_time": "0:33:33", "throughput": 7897.98, "total_tokens": 33280832} +{"current_steps": 10575, "total_steps": 15621, "loss": 0.3829, "lr": 5.714161556696291e-07, "epoch": 0.6769733051661225, "percentage": 67.7, "elapsed_time": "1:10:14", "remaining_time": "0:33:31", "throughput": 7900.46, "total_tokens": 33296576} +{"current_steps": 10580, "total_steps": 15621, "loss": 0.3425, "lr": 5.704068789356824e-07, "epoch": 0.6772933871070994, "percentage": 67.73, "elapsed_time": "1:10:15", "remaining_time": "0:33:28", "throughput": 7903.71, "total_tokens": 33316672} +{"current_steps": 10585, "total_steps": 15621, "loss": 0.3569, "lr": 5.693981385537912e-07, "epoch": 0.6776134690480763, "percentage": 67.76, "elapsed_time": "1:10:15", "remaining_time": "0:33:25", "throughput": 7906.0, "total_tokens": 33331456} +{"current_steps": 10590, "total_steps": 15621, "loss": 0.3483, "lr": 5.683899357833801e-07, "epoch": 0.6779335509890532, "percentage": 67.79, "elapsed_time": "1:10:16", "remaining_time": "0:33:23", "throughput": 7908.39, "total_tokens": 33346752} +{"current_steps": 10595, "total_steps": 15621, "loss": 0.4486, "lr": 5.673822718832015e-07, "epoch": 0.67825363293003, "percentage": 67.83, "elapsed_time": "1:10:17", "remaining_time": "0:33:20", "throughput": 7910.88, "total_tokens": 33362688} +{"current_steps": 10600, "total_steps": 15621, "loss": 0.3732, "lr": 5.663751481113362e-07, "epoch": 0.6785737148710069, "percentage": 67.86, "elapsed_time": "1:10:17", "remaining_time": "0:33:17", "throughput": 7913.2, "total_tokens": 33377600} +{"current_steps": 10605, "total_steps": 15621, "loss": 0.4346, "lr": 5.653685657251896e-07, "epoch": 0.6788937968119839, "percentage": 67.89, "elapsed_time": "1:10:18", "remaining_time": "0:33:15", "throughput": 7915.64, "total_tokens": 33393280} +{"current_steps": 10610, "total_steps": 15621, "loss": 0.378, "lr": 5.643625259814922e-07, "epoch": 0.6792138787529608, "percentage": 67.92, "elapsed_time": "1:10:19", "remaining_time": "0:33:12", "throughput": 7918.3, "total_tokens": 33410112} +{"current_steps": 10615, "total_steps": 15621, "loss": 0.3557, "lr": 5.633570301362953e-07, "epoch": 0.6795339606939377, "percentage": 67.95, "elapsed_time": "1:10:20", "remaining_time": "0:33:10", "throughput": 7920.85, "total_tokens": 33426624} +{"current_steps": 10620, "total_steps": 15621, "loss": 0.3642, "lr": 5.623520794449739e-07, "epoch": 0.6798540426349146, "percentage": 67.99, "elapsed_time": "1:10:20", "remaining_time": "0:33:07", "throughput": 7923.3, "total_tokens": 33442240} +{"current_steps": 10625, "total_steps": 15621, "loss": 0.4764, "lr": 5.613476751622195e-07, "epoch": 0.6801741245758914, "percentage": 68.02, "elapsed_time": "1:10:21", "remaining_time": "0:33:04", "throughput": 7925.84, "total_tokens": 33458432} +{"current_steps": 10630, "total_steps": 15621, "loss": 0.4373, "lr": 5.603438185420426e-07, "epoch": 0.6804942065168683, "percentage": 68.05, "elapsed_time": "1:10:22", "remaining_time": "0:33:02", "throughput": 7928.26, "total_tokens": 33473856} +{"current_steps": 10635, "total_steps": 15621, "loss": 0.473, "lr": 5.593405108377714e-07, "epoch": 0.6808142884578452, "percentage": 68.08, "elapsed_time": "1:10:22", "remaining_time": "0:32:59", "throughput": 7930.65, "total_tokens": 33489216} +{"current_steps": 10640, "total_steps": 15621, "loss": 0.4676, "lr": 5.583377533020457e-07, "epoch": 0.6811343703988221, "percentage": 68.11, "elapsed_time": "1:10:23", "remaining_time": "0:32:57", "throughput": 7933.17, "total_tokens": 33505280} +{"current_steps": 10645, "total_steps": 15621, "loss": 0.2929, "lr": 5.573355471868201e-07, "epoch": 0.681454452339799, "percentage": 68.15, "elapsed_time": "1:10:24", "remaining_time": "0:32:54", "throughput": 7935.54, "total_tokens": 33520512} +{"current_steps": 10650, "total_steps": 15621, "loss": 0.3535, "lr": 5.563338937433621e-07, "epoch": 0.6817745342807758, "percentage": 68.18, "elapsed_time": "1:10:24", "remaining_time": "0:32:51", "throughput": 7938.19, "total_tokens": 33537344} +{"current_steps": 10655, "total_steps": 15621, "loss": 0.2518, "lr": 5.553327942222472e-07, "epoch": 0.6820946162217527, "percentage": 68.21, "elapsed_time": "1:10:25", "remaining_time": "0:32:49", "throughput": 7940.47, "total_tokens": 33552128} +{"current_steps": 10660, "total_steps": 15621, "loss": 0.3535, "lr": 5.54332249873359e-07, "epoch": 0.6824146981627297, "percentage": 68.24, "elapsed_time": "1:10:26", "remaining_time": "0:32:46", "throughput": 7942.74, "total_tokens": 33566784} +{"current_steps": 10665, "total_steps": 15621, "loss": 0.2955, "lr": 5.533322619458896e-07, "epoch": 0.6827347801037066, "percentage": 68.27, "elapsed_time": "1:10:26", "remaining_time": "0:32:44", "throughput": 7945.12, "total_tokens": 33582080} +{"current_steps": 10670, "total_steps": 15621, "loss": 0.4268, "lr": 5.52332831688336e-07, "epoch": 0.6830548620446835, "percentage": 68.31, "elapsed_time": "1:10:27", "remaining_time": "0:32:41", "throughput": 7947.4, "total_tokens": 33596864} +{"current_steps": 10675, "total_steps": 15621, "loss": 0.3527, "lr": 5.513339603484981e-07, "epoch": 0.6833749439856603, "percentage": 68.34, "elapsed_time": "1:10:28", "remaining_time": "0:32:38", "throughput": 7949.93, "total_tokens": 33613056} +{"current_steps": 10680, "total_steps": 15621, "loss": 0.4979, "lr": 5.503356491734785e-07, "epoch": 0.6836950259266372, "percentage": 68.37, "elapsed_time": "1:10:28", "remaining_time": "0:32:36", "throughput": 7952.26, "total_tokens": 33628160} +{"current_steps": 10685, "total_steps": 15621, "loss": 0.4457, "lr": 5.493378994096806e-07, "epoch": 0.6840151078676141, "percentage": 68.4, "elapsed_time": "1:10:29", "remaining_time": "0:32:33", "throughput": 7954.95, "total_tokens": 33645184} +{"current_steps": 10690, "total_steps": 15621, "loss": 0.39, "lr": 5.483407123028067e-07, "epoch": 0.684335189808591, "percentage": 68.43, "elapsed_time": "1:10:30", "remaining_time": "0:32:31", "throughput": 7957.36, "total_tokens": 33660800} +{"current_steps": 10695, "total_steps": 15621, "loss": 0.4734, "lr": 5.473440890978566e-07, "epoch": 0.6846552717495679, "percentage": 68.47, "elapsed_time": "1:10:30", "remaining_time": "0:32:28", "throughput": 7959.85, "total_tokens": 33676736} +{"current_steps": 10700, "total_steps": 15621, "loss": 0.4094, "lr": 5.463480310391261e-07, "epoch": 0.6849753536905447, "percentage": 68.5, "elapsed_time": "1:10:31", "remaining_time": "0:32:26", "throughput": 7962.37, "total_tokens": 33692928} +{"current_steps": 10705, "total_steps": 15621, "loss": 0.3824, "lr": 5.453525393702052e-07, "epoch": 0.6852954356315216, "percentage": 68.53, "elapsed_time": "1:10:32", "remaining_time": "0:32:23", "throughput": 7964.76, "total_tokens": 33708352} +{"current_steps": 10710, "total_steps": 15621, "loss": 0.3687, "lr": 5.443576153339771e-07, "epoch": 0.6856155175724986, "percentage": 68.56, "elapsed_time": "1:10:32", "remaining_time": "0:32:20", "throughput": 7967.18, "total_tokens": 33723968} +{"current_steps": 10715, "total_steps": 15621, "loss": 0.3238, "lr": 5.433632601726159e-07, "epoch": 0.6859355995134755, "percentage": 68.59, "elapsed_time": "1:10:33", "remaining_time": "0:32:18", "throughput": 7969.53, "total_tokens": 33739200} +{"current_steps": 10720, "total_steps": 15621, "loss": 0.3306, "lr": 5.42369475127586e-07, "epoch": 0.6862556814544524, "percentage": 68.63, "elapsed_time": "1:10:34", "remaining_time": "0:32:15", "throughput": 7971.97, "total_tokens": 33754944} +{"current_steps": 10725, "total_steps": 15621, "loss": 0.4715, "lr": 5.413762614396396e-07, "epoch": 0.6865757633954293, "percentage": 68.66, "elapsed_time": "1:10:34", "remaining_time": "0:32:13", "throughput": 7974.19, "total_tokens": 33769472} +{"current_steps": 10730, "total_steps": 15621, "loss": 0.4267, "lr": 5.403836203488157e-07, "epoch": 0.6868958453364061, "percentage": 68.69, "elapsed_time": "1:10:35", "remaining_time": "0:32:10", "throughput": 7976.57, "total_tokens": 33784896} +{"current_steps": 10735, "total_steps": 15621, "loss": 0.3686, "lr": 5.393915530944382e-07, "epoch": 0.687215927277383, "percentage": 68.72, "elapsed_time": "1:10:36", "remaining_time": "0:32:08", "throughput": 7978.94, "total_tokens": 33800320} +{"current_steps": 10740, "total_steps": 15621, "loss": 0.3743, "lr": 5.384000609151145e-07, "epoch": 0.6875360092183599, "percentage": 68.75, "elapsed_time": "1:10:36", "remaining_time": "0:32:05", "throughput": 7981.53, "total_tokens": 33816896} +{"current_steps": 10745, "total_steps": 15621, "loss": 0.3655, "lr": 5.374091450487353e-07, "epoch": 0.6878560911593368, "percentage": 68.79, "elapsed_time": "1:10:37", "remaining_time": "0:32:02", "throughput": 7984.1, "total_tokens": 33833344} +{"current_steps": 10750, "total_steps": 15621, "loss": 0.3346, "lr": 5.364188067324693e-07, "epoch": 0.6881761731003136, "percentage": 68.82, "elapsed_time": "1:10:38", "remaining_time": "0:32:00", "throughput": 7986.67, "total_tokens": 33849856} +{"current_steps": 10755, "total_steps": 15621, "loss": 0.3566, "lr": 5.354290472027659e-07, "epoch": 0.6884962550412905, "percentage": 68.85, "elapsed_time": "1:10:38", "remaining_time": "0:31:57", "throughput": 7989.07, "total_tokens": 33865344} +{"current_steps": 10760, "total_steps": 15621, "loss": 0.4921, "lr": 5.344398676953525e-07, "epoch": 0.6888163369822674, "percentage": 68.88, "elapsed_time": "1:10:39", "remaining_time": "0:31:55", "throughput": 7991.65, "total_tokens": 33881792} +{"current_steps": 10765, "total_steps": 15621, "loss": 0.4873, "lr": 5.334512694452303e-07, "epoch": 0.6891364189232444, "percentage": 68.91, "elapsed_time": "1:10:40", "remaining_time": "0:31:52", "throughput": 7994.22, "total_tokens": 33898368} +{"current_steps": 10770, "total_steps": 15621, "loss": 0.345, "lr": 5.324632536866755e-07, "epoch": 0.6894565008642213, "percentage": 68.95, "elapsed_time": "1:10:41", "remaining_time": "0:31:50", "throughput": 7996.7, "total_tokens": 33914368} +{"current_steps": 10775, "total_steps": 15621, "loss": 0.349, "lr": 5.314758216532386e-07, "epoch": 0.6897765828051982, "percentage": 68.98, "elapsed_time": "1:10:41", "remaining_time": "0:31:47", "throughput": 7999.07, "total_tokens": 33929728} +{"current_steps": 10780, "total_steps": 15621, "loss": 0.3866, "lr": 5.304889745777396e-07, "epoch": 0.690096664746175, "percentage": 69.01, "elapsed_time": "1:10:42", "remaining_time": "0:31:45", "throughput": 8001.35, "total_tokens": 33944704} +{"current_steps": 10785, "total_steps": 15621, "loss": 0.6472, "lr": 5.295027136922678e-07, "epoch": 0.6904167466871519, "percentage": 69.04, "elapsed_time": "1:10:43", "remaining_time": "0:31:42", "throughput": 8003.73, "total_tokens": 33960128} +{"current_steps": 10790, "total_steps": 15621, "loss": 0.4201, "lr": 5.285170402281827e-07, "epoch": 0.6907368286281288, "percentage": 69.07, "elapsed_time": "1:10:43", "remaining_time": "0:31:40", "throughput": 8006.02, "total_tokens": 33975104} +{"current_steps": 10795, "total_steps": 15621, "loss": 0.459, "lr": 5.275319554161087e-07, "epoch": 0.6910569105691057, "percentage": 69.11, "elapsed_time": "1:10:44", "remaining_time": "0:31:37", "throughput": 8008.43, "total_tokens": 33990720} +{"current_steps": 10800, "total_steps": 15621, "loss": 0.4207, "lr": 5.265474604859356e-07, "epoch": 0.6913769925100826, "percentage": 69.14, "elapsed_time": "1:10:45", "remaining_time": "0:31:34", "throughput": 8010.82, "total_tokens": 34006272} +{"current_steps": 10805, "total_steps": 15621, "loss": 0.3828, "lr": 5.255635566668171e-07, "epoch": 0.6916970744510594, "percentage": 69.17, "elapsed_time": "1:10:45", "remaining_time": "0:31:32", "throughput": 8013.32, "total_tokens": 34022400} +{"current_steps": 10810, "total_steps": 15621, "loss": 0.3811, "lr": 5.245802451871686e-07, "epoch": 0.6920171563920363, "percentage": 69.2, "elapsed_time": "1:10:46", "remaining_time": "0:31:29", "throughput": 8015.86, "total_tokens": 34038720} +{"current_steps": 10815, "total_steps": 15621, "loss": 0.4381, "lr": 5.235975272746663e-07, "epoch": 0.6923372383330133, "percentage": 69.23, "elapsed_time": "1:10:47", "remaining_time": "0:31:27", "throughput": 8018.18, "total_tokens": 34053760} +{"current_steps": 10820, "total_steps": 15621, "loss": 0.3033, "lr": 5.226154041562442e-07, "epoch": 0.6926573202739902, "percentage": 69.27, "elapsed_time": "1:10:47", "remaining_time": "0:31:24", "throughput": 8020.62, "total_tokens": 34069568} +{"current_steps": 10825, "total_steps": 15621, "loss": 0.4078, "lr": 5.216338770580953e-07, "epoch": 0.6929774022149671, "percentage": 69.3, "elapsed_time": "1:10:48", "remaining_time": "0:31:22", "throughput": 8023.34, "total_tokens": 34086912} +{"current_steps": 10830, "total_steps": 15621, "loss": 0.359, "lr": 5.206529472056678e-07, "epoch": 0.6932974841559439, "percentage": 69.33, "elapsed_time": "1:10:49", "remaining_time": "0:31:19", "throughput": 8025.6, "total_tokens": 34101696} +{"current_steps": 10835, "total_steps": 15621, "loss": 0.3084, "lr": 5.196726158236637e-07, "epoch": 0.6936175660969208, "percentage": 69.36, "elapsed_time": "1:10:49", "remaining_time": "0:31:17", "throughput": 8027.76, "total_tokens": 34115904} +{"current_steps": 10840, "total_steps": 15621, "loss": 0.3404, "lr": 5.186928841360384e-07, "epoch": 0.6939376480378977, "percentage": 69.39, "elapsed_time": "1:10:50", "remaining_time": "0:31:14", "throughput": 8030.14, "total_tokens": 34131328} +{"current_steps": 10845, "total_steps": 15621, "loss": 0.4466, "lr": 5.177137533659985e-07, "epoch": 0.6942577299788746, "percentage": 69.43, "elapsed_time": "1:10:51", "remaining_time": "0:31:12", "throughput": 8032.83, "total_tokens": 34148544} +{"current_steps": 10850, "total_steps": 15621, "loss": 0.4562, "lr": 5.167352247360002e-07, "epoch": 0.6945778119198515, "percentage": 69.46, "elapsed_time": "1:10:51", "remaining_time": "0:31:09", "throughput": 8035.13, "total_tokens": 34163520} +{"current_steps": 10855, "total_steps": 15621, "loss": 0.398, "lr": 5.157572994677479e-07, "epoch": 0.6948978938608283, "percentage": 69.49, "elapsed_time": "1:10:52", "remaining_time": "0:31:07", "throughput": 8037.41, "total_tokens": 34178368} +{"current_steps": 10860, "total_steps": 15621, "loss": 0.4086, "lr": 5.147799787821929e-07, "epoch": 0.6952179758018052, "percentage": 69.52, "elapsed_time": "1:10:53", "remaining_time": "0:31:04", "throughput": 8039.8, "total_tokens": 34193920} +{"current_steps": 10865, "total_steps": 15621, "loss": 0.4939, "lr": 5.138032638995315e-07, "epoch": 0.6955380577427821, "percentage": 69.55, "elapsed_time": "1:10:53", "remaining_time": "0:31:02", "throughput": 8042.33, "total_tokens": 34210176} +{"current_steps": 10870, "total_steps": 15621, "loss": 0.3602, "lr": 5.128271560392037e-07, "epoch": 0.6958581396837591, "percentage": 69.59, "elapsed_time": "1:10:54", "remaining_time": "0:30:59", "throughput": 8045.01, "total_tokens": 34227328} +{"current_steps": 10875, "total_steps": 15621, "loss": 0.3959, "lr": 5.118516564198916e-07, "epoch": 0.696178221624736, "percentage": 69.62, "elapsed_time": "1:10:55", "remaining_time": "0:30:56", "throughput": 8047.24, "total_tokens": 34241984} +{"current_steps": 10880, "total_steps": 15621, "loss": 0.3339, "lr": 5.108767662595175e-07, "epoch": 0.6964983035657129, "percentage": 69.65, "elapsed_time": "1:10:55", "remaining_time": "0:30:54", "throughput": 8049.51, "total_tokens": 34256896} +{"current_steps": 10885, "total_steps": 15621, "loss": 0.3904, "lr": 5.099024867752446e-07, "epoch": 0.6968183855066897, "percentage": 69.68, "elapsed_time": "1:10:56", "remaining_time": "0:30:51", "throughput": 8052.12, "total_tokens": 34273792} +{"current_steps": 10890, "total_steps": 15621, "loss": 0.3381, "lr": 5.089288191834709e-07, "epoch": 0.6971384674476666, "percentage": 69.71, "elapsed_time": "1:10:57", "remaining_time": "0:30:49", "throughput": 8054.75, "total_tokens": 34290752} +{"current_steps": 10895, "total_steps": 15621, "loss": 0.3422, "lr": 5.079557646998318e-07, "epoch": 0.6974585493886435, "percentage": 69.75, "elapsed_time": "1:10:57", "remaining_time": "0:30:46", "throughput": 8057.52, "total_tokens": 34308416} +{"current_steps": 10900, "total_steps": 15621, "loss": 0.3981, "lr": 5.069833245391981e-07, "epoch": 0.6977786313296204, "percentage": 69.78, "elapsed_time": "1:10:58", "remaining_time": "0:30:44", "throughput": 8059.87, "total_tokens": 34323776} +{"current_steps": 10905, "total_steps": 15621, "loss": 0.2941, "lr": 5.060114999156728e-07, "epoch": 0.6980987132705972, "percentage": 69.81, "elapsed_time": "1:10:59", "remaining_time": "0:30:41", "throughput": 8062.18, "total_tokens": 34338944} +{"current_steps": 10910, "total_steps": 15621, "loss": 0.3407, "lr": 5.050402920425895e-07, "epoch": 0.6984187952115741, "percentage": 69.84, "elapsed_time": "1:10:59", "remaining_time": "0:30:39", "throughput": 8064.56, "total_tokens": 34354432} +{"current_steps": 10915, "total_steps": 15621, "loss": 0.2503, "lr": 5.040697021325128e-07, "epoch": 0.698738877152551, "percentage": 69.87, "elapsed_time": "1:11:00", "remaining_time": "0:30:36", "throughput": 8067.0, "total_tokens": 34370432} +{"current_steps": 10920, "total_steps": 15621, "loss": 0.4438, "lr": 5.030997313972361e-07, "epoch": 0.699058959093528, "percentage": 69.91, "elapsed_time": "1:11:01", "remaining_time": "0:30:34", "throughput": 8069.47, "total_tokens": 34386496} +{"current_steps": 10925, "total_steps": 15621, "loss": 0.3692, "lr": 5.021303810477795e-07, "epoch": 0.6993790410345049, "percentage": 69.94, "elapsed_time": "1:11:01", "remaining_time": "0:30:31", "throughput": 8071.95, "total_tokens": 34402560} +{"current_steps": 10930, "total_steps": 15621, "loss": 0.2937, "lr": 5.011616522943869e-07, "epoch": 0.6996991229754818, "percentage": 69.97, "elapsed_time": "1:11:02", "remaining_time": "0:30:29", "throughput": 8074.4, "total_tokens": 34418496} +{"current_steps": 10935, "total_steps": 15621, "loss": 0.2772, "lr": 5.001935463465289e-07, "epoch": 0.7000192049164586, "percentage": 70.0, "elapsed_time": "1:11:03", "remaining_time": "0:30:26", "throughput": 8076.89, "total_tokens": 34434752} +{"current_steps": 10940, "total_steps": 15621, "loss": 0.3775, "lr": 4.99226064412897e-07, "epoch": 0.7003392868574355, "percentage": 70.03, "elapsed_time": "1:11:04", "remaining_time": "0:30:24", "throughput": 8079.25, "total_tokens": 34450176} +{"current_steps": 10945, "total_steps": 15621, "loss": 0.4286, "lr": 4.982592077014026e-07, "epoch": 0.7006593687984124, "percentage": 70.07, "elapsed_time": "1:11:04", "remaining_time": "0:30:21", "throughput": 8081.62, "total_tokens": 34465600} +{"current_steps": 10948, "total_steps": 15621, "eval_loss": 0.3744131922721863, "epoch": 0.7008514179629985, "percentage": 70.09, "elapsed_time": "1:11:55", "remaining_time": "0:30:42", "throughput": 7988.2, "total_tokens": 34475136} +{"current_steps": 10950, "total_steps": 15621, "loss": 0.2973, "lr": 4.97292977419179e-07, "epoch": 0.7009794507393893, "percentage": 70.1, "elapsed_time": "1:13:55", "remaining_time": "0:31:32", "throughput": 7773.4, "total_tokens": 34481600} +{"current_steps": 10955, "total_steps": 15621, "loss": 0.2881, "lr": 4.963273747725755e-07, "epoch": 0.7012995326803662, "percentage": 70.13, "elapsed_time": "1:13:56", "remaining_time": "0:31:29", "throughput": 7776.02, "total_tokens": 34498752} +{"current_steps": 10960, "total_steps": 15621, "loss": 0.413, "lr": 4.953624009671582e-07, "epoch": 0.701619614621343, "percentage": 70.16, "elapsed_time": "1:13:57", "remaining_time": "0:31:27", "throughput": 7778.35, "total_tokens": 34514240} +{"current_steps": 10965, "total_steps": 15621, "loss": 0.4164, "lr": 4.943980572077086e-07, "epoch": 0.7019396965623199, "percentage": 70.19, "elapsed_time": "1:13:57", "remaining_time": "0:31:24", "throughput": 7780.5, "total_tokens": 34528704} +{"current_steps": 10970, "total_steps": 15621, "loss": 0.3207, "lr": 4.934343446982209e-07, "epoch": 0.7022597785032968, "percentage": 70.23, "elapsed_time": "1:13:58", "remaining_time": "0:31:21", "throughput": 7782.91, "total_tokens": 34544704} +{"current_steps": 10975, "total_steps": 15621, "loss": 0.3836, "lr": 4.924712646419016e-07, "epoch": 0.7025798604442738, "percentage": 70.26, "elapsed_time": "1:13:59", "remaining_time": "0:31:19", "throughput": 7785.19, "total_tokens": 34560000} +{"current_steps": 10980, "total_steps": 15621, "loss": 0.3222, "lr": 4.915088182411674e-07, "epoch": 0.7028999423852507, "percentage": 70.29, "elapsed_time": "1:13:59", "remaining_time": "0:31:16", "throughput": 7787.49, "total_tokens": 34575296} +{"current_steps": 10985, "total_steps": 15621, "loss": 0.3897, "lr": 4.905470066976439e-07, "epoch": 0.7032200243262275, "percentage": 70.32, "elapsed_time": "1:14:00", "remaining_time": "0:31:14", "throughput": 7789.75, "total_tokens": 34590528} +{"current_steps": 10990, "total_steps": 15621, "loss": 0.4156, "lr": 4.895858312121644e-07, "epoch": 0.7035401062672044, "percentage": 70.35, "elapsed_time": "1:14:01", "remaining_time": "0:31:11", "throughput": 7791.96, "total_tokens": 34605312} +{"current_steps": 10995, "total_steps": 15621, "loss": 0.4342, "lr": 4.886252929847674e-07, "epoch": 0.7038601882081813, "percentage": 70.39, "elapsed_time": "1:14:01", "remaining_time": "0:31:08", "throughput": 7794.27, "total_tokens": 34620736} +{"current_steps": 11000, "total_steps": 15621, "loss": 0.4627, "lr": 4.876653932146963e-07, "epoch": 0.7041802701491582, "percentage": 70.42, "elapsed_time": "1:14:02", "remaining_time": "0:31:06", "throughput": 7796.67, "total_tokens": 34636736} +{"current_steps": 11005, "total_steps": 15621, "loss": 0.3895, "lr": 4.86706133100397e-07, "epoch": 0.7045003520901351, "percentage": 70.45, "elapsed_time": "1:14:03", "remaining_time": "0:31:03", "throughput": 7798.91, "total_tokens": 34651776} +{"current_steps": 11010, "total_steps": 15621, "loss": 0.2889, "lr": 4.857475138395178e-07, "epoch": 0.7048204340311119, "percentage": 70.48, "elapsed_time": "1:14:03", "remaining_time": "0:31:01", "throughput": 7801.04, "total_tokens": 34666176} +{"current_steps": 11015, "total_steps": 15621, "loss": 0.2493, "lr": 4.847895366289054e-07, "epoch": 0.7051405159720888, "percentage": 70.51, "elapsed_time": "1:14:04", "remaining_time": "0:30:58", "throughput": 7803.44, "total_tokens": 34682112} +{"current_steps": 11020, "total_steps": 15621, "loss": 0.3825, "lr": 4.838322026646057e-07, "epoch": 0.7054605979130657, "percentage": 70.55, "elapsed_time": "1:14:05", "remaining_time": "0:30:55", "throughput": 7805.66, "total_tokens": 34697024} +{"current_steps": 11025, "total_steps": 15621, "loss": 0.371, "lr": 4.82875513141861e-07, "epoch": 0.7057806798540426, "percentage": 70.58, "elapsed_time": "1:14:05", "remaining_time": "0:30:53", "throughput": 7808.0, "total_tokens": 34712704} +{"current_steps": 11030, "total_steps": 15621, "loss": 0.375, "lr": 4.819194692551106e-07, "epoch": 0.7061007617950196, "percentage": 70.61, "elapsed_time": "1:14:06", "remaining_time": "0:30:50", "throughput": 7810.32, "total_tokens": 34728256} +{"current_steps": 11035, "total_steps": 15621, "loss": 0.435, "lr": 4.809640721979855e-07, "epoch": 0.7064208437359965, "percentage": 70.64, "elapsed_time": "1:14:07", "remaining_time": "0:30:48", "throughput": 7812.76, "total_tokens": 34744512} +{"current_steps": 11040, "total_steps": 15621, "loss": 0.4181, "lr": 4.8000932316331e-07, "epoch": 0.7067409256769733, "percentage": 70.67, "elapsed_time": "1:14:07", "remaining_time": "0:30:45", "throughput": 7814.89, "total_tokens": 34758912} +{"current_steps": 11045, "total_steps": 15621, "loss": 0.3914, "lr": 4.790552233431002e-07, "epoch": 0.7070610076179502, "percentage": 70.71, "elapsed_time": "1:14:08", "remaining_time": "0:30:43", "throughput": 7817.29, "total_tokens": 34774848} +{"current_steps": 11050, "total_steps": 15621, "loss": 0.416, "lr": 4.781017739285611e-07, "epoch": 0.7073810895589271, "percentage": 70.74, "elapsed_time": "1:14:09", "remaining_time": "0:30:40", "throughput": 7819.54, "total_tokens": 34790016} +{"current_steps": 11055, "total_steps": 15621, "loss": 0.3528, "lr": 4.771489761100842e-07, "epoch": 0.707701171499904, "percentage": 70.77, "elapsed_time": "1:14:09", "remaining_time": "0:30:37", "throughput": 7821.77, "total_tokens": 34804992} +{"current_steps": 11060, "total_steps": 15621, "loss": 0.2746, "lr": 4.761968310772501e-07, "epoch": 0.7080212534408808, "percentage": 70.8, "elapsed_time": "1:14:10", "remaining_time": "0:30:35", "throughput": 7824.04, "total_tokens": 34820288} +{"current_steps": 11065, "total_steps": 15621, "loss": 0.2814, "lr": 4.7524534001882267e-07, "epoch": 0.7083413353818577, "percentage": 70.83, "elapsed_time": "1:14:11", "remaining_time": "0:30:32", "throughput": 7826.41, "total_tokens": 34836096} +{"current_steps": 11070, "total_steps": 15621, "loss": 0.3875, "lr": 4.7429450412274897e-07, "epoch": 0.7086614173228346, "percentage": 70.87, "elapsed_time": "1:14:11", "remaining_time": "0:30:30", "throughput": 7828.72, "total_tokens": 34851584} +{"current_steps": 11075, "total_steps": 15621, "loss": 0.3542, "lr": 4.733443245761596e-07, "epoch": 0.7089814992638115, "percentage": 70.9, "elapsed_time": "1:14:12", "remaining_time": "0:30:27", "throughput": 7831.19, "total_tokens": 34868032} +{"current_steps": 11080, "total_steps": 15621, "loss": 0.3826, "lr": 4.723948025653646e-07, "epoch": 0.7093015812047885, "percentage": 70.93, "elapsed_time": "1:14:13", "remaining_time": "0:30:25", "throughput": 7833.58, "total_tokens": 34884032} +{"current_steps": 11085, "total_steps": 15621, "loss": 0.3252, "lr": 4.714459392758534e-07, "epoch": 0.7096216631457654, "percentage": 70.96, "elapsed_time": "1:14:13", "remaining_time": "0:30:22", "throughput": 7835.87, "total_tokens": 34899456} +{"current_steps": 11090, "total_steps": 15621, "loss": 0.3772, "lr": 4.70497735892293e-07, "epoch": 0.7099417450867422, "percentage": 70.99, "elapsed_time": "1:14:14", "remaining_time": "0:30:19", "throughput": 7838.27, "total_tokens": 34915456} +{"current_steps": 11095, "total_steps": 15621, "loss": 0.3408, "lr": 4.695501935985263e-07, "epoch": 0.7102618270277191, "percentage": 71.03, "elapsed_time": "1:14:15", "remaining_time": "0:30:17", "throughput": 7840.64, "total_tokens": 34931328} +{"current_steps": 11100, "total_steps": 15621, "loss": 0.4064, "lr": 4.686033135775711e-07, "epoch": 0.710581908968696, "percentage": 71.06, "elapsed_time": "1:14:15", "remaining_time": "0:30:14", "throughput": 7842.92, "total_tokens": 34946816} +{"current_steps": 11105, "total_steps": 15621, "loss": 0.3274, "lr": 4.6765709701161817e-07, "epoch": 0.7109019909096729, "percentage": 71.09, "elapsed_time": "1:14:16", "remaining_time": "0:30:12", "throughput": 7845.61, "total_tokens": 34964544} +{"current_steps": 11110, "total_steps": 15621, "loss": 0.3861, "lr": 4.6671154508203003e-07, "epoch": 0.7112220728506498, "percentage": 71.12, "elapsed_time": "1:14:17", "remaining_time": "0:30:09", "throughput": 7848.28, "total_tokens": 34982208} +{"current_steps": 11115, "total_steps": 15621, "loss": 0.3523, "lr": 4.657666589693393e-07, "epoch": 0.7115421547916266, "percentage": 71.15, "elapsed_time": "1:14:18", "remaining_time": "0:30:07", "throughput": 7851.07, "total_tokens": 35000576} +{"current_steps": 11120, "total_steps": 15621, "loss": 0.3167, "lr": 4.6482243985324753e-07, "epoch": 0.7118622367326035, "percentage": 71.19, "elapsed_time": "1:14:18", "remaining_time": "0:30:04", "throughput": 7853.17, "total_tokens": 35014912} +{"current_steps": 11125, "total_steps": 15621, "loss": 0.2867, "lr": 4.638788889126232e-07, "epoch": 0.7121823186735804, "percentage": 71.22, "elapsed_time": "1:14:19", "remaining_time": "0:30:02", "throughput": 7855.33, "total_tokens": 35029632} +{"current_steps": 11130, "total_steps": 15621, "loss": 0.3423, "lr": 4.6293600732550085e-07, "epoch": 0.7125024006145573, "percentage": 71.25, "elapsed_time": "1:14:20", "remaining_time": "0:29:59", "throughput": 7857.61, "total_tokens": 35044992} +{"current_steps": 11135, "total_steps": 15621, "loss": 0.4721, "lr": 4.619937962690792e-07, "epoch": 0.7128224825555343, "percentage": 71.28, "elapsed_time": "1:14:20", "remaining_time": "0:29:57", "throughput": 7859.91, "total_tokens": 35060544} +{"current_steps": 11140, "total_steps": 15621, "loss": 0.5205, "lr": 4.610522569197197e-07, "epoch": 0.7131425644965111, "percentage": 71.31, "elapsed_time": "1:14:21", "remaining_time": "0:29:54", "throughput": 7862.13, "total_tokens": 35075648} +{"current_steps": 11145, "total_steps": 15621, "loss": 0.3271, "lr": 4.6011139045294554e-07, "epoch": 0.713462646437488, "percentage": 71.35, "elapsed_time": "1:14:22", "remaining_time": "0:29:52", "throughput": 7864.38, "total_tokens": 35090880} +{"current_steps": 11150, "total_steps": 15621, "loss": 0.3935, "lr": 4.59171198043439e-07, "epoch": 0.7137827283784649, "percentage": 71.38, "elapsed_time": "1:14:22", "remaining_time": "0:29:49", "throughput": 7866.68, "total_tokens": 35106432} +{"current_steps": 11155, "total_steps": 15621, "loss": 0.4446, "lr": 4.582316808650424e-07, "epoch": 0.7141028103194418, "percentage": 71.41, "elapsed_time": "1:14:23", "remaining_time": "0:29:46", "throughput": 7868.93, "total_tokens": 35121664} +{"current_steps": 11160, "total_steps": 15621, "loss": 0.4704, "lr": 4.572928400907529e-07, "epoch": 0.7144228922604187, "percentage": 71.44, "elapsed_time": "1:14:24", "remaining_time": "0:29:44", "throughput": 7871.21, "total_tokens": 35137152} +{"current_steps": 11165, "total_steps": 15621, "loss": 0.3787, "lr": 4.5635467689272434e-07, "epoch": 0.7147429742013955, "percentage": 71.47, "elapsed_time": "1:14:24", "remaining_time": "0:29:41", "throughput": 7873.58, "total_tokens": 35153088} +{"current_steps": 11170, "total_steps": 15621, "loss": 0.3674, "lr": 4.554171924422655e-07, "epoch": 0.7150630561423724, "percentage": 71.51, "elapsed_time": "1:14:25", "remaining_time": "0:29:39", "throughput": 7875.8, "total_tokens": 35168192} +{"current_steps": 11175, "total_steps": 15621, "loss": 0.3288, "lr": 4.544803879098356e-07, "epoch": 0.7153831380833493, "percentage": 71.54, "elapsed_time": "1:14:26", "remaining_time": "0:29:36", "throughput": 7878.17, "total_tokens": 35184192} +{"current_steps": 11180, "total_steps": 15621, "loss": 0.3703, "lr": 4.535442644650462e-07, "epoch": 0.7157032200243262, "percentage": 71.57, "elapsed_time": "1:14:26", "remaining_time": "0:29:34", "throughput": 7880.55, "total_tokens": 35200256} +{"current_steps": 11185, "total_steps": 15621, "loss": 0.4906, "lr": 4.5260882327665906e-07, "epoch": 0.7160233019653032, "percentage": 71.6, "elapsed_time": "1:14:27", "remaining_time": "0:29:31", "throughput": 7882.67, "total_tokens": 35214720} +{"current_steps": 11190, "total_steps": 15621, "loss": 0.5148, "lr": 4.5167406551258347e-07, "epoch": 0.71634338390628, "percentage": 71.63, "elapsed_time": "1:14:28", "remaining_time": "0:29:29", "throughput": 7885.02, "total_tokens": 35230720} +{"current_steps": 11195, "total_steps": 15621, "loss": 0.3863, "lr": 4.5073999233987445e-07, "epoch": 0.7166634658472569, "percentage": 71.67, "elapsed_time": "1:14:28", "remaining_time": "0:29:26", "throughput": 7887.34, "total_tokens": 35246400} +{"current_steps": 11200, "total_steps": 15621, "loss": 0.47, "lr": 4.4980660492473434e-07, "epoch": 0.7169835477882338, "percentage": 71.7, "elapsed_time": "1:14:29", "remaining_time": "0:29:24", "throughput": 7889.78, "total_tokens": 35262784} +{"current_steps": 11205, "total_steps": 15621, "loss": 0.2775, "lr": 4.4887390443250804e-07, "epoch": 0.7173036297292107, "percentage": 71.73, "elapsed_time": "1:14:30", "remaining_time": "0:29:21", "throughput": 7891.96, "total_tokens": 35277632} +{"current_steps": 11210, "total_steps": 15621, "loss": 0.2913, "lr": 4.4794189202768295e-07, "epoch": 0.7176237116701876, "percentage": 71.76, "elapsed_time": "1:14:30", "remaining_time": "0:29:19", "throughput": 7894.15, "total_tokens": 35292544} +{"current_steps": 11215, "total_steps": 15621, "loss": 0.368, "lr": 4.4701056887388757e-07, "epoch": 0.7179437936111644, "percentage": 71.79, "elapsed_time": "1:14:31", "remaining_time": "0:29:16", "throughput": 7896.5, "total_tokens": 35308352} +{"current_steps": 11220, "total_steps": 15621, "loss": 0.3343, "lr": 4.460799361338897e-07, "epoch": 0.7182638755521413, "percentage": 71.83, "elapsed_time": "1:14:32", "remaining_time": "0:29:14", "throughput": 7898.79, "total_tokens": 35323904} +{"current_steps": 11225, "total_steps": 15621, "loss": 0.4156, "lr": 4.451499949695954e-07, "epoch": 0.7185839574931182, "percentage": 71.86, "elapsed_time": "1:14:32", "remaining_time": "0:29:11", "throughput": 7901.21, "total_tokens": 35340224} +{"current_steps": 11230, "total_steps": 15621, "loss": 0.375, "lr": 4.44220746542047e-07, "epoch": 0.7189040394340951, "percentage": 71.89, "elapsed_time": "1:14:33", "remaining_time": "0:29:09", "throughput": 7903.5, "total_tokens": 35355776} +{"current_steps": 11235, "total_steps": 15621, "loss": 0.4772, "lr": 4.432921920114221e-07, "epoch": 0.719224121375072, "percentage": 71.92, "elapsed_time": "1:14:34", "remaining_time": "0:29:06", "throughput": 7905.75, "total_tokens": 35371072} +{"current_steps": 11240, "total_steps": 15621, "loss": 0.3169, "lr": 4.4236433253703185e-07, "epoch": 0.719544203316049, "percentage": 71.95, "elapsed_time": "1:14:34", "remaining_time": "0:29:04", "throughput": 7908.18, "total_tokens": 35387520} +{"current_steps": 11245, "total_steps": 15621, "loss": 0.3928, "lr": 4.4143716927732e-07, "epoch": 0.7198642852570258, "percentage": 71.99, "elapsed_time": "1:14:35", "remaining_time": "0:29:01", "throughput": 7910.61, "total_tokens": 35403840} +{"current_steps": 11250, "total_steps": 15621, "loss": 0.3873, "lr": 4.405107033898604e-07, "epoch": 0.7201843671980027, "percentage": 72.02, "elapsed_time": "1:14:36", "remaining_time": "0:28:59", "throughput": 7913.0, "total_tokens": 35420032} +{"current_steps": 11255, "total_steps": 15621, "loss": 0.2845, "lr": 4.395849360313568e-07, "epoch": 0.7205044491389796, "percentage": 72.05, "elapsed_time": "1:14:36", "remaining_time": "0:28:56", "throughput": 7915.37, "total_tokens": 35436032} +{"current_steps": 11260, "total_steps": 15621, "loss": 0.3583, "lr": 4.386598683576406e-07, "epoch": 0.7208245310799565, "percentage": 72.08, "elapsed_time": "1:14:37", "remaining_time": "0:28:54", "throughput": 7917.57, "total_tokens": 35451136} +{"current_steps": 11265, "total_steps": 15621, "loss": 0.4711, "lr": 4.377355015236696e-07, "epoch": 0.7211446130209334, "percentage": 72.11, "elapsed_time": "1:14:38", "remaining_time": "0:28:51", "throughput": 7919.87, "total_tokens": 35466816} +{"current_steps": 11270, "total_steps": 15621, "loss": 0.3555, "lr": 4.368118366835266e-07, "epoch": 0.7214646949619102, "percentage": 72.15, "elapsed_time": "1:14:38", "remaining_time": "0:28:49", "throughput": 7922.35, "total_tokens": 35483456} +{"current_steps": 11275, "total_steps": 15621, "loss": 0.4612, "lr": 4.358888749904177e-07, "epoch": 0.7217847769028871, "percentage": 72.18, "elapsed_time": "1:14:39", "remaining_time": "0:28:46", "throughput": 7924.74, "total_tokens": 35499584} +{"current_steps": 11280, "total_steps": 15621, "loss": 0.3546, "lr": 4.349666175966725e-07, "epoch": 0.722104858843864, "percentage": 72.21, "elapsed_time": "1:14:40", "remaining_time": "0:28:44", "throughput": 7927.05, "total_tokens": 35515328} +{"current_steps": 11285, "total_steps": 15621, "loss": 0.4744, "lr": 4.340450656537392e-07, "epoch": 0.7224249407848409, "percentage": 72.24, "elapsed_time": "1:14:40", "remaining_time": "0:28:41", "throughput": 7929.18, "total_tokens": 35530048} +{"current_steps": 11290, "total_steps": 15621, "loss": 0.2965, "lr": 4.331242203121861e-07, "epoch": 0.7227450227258178, "percentage": 72.27, "elapsed_time": "1:14:41", "remaining_time": "0:28:39", "throughput": 7931.51, "total_tokens": 35545792} +{"current_steps": 11295, "total_steps": 15621, "loss": 0.3871, "lr": 4.322040827217004e-07, "epoch": 0.7230651046667947, "percentage": 72.31, "elapsed_time": "1:14:42", "remaining_time": "0:28:36", "throughput": 7933.8, "total_tokens": 35561344} +{"current_steps": 11300, "total_steps": 15621, "loss": 0.405, "lr": 4.312846540310838e-07, "epoch": 0.7233851866077716, "percentage": 72.34, "elapsed_time": "1:14:42", "remaining_time": "0:28:34", "throughput": 7936.11, "total_tokens": 35577024} +{"current_steps": 11305, "total_steps": 15621, "loss": 0.3728, "lr": 4.3036593538825373e-07, "epoch": 0.7237052685487485, "percentage": 72.37, "elapsed_time": "1:14:43", "remaining_time": "0:28:31", "throughput": 7938.32, "total_tokens": 35592192} +{"current_steps": 11310, "total_steps": 15621, "loss": 0.3287, "lr": 4.2944792794024196e-07, "epoch": 0.7240253504897254, "percentage": 72.4, "elapsed_time": "1:14:44", "remaining_time": "0:28:29", "throughput": 7940.64, "total_tokens": 35607872} +{"current_steps": 11315, "total_steps": 15621, "loss": 0.3117, "lr": 4.285306328331915e-07, "epoch": 0.7243454324307023, "percentage": 72.43, "elapsed_time": "1:14:44", "remaining_time": "0:28:26", "throughput": 7943.0, "total_tokens": 35623872} +{"current_steps": 11320, "total_steps": 15621, "loss": 0.3067, "lr": 4.2761405121235506e-07, "epoch": 0.7246655143716791, "percentage": 72.47, "elapsed_time": "1:14:45", "remaining_time": "0:28:24", "throughput": 7945.15, "total_tokens": 35638720} +{"current_steps": 11325, "total_steps": 15621, "loss": 0.5403, "lr": 4.266981842220965e-07, "epoch": 0.724985596312656, "percentage": 72.5, "elapsed_time": "1:14:46", "remaining_time": "0:28:21", "throughput": 7947.68, "total_tokens": 35655680} +{"current_steps": 11330, "total_steps": 15621, "loss": 0.2708, "lr": 4.257830330058864e-07, "epoch": 0.7253056782536329, "percentage": 72.53, "elapsed_time": "1:14:46", "remaining_time": "0:28:19", "throughput": 7949.96, "total_tokens": 35671168} +{"current_steps": 11335, "total_steps": 15621, "loss": 0.4088, "lr": 4.248685987063019e-07, "epoch": 0.7256257601946098, "percentage": 72.56, "elapsed_time": "1:14:47", "remaining_time": "0:28:16", "throughput": 7952.26, "total_tokens": 35686848} +{"current_steps": 11340, "total_steps": 15621, "loss": 0.3478, "lr": 4.2395488246502396e-07, "epoch": 0.7259458421355867, "percentage": 72.59, "elapsed_time": "1:14:48", "remaining_time": "0:28:14", "throughput": 7954.61, "total_tokens": 35702720} +{"current_steps": 11345, "total_steps": 15621, "loss": 0.4566, "lr": 4.2304188542283913e-07, "epoch": 0.7262659240765637, "percentage": 72.63, "elapsed_time": "1:14:49", "remaining_time": "0:28:11", "throughput": 7957.3, "total_tokens": 35720640} +{"current_steps": 11350, "total_steps": 15621, "loss": 0.3923, "lr": 4.221296087196347e-07, "epoch": 0.7265860060175405, "percentage": 72.66, "elapsed_time": "1:14:49", "remaining_time": "0:28:09", "throughput": 7959.44, "total_tokens": 35735424} +{"current_steps": 11355, "total_steps": 15621, "loss": 0.4596, "lr": 4.2121805349439867e-07, "epoch": 0.7269060879585174, "percentage": 72.69, "elapsed_time": "1:14:50", "remaining_time": "0:28:06", "throughput": 7961.76, "total_tokens": 35751168} +{"current_steps": 11360, "total_steps": 15621, "loss": 0.3787, "lr": 4.203072208852184e-07, "epoch": 0.7272261698994943, "percentage": 72.72, "elapsed_time": "1:14:51", "remaining_time": "0:28:04", "throughput": 7964.1, "total_tokens": 35767168} +{"current_steps": 11365, "total_steps": 15621, "loss": 0.439, "lr": 4.193971120292793e-07, "epoch": 0.7275462518404712, "percentage": 72.75, "elapsed_time": "1:14:51", "remaining_time": "0:28:02", "throughput": 7966.33, "total_tokens": 35782464} +{"current_steps": 11370, "total_steps": 15621, "loss": 0.406, "lr": 4.184877280628629e-07, "epoch": 0.727866333781448, "percentage": 72.79, "elapsed_time": "1:14:52", "remaining_time": "0:27:59", "throughput": 7968.69, "total_tokens": 35798592} +{"current_steps": 11375, "total_steps": 15621, "loss": 0.396, "lr": 4.1757907012134565e-07, "epoch": 0.7281864157224249, "percentage": 72.82, "elapsed_time": "1:14:53", "remaining_time": "0:27:57", "throughput": 7971.07, "total_tokens": 35814720} +{"current_steps": 11380, "total_steps": 15621, "loss": 0.2826, "lr": 4.166711393391978e-07, "epoch": 0.7285064976634018, "percentage": 72.85, "elapsed_time": "1:14:53", "remaining_time": "0:27:54", "throughput": 7973.3, "total_tokens": 35830016} +{"current_steps": 11385, "total_steps": 15621, "loss": 0.345, "lr": 4.1576393684998146e-07, "epoch": 0.7288265796043787, "percentage": 72.88, "elapsed_time": "1:14:54", "remaining_time": "0:27:52", "throughput": 7975.58, "total_tokens": 35845632} +{"current_steps": 11390, "total_steps": 15621, "loss": 0.3556, "lr": 4.1485746378634966e-07, "epoch": 0.7291466615453556, "percentage": 72.91, "elapsed_time": "1:14:55", "remaining_time": "0:27:49", "throughput": 7977.86, "total_tokens": 35861184} +{"current_steps": 11395, "total_steps": 15621, "loss": 0.4311, "lr": 4.1395172128004473e-07, "epoch": 0.7294667434863324, "percentage": 72.95, "elapsed_time": "1:14:55", "remaining_time": "0:27:47", "throughput": 7980.17, "total_tokens": 35876864} +{"current_steps": 11400, "total_steps": 15621, "loss": 0.3318, "lr": 4.130467104618963e-07, "epoch": 0.7297868254273094, "percentage": 72.98, "elapsed_time": "1:14:56", "remaining_time": "0:27:44", "throughput": 7982.65, "total_tokens": 35893568} +{"current_steps": 11405, "total_steps": 15621, "loss": 0.3364, "lr": 4.1214243246182223e-07, "epoch": 0.7301069073682863, "percentage": 73.01, "elapsed_time": "1:14:57", "remaining_time": "0:27:42", "throughput": 7985.01, "total_tokens": 35909696} +{"current_steps": 11410, "total_steps": 15621, "loss": 0.5046, "lr": 4.1123888840882306e-07, "epoch": 0.7304269893092632, "percentage": 73.04, "elapsed_time": "1:14:57", "remaining_time": "0:27:39", "throughput": 7987.26, "total_tokens": 35925120} +{"current_steps": 11415, "total_steps": 15621, "loss": 0.3223, "lr": 4.1033607943098415e-07, "epoch": 0.7307470712502401, "percentage": 73.07, "elapsed_time": "1:14:58", "remaining_time": "0:27:37", "throughput": 7989.56, "total_tokens": 35940800} +{"current_steps": 11420, "total_steps": 15621, "loss": 0.3444, "lr": 4.0943400665547423e-07, "epoch": 0.731067153191217, "percentage": 73.11, "elapsed_time": "1:14:59", "remaining_time": "0:27:35", "throughput": 7991.78, "total_tokens": 35955968} +{"current_steps": 11425, "total_steps": 15621, "loss": 0.3449, "lr": 4.0853267120854064e-07, "epoch": 0.7313872351321938, "percentage": 73.14, "elapsed_time": "1:14:59", "remaining_time": "0:27:32", "throughput": 7994.14, "total_tokens": 35972096} +{"current_steps": 11430, "total_steps": 15621, "loss": 0.3315, "lr": 4.076320742155117e-07, "epoch": 0.7317073170731707, "percentage": 73.17, "elapsed_time": "1:15:00", "remaining_time": "0:27:30", "throughput": 7996.25, "total_tokens": 35986624} +{"current_steps": 11435, "total_steps": 15621, "loss": 0.3493, "lr": 4.067322168007928e-07, "epoch": 0.7320273990141476, "percentage": 73.2, "elapsed_time": "1:15:01", "remaining_time": "0:27:27", "throughput": 7998.66, "total_tokens": 36003008} +{"current_steps": 11440, "total_steps": 15621, "loss": 0.3597, "lr": 4.0583310008786775e-07, "epoch": 0.7323474809551245, "percentage": 73.23, "elapsed_time": "1:15:01", "remaining_time": "0:27:25", "throughput": 8000.68, "total_tokens": 36017152} +{"current_steps": 11445, "total_steps": 15621, "loss": 0.271, "lr": 4.049347251992932e-07, "epoch": 0.7326675628961014, "percentage": 73.27, "elapsed_time": "1:15:02", "remaining_time": "0:27:22", "throughput": 8002.82, "total_tokens": 36031936} +{"current_steps": 11450, "total_steps": 15621, "loss": 0.353, "lr": 4.0403709325670064e-07, "epoch": 0.7329876448370783, "percentage": 73.3, "elapsed_time": "1:15:03", "remaining_time": "0:27:20", "throughput": 8005.19, "total_tokens": 36048064} +{"current_steps": 11455, "total_steps": 15621, "loss": 0.4653, "lr": 4.03140205380795e-07, "epoch": 0.7333077267780552, "percentage": 73.33, "elapsed_time": "1:15:03", "remaining_time": "0:27:17", "throughput": 8007.56, "total_tokens": 36064256} +{"current_steps": 11460, "total_steps": 15621, "loss": 0.6715, "lr": 4.0224406269135115e-07, "epoch": 0.7336278087190321, "percentage": 73.36, "elapsed_time": "1:15:04", "remaining_time": "0:27:15", "throughput": 8009.76, "total_tokens": 36079424} +{"current_steps": 11465, "total_steps": 15621, "loss": 0.3111, "lr": 4.0134866630721266e-07, "epoch": 0.733947890660009, "percentage": 73.39, "elapsed_time": "1:15:05", "remaining_time": "0:27:13", "throughput": 8012.1, "total_tokens": 36095424} +{"current_steps": 11470, "total_steps": 15621, "loss": 0.3618, "lr": 4.0045401734629367e-07, "epoch": 0.7342679726009859, "percentage": 73.43, "elapsed_time": "1:15:05", "remaining_time": "0:27:10", "throughput": 8014.43, "total_tokens": 36111360} +{"current_steps": 11475, "total_steps": 15621, "loss": 0.3825, "lr": 3.9956011692557377e-07, "epoch": 0.7345880545419627, "percentage": 73.46, "elapsed_time": "1:15:06", "remaining_time": "0:27:08", "throughput": 8016.75, "total_tokens": 36127232} +{"current_steps": 11480, "total_steps": 15621, "loss": 0.3532, "lr": 3.986669661610972e-07, "epoch": 0.7349081364829396, "percentage": 73.49, "elapsed_time": "1:15:07", "remaining_time": "0:27:05", "throughput": 8019.07, "total_tokens": 36143168} +{"current_steps": 11485, "total_steps": 15621, "loss": 0.3323, "lr": 3.9777456616797414e-07, "epoch": 0.7352282184239165, "percentage": 73.52, "elapsed_time": "1:15:07", "remaining_time": "0:27:03", "throughput": 8021.27, "total_tokens": 36158272} +{"current_steps": 11490, "total_steps": 15621, "loss": 0.3731, "lr": 3.968829180603761e-07, "epoch": 0.7355483003648934, "percentage": 73.55, "elapsed_time": "1:15:08", "remaining_time": "0:27:00", "throughput": 8023.4, "total_tokens": 36173056} +{"current_steps": 11495, "total_steps": 15621, "loss": 0.3927, "lr": 3.9599202295153624e-07, "epoch": 0.7358683823058703, "percentage": 73.59, "elapsed_time": "1:15:09", "remaining_time": "0:26:58", "throughput": 8025.54, "total_tokens": 36187904} +{"current_steps": 11500, "total_steps": 15621, "loss": 0.3596, "lr": 3.951018819537476e-07, "epoch": 0.7361884642468471, "percentage": 73.62, "elapsed_time": "1:15:09", "remaining_time": "0:26:56", "throughput": 8028.17, "total_tokens": 36205632} +{"current_steps": 11505, "total_steps": 15621, "loss": 0.3478, "lr": 3.942124961783616e-07, "epoch": 0.7365085461878241, "percentage": 73.65, "elapsed_time": "1:15:10", "remaining_time": "0:26:53", "throughput": 8030.27, "total_tokens": 36220160} +{"current_steps": 11510, "total_steps": 15621, "loss": 0.3164, "lr": 3.933238667357869e-07, "epoch": 0.736828628128801, "percentage": 73.68, "elapsed_time": "1:15:11", "remaining_time": "0:26:51", "throughput": 8032.65, "total_tokens": 36236416} +{"current_steps": 11515, "total_steps": 15621, "loss": 0.3449, "lr": 3.924359947354876e-07, "epoch": 0.7371487100697779, "percentage": 73.71, "elapsed_time": "1:15:11", "remaining_time": "0:26:48", "throughput": 8034.84, "total_tokens": 36251584} +{"current_steps": 11520, "total_steps": 15621, "loss": 0.3289, "lr": 3.915488812859826e-07, "epoch": 0.7374687920107548, "percentage": 73.75, "elapsed_time": "1:15:12", "remaining_time": "0:26:46", "throughput": 8036.88, "total_tokens": 36265856} +{"current_steps": 11525, "total_steps": 15621, "loss": 0.3927, "lr": 3.90662527494843e-07, "epoch": 0.7377888739517316, "percentage": 73.78, "elapsed_time": "1:15:13", "remaining_time": "0:26:43", "throughput": 8039.57, "total_tokens": 36283904} +{"current_steps": 11530, "total_steps": 15621, "loss": 0.3627, "lr": 3.8977693446869285e-07, "epoch": 0.7381089558927085, "percentage": 73.81, "elapsed_time": "1:15:13", "remaining_time": "0:26:41", "throughput": 8041.64, "total_tokens": 36298432} +{"current_steps": 11535, "total_steps": 15621, "loss": 0.3247, "lr": 3.8889210331320445e-07, "epoch": 0.7384290378336854, "percentage": 73.84, "elapsed_time": "1:15:14", "remaining_time": "0:26:39", "throughput": 8043.87, "total_tokens": 36313728} +{"current_steps": 11540, "total_steps": 15621, "loss": 0.3595, "lr": 3.8800803513310033e-07, "epoch": 0.7387491197746623, "percentage": 73.87, "elapsed_time": "1:15:15", "remaining_time": "0:26:36", "throughput": 8046.09, "total_tokens": 36329088} +{"current_steps": 11545, "total_steps": 15621, "loss": 0.4255, "lr": 3.8712473103214993e-07, "epoch": 0.7390692017156392, "percentage": 73.91, "elapsed_time": "1:15:15", "remaining_time": "0:26:34", "throughput": 8048.41, "total_tokens": 36345024} +{"current_steps": 11550, "total_steps": 15621, "loss": 0.3089, "lr": 3.862421921131688e-07, "epoch": 0.739389283656616, "percentage": 73.94, "elapsed_time": "1:15:16", "remaining_time": "0:26:31", "throughput": 8050.88, "total_tokens": 36361792} +{"current_steps": 11555, "total_steps": 15621, "loss": 0.2832, "lr": 3.85360419478017e-07, "epoch": 0.739709365597593, "percentage": 73.97, "elapsed_time": "1:15:17", "remaining_time": "0:26:29", "throughput": 8053.11, "total_tokens": 36377152} +{"current_steps": 11560, "total_steps": 15621, "loss": 0.3552, "lr": 3.8447941422759786e-07, "epoch": 0.7400294475385699, "percentage": 74.0, "elapsed_time": "1:15:17", "remaining_time": "0:26:27", "throughput": 8055.58, "total_tokens": 36394048} +{"current_steps": 11565, "total_steps": 15621, "loss": 0.3684, "lr": 3.835991774618579e-07, "epoch": 0.7403495294795468, "percentage": 74.03, "elapsed_time": "1:15:18", "remaining_time": "0:26:24", "throughput": 8057.75, "total_tokens": 36409152} +{"current_steps": 11570, "total_steps": 15621, "loss": 0.3859, "lr": 3.827197102797818e-07, "epoch": 0.7406696114205237, "percentage": 74.07, "elapsed_time": "1:15:19", "remaining_time": "0:26:22", "throughput": 8060.4, "total_tokens": 36427072} +{"current_steps": 11575, "total_steps": 15621, "loss": 0.4771, "lr": 3.818410137793947e-07, "epoch": 0.7409896933615006, "percentage": 74.1, "elapsed_time": "1:15:19", "remaining_time": "0:26:19", "throughput": 8062.91, "total_tokens": 36444288} +{"current_steps": 11580, "total_steps": 15621, "loss": 0.4402, "lr": 3.809630890577602e-07, "epoch": 0.7413097753024774, "percentage": 74.13, "elapsed_time": "1:15:20", "remaining_time": "0:26:17", "throughput": 8065.21, "total_tokens": 36460096} +{"current_steps": 11585, "total_steps": 15621, "loss": 0.3388, "lr": 3.800859372109777e-07, "epoch": 0.7416298572434543, "percentage": 74.16, "elapsed_time": "1:15:21", "remaining_time": "0:26:15", "throughput": 8067.4, "total_tokens": 36475264} +{"current_steps": 11590, "total_steps": 15621, "loss": 0.325, "lr": 3.7920955933418055e-07, "epoch": 0.7419499391844312, "percentage": 74.19, "elapsed_time": "1:15:21", "remaining_time": "0:26:12", "throughput": 8069.73, "total_tokens": 36491264} +{"current_steps": 11595, "total_steps": 15621, "loss": 0.3245, "lr": 3.7833395652153775e-07, "epoch": 0.7422700211254081, "percentage": 74.23, "elapsed_time": "1:15:22", "remaining_time": "0:26:10", "throughput": 8071.9, "total_tokens": 36506368} +{"current_steps": 11600, "total_steps": 15621, "loss": 0.3117, "lr": 3.774591298662497e-07, "epoch": 0.742590103066385, "percentage": 74.26, "elapsed_time": "1:15:23", "remaining_time": "0:26:07", "throughput": 8074.25, "total_tokens": 36522432} +{"current_steps": 11605, "total_steps": 15621, "loss": 0.4221, "lr": 3.765850804605468e-07, "epoch": 0.7429101850073618, "percentage": 74.29, "elapsed_time": "1:15:24", "remaining_time": "0:26:05", "throughput": 8076.68, "total_tokens": 36539008} +{"current_steps": 11610, "total_steps": 15621, "loss": 0.2818, "lr": 3.7571180939569104e-07, "epoch": 0.7432302669483388, "percentage": 74.32, "elapsed_time": "1:15:24", "remaining_time": "0:26:03", "throughput": 8078.87, "total_tokens": 36554240} +{"current_steps": 11615, "total_steps": 15621, "loss": 0.3181, "lr": 3.748393177619711e-07, "epoch": 0.7435503488893157, "percentage": 74.36, "elapsed_time": "1:15:25", "remaining_time": "0:26:00", "throughput": 8081.13, "total_tokens": 36569920} +{"current_steps": 11620, "total_steps": 15621, "loss": 0.3139, "lr": 3.739676066487032e-07, "epoch": 0.7438704308302926, "percentage": 74.39, "elapsed_time": "1:15:26", "remaining_time": "0:25:58", "throughput": 8083.43, "total_tokens": 36585792} +{"current_steps": 11625, "total_steps": 15621, "loss": 0.2923, "lr": 3.730966771442289e-07, "epoch": 0.7441905127712695, "percentage": 74.42, "elapsed_time": "1:15:26", "remaining_time": "0:25:56", "throughput": 8085.66, "total_tokens": 36601280} +{"current_steps": 11630, "total_steps": 15621, "loss": 0.5229, "lr": 3.722265303359137e-07, "epoch": 0.7445105947122463, "percentage": 74.45, "elapsed_time": "1:15:27", "remaining_time": "0:25:53", "throughput": 8087.95, "total_tokens": 36617152} +{"current_steps": 11635, "total_steps": 15621, "loss": 0.4046, "lr": 3.713571673101463e-07, "epoch": 0.7448306766532232, "percentage": 74.48, "elapsed_time": "1:15:28", "remaining_time": "0:25:51", "throughput": 8090.16, "total_tokens": 36632512} +{"current_steps": 11640, "total_steps": 15621, "loss": 0.344, "lr": 3.704885891523366e-07, "epoch": 0.7451507585942001, "percentage": 74.52, "elapsed_time": "1:15:28", "remaining_time": "0:25:48", "throughput": 8092.35, "total_tokens": 36647744} +{"current_steps": 11645, "total_steps": 15621, "loss": 0.3938, "lr": 3.696207969469146e-07, "epoch": 0.745470840535177, "percentage": 74.55, "elapsed_time": "1:15:29", "remaining_time": "0:25:46", "throughput": 8094.6, "total_tokens": 36663360} +{"current_steps": 11650, "total_steps": 15621, "loss": 0.373, "lr": 3.6875379177732913e-07, "epoch": 0.7457909224761539, "percentage": 74.58, "elapsed_time": "1:15:30", "remaining_time": "0:25:44", "throughput": 8096.79, "total_tokens": 36678656} +{"current_steps": 11655, "total_steps": 15621, "loss": 0.5096, "lr": 3.6788757472604634e-07, "epoch": 0.7461110044171307, "percentage": 74.61, "elapsed_time": "1:15:30", "remaining_time": "0:25:41", "throughput": 8098.99, "total_tokens": 36693952} +{"current_steps": 11660, "total_steps": 15621, "loss": 0.3264, "lr": 3.6702214687454825e-07, "epoch": 0.7464310863581076, "percentage": 74.64, "elapsed_time": "1:15:31", "remaining_time": "0:25:39", "throughput": 8101.3, "total_tokens": 36709888} +{"current_steps": 11665, "total_steps": 15621, "loss": 0.3066, "lr": 3.6615750930333177e-07, "epoch": 0.7467511682990846, "percentage": 74.68, "elapsed_time": "1:15:32", "remaining_time": "0:25:36", "throughput": 8103.54, "total_tokens": 36725504} +{"current_steps": 11670, "total_steps": 15621, "loss": 0.3025, "lr": 3.65293663091907e-07, "epoch": 0.7470712502400615, "percentage": 74.71, "elapsed_time": "1:15:32", "remaining_time": "0:25:34", "throughput": 8105.85, "total_tokens": 36741376} +{"current_steps": 11675, "total_steps": 15621, "loss": 0.435, "lr": 3.6443060931879623e-07, "epoch": 0.7473913321810384, "percentage": 74.74, "elapsed_time": "1:15:33", "remaining_time": "0:25:32", "throughput": 8108.07, "total_tokens": 36756864} +{"current_steps": 11680, "total_steps": 15621, "loss": 0.4612, "lr": 3.635683490615321e-07, "epoch": 0.7477114141220152, "percentage": 74.77, "elapsed_time": "1:15:34", "remaining_time": "0:25:29", "throughput": 8110.34, "total_tokens": 36772608} +{"current_steps": 11685, "total_steps": 15621, "loss": 0.3057, "lr": 3.6270688339665634e-07, "epoch": 0.7480314960629921, "percentage": 74.8, "elapsed_time": "1:15:34", "remaining_time": "0:25:27", "throughput": 8112.61, "total_tokens": 36788352} +{"current_steps": 11690, "total_steps": 15621, "loss": 0.3581, "lr": 3.6184621339972e-07, "epoch": 0.748351578003969, "percentage": 74.84, "elapsed_time": "1:15:35", "remaining_time": "0:25:25", "throughput": 8114.87, "total_tokens": 36804096} +{"current_steps": 11695, "total_steps": 15621, "loss": 0.3592, "lr": 3.609863401452786e-07, "epoch": 0.7486716599449459, "percentage": 74.87, "elapsed_time": "1:15:36", "remaining_time": "0:25:22", "throughput": 8117.14, "total_tokens": 36819776} +{"current_steps": 11700, "total_steps": 15621, "loss": 0.4102, "lr": 3.6012726470689416e-07, "epoch": 0.7489917418859228, "percentage": 74.9, "elapsed_time": "1:15:36", "remaining_time": "0:25:20", "throughput": 8119.34, "total_tokens": 36835072} +{"current_steps": 11705, "total_steps": 15621, "loss": 0.3346, "lr": 3.592689881571329e-07, "epoch": 0.7493118238268996, "percentage": 74.93, "elapsed_time": "1:15:37", "remaining_time": "0:25:18", "throughput": 8121.6, "total_tokens": 36850816} +{"current_steps": 11710, "total_steps": 15621, "loss": 0.4205, "lr": 3.5841151156756334e-07, "epoch": 0.7496319057678765, "percentage": 74.96, "elapsed_time": "1:15:38", "remaining_time": "0:25:15", "throughput": 8123.83, "total_tokens": 36866368} +{"current_steps": 11715, "total_steps": 15621, "loss": 0.4196, "lr": 3.575548360087539e-07, "epoch": 0.7499519877088535, "percentage": 75.0, "elapsed_time": "1:15:38", "remaining_time": "0:25:13", "throughput": 8126.63, "total_tokens": 36885376} +{"current_steps": 11720, "total_steps": 15621, "loss": 0.3191, "lr": 3.5669896255027533e-07, "epoch": 0.7502720696498304, "percentage": 75.03, "elapsed_time": "1:15:39", "remaining_time": "0:25:10", "throughput": 8128.74, "total_tokens": 36900288} +{"current_steps": 11725, "total_steps": 15621, "loss": 0.3892, "lr": 3.5584389226069543e-07, "epoch": 0.7505921515908073, "percentage": 75.06, "elapsed_time": "1:15:40", "remaining_time": "0:25:08", "throughput": 8131.04, "total_tokens": 36916224} +{"current_steps": 11730, "total_steps": 15621, "loss": 0.3097, "lr": 3.5498962620757866e-07, "epoch": 0.7509122335317842, "percentage": 75.09, "elapsed_time": "1:15:40", "remaining_time": "0:25:06", "throughput": 8133.25, "total_tokens": 36931648} +{"current_steps": 11730, "total_steps": 15621, "eval_loss": 0.36731547117233276, "epoch": 0.7509122335317842, "percentage": 75.09, "elapsed_time": "1:16:31", "remaining_time": "0:25:23", "throughput": 8043.65, "total_tokens": 36931648} +{"current_steps": 11735, "total_steps": 15621, "loss": 0.4301, "lr": 3.5413616545748713e-07, "epoch": 0.751232315472761, "percentage": 75.12, "elapsed_time": "1:17:07", "remaining_time": "0:25:32", "throughput": 7983.35, "total_tokens": 36945856} +{"current_steps": 11740, "total_steps": 15621, "loss": 0.509, "lr": 3.532835110759763e-07, "epoch": 0.7515523974137379, "percentage": 75.16, "elapsed_time": "1:17:08", "remaining_time": "0:25:30", "throughput": 7985.62, "total_tokens": 36961792} +{"current_steps": 11745, "total_steps": 15621, "loss": 0.3072, "lr": 3.524316641275955e-07, "epoch": 0.7518724793547148, "percentage": 75.19, "elapsed_time": "1:17:09", "remaining_time": "0:25:27", "throughput": 7987.8, "total_tokens": 36977152} +{"current_steps": 11750, "total_steps": 15621, "loss": 0.4213, "lr": 3.5158062567588467e-07, "epoch": 0.7521925612956917, "percentage": 75.22, "elapsed_time": "1:17:09", "remaining_time": "0:25:25", "throughput": 7989.88, "total_tokens": 36991936} +{"current_steps": 11755, "total_steps": 15621, "loss": 0.4065, "lr": 3.5073039678337633e-07, "epoch": 0.7525126432366686, "percentage": 75.25, "elapsed_time": "1:17:10", "remaining_time": "0:25:22", "throughput": 7991.97, "total_tokens": 37006784} +{"current_steps": 11760, "total_steps": 15621, "loss": 0.3394, "lr": 3.498809785115908e-07, "epoch": 0.7528327251776454, "percentage": 75.28, "elapsed_time": "1:17:11", "remaining_time": "0:25:20", "throughput": 7994.16, "total_tokens": 37022208} +{"current_steps": 11765, "total_steps": 15621, "loss": 0.3495, "lr": 3.4903237192103697e-07, "epoch": 0.7531528071186223, "percentage": 75.32, "elapsed_time": "1:17:11", "remaining_time": "0:25:18", "throughput": 7996.66, "total_tokens": 37039488} +{"current_steps": 11770, "total_steps": 15621, "loss": 0.3453, "lr": 3.481845780712099e-07, "epoch": 0.7534728890595993, "percentage": 75.35, "elapsed_time": "1:17:12", "remaining_time": "0:25:15", "throughput": 7999.03, "total_tokens": 37056064} +{"current_steps": 11775, "total_steps": 15621, "loss": 0.3434, "lr": 3.4733759802059037e-07, "epoch": 0.7537929710005762, "percentage": 75.38, "elapsed_time": "1:17:13", "remaining_time": "0:25:13", "throughput": 8001.35, "total_tokens": 37072256} +{"current_steps": 11780, "total_steps": 15621, "loss": 0.428, "lr": 3.4649143282664273e-07, "epoch": 0.7541130529415531, "percentage": 75.41, "elapsed_time": "1:17:13", "remaining_time": "0:25:10", "throughput": 8003.47, "total_tokens": 37087360} +{"current_steps": 11785, "total_steps": 15621, "loss": 0.3164, "lr": 3.456460835458143e-07, "epoch": 0.7544331348825299, "percentage": 75.44, "elapsed_time": "1:17:14", "remaining_time": "0:25:08", "throughput": 8005.54, "total_tokens": 37102144} +{"current_steps": 11790, "total_steps": 15621, "loss": 0.3131, "lr": 3.4480155123353337e-07, "epoch": 0.7547532168235068, "percentage": 75.48, "elapsed_time": "1:17:15", "remaining_time": "0:25:06", "throughput": 8007.72, "total_tokens": 37117568} +{"current_steps": 11795, "total_steps": 15621, "loss": 0.4608, "lr": 3.4395783694420875e-07, "epoch": 0.7550732987644837, "percentage": 75.51, "elapsed_time": "1:17:15", "remaining_time": "0:25:03", "throughput": 8009.87, "total_tokens": 37132800} +{"current_steps": 11800, "total_steps": 15621, "loss": 0.4036, "lr": 3.4311494173122743e-07, "epoch": 0.7553933807054606, "percentage": 75.54, "elapsed_time": "1:17:16", "remaining_time": "0:25:01", "throughput": 8011.96, "total_tokens": 37147776} +{"current_steps": 11805, "total_steps": 15621, "loss": 0.3944, "lr": 3.422728666469541e-07, "epoch": 0.7557134626464375, "percentage": 75.57, "elapsed_time": "1:17:17", "remaining_time": "0:24:58", "throughput": 8014.27, "total_tokens": 37163904} +{"current_steps": 11810, "total_steps": 15621, "loss": 0.4316, "lr": 3.41431612742729e-07, "epoch": 0.7560335445874143, "percentage": 75.6, "elapsed_time": "1:17:17", "remaining_time": "0:24:56", "throughput": 8016.63, "total_tokens": 37180416} +{"current_steps": 11815, "total_steps": 15621, "loss": 0.4235, "lr": 3.4059118106886855e-07, "epoch": 0.7563536265283912, "percentage": 75.64, "elapsed_time": "1:17:18", "remaining_time": "0:24:54", "throughput": 8018.91, "total_tokens": 37196480} +{"current_steps": 11820, "total_steps": 15621, "loss": 0.5208, "lr": 3.3975157267466036e-07, "epoch": 0.7566737084693682, "percentage": 75.67, "elapsed_time": "1:17:19", "remaining_time": "0:24:51", "throughput": 8021.05, "total_tokens": 37211648} +{"current_steps": 11825, "total_steps": 15621, "loss": 0.2942, "lr": 3.389127886083656e-07, "epoch": 0.7569937904103451, "percentage": 75.7, "elapsed_time": "1:17:19", "remaining_time": "0:24:49", "throughput": 8023.23, "total_tokens": 37227072} +{"current_steps": 11830, "total_steps": 15621, "loss": 0.3342, "lr": 3.3807482991721667e-07, "epoch": 0.757313872351322, "percentage": 75.73, "elapsed_time": "1:17:20", "remaining_time": "0:24:47", "throughput": 8025.65, "total_tokens": 37243968} +{"current_steps": 11835, "total_steps": 15621, "loss": 0.32, "lr": 3.3723769764741474e-07, "epoch": 0.7576339542922989, "percentage": 75.76, "elapsed_time": "1:17:21", "remaining_time": "0:24:44", "throughput": 8027.78, "total_tokens": 37259200} +{"current_steps": 11840, "total_steps": 15621, "loss": 0.2946, "lr": 3.3640139284412825e-07, "epoch": 0.7579540362332757, "percentage": 75.8, "elapsed_time": "1:17:21", "remaining_time": "0:24:42", "throughput": 8030.03, "total_tokens": 37275072} +{"current_steps": 11845, "total_steps": 15621, "loss": 0.4, "lr": 3.355659165514948e-07, "epoch": 0.7582741181742526, "percentage": 75.83, "elapsed_time": "1:17:22", "remaining_time": "0:24:40", "throughput": 8032.35, "total_tokens": 37291392} +{"current_steps": 11850, "total_steps": 15621, "loss": 0.2828, "lr": 3.347312698126161e-07, "epoch": 0.7585942001152295, "percentage": 75.86, "elapsed_time": "1:17:23", "remaining_time": "0:24:37", "throughput": 8034.67, "total_tokens": 37307648} +{"current_steps": 11855, "total_steps": 15621, "loss": 0.2188, "lr": 3.338974536695578e-07, "epoch": 0.7589142820562064, "percentage": 75.89, "elapsed_time": "1:17:24", "remaining_time": "0:24:35", "throughput": 8036.85, "total_tokens": 37323136} +{"current_steps": 11860, "total_steps": 15621, "loss": 0.3193, "lr": 3.330644691633492e-07, "epoch": 0.7592343639971832, "percentage": 75.92, "elapsed_time": "1:17:24", "remaining_time": "0:24:32", "throughput": 8039.01, "total_tokens": 37338496} +{"current_steps": 11865, "total_steps": 15621, "loss": 0.2764, "lr": 3.322323173339818e-07, "epoch": 0.7595544459381601, "percentage": 75.96, "elapsed_time": "1:17:25", "remaining_time": "0:24:30", "throughput": 8041.64, "total_tokens": 37356800} +{"current_steps": 11870, "total_steps": 15621, "loss": 0.4461, "lr": 3.314009992204071e-07, "epoch": 0.759874527879137, "percentage": 75.99, "elapsed_time": "1:17:26", "remaining_time": "0:24:28", "throughput": 8043.9, "total_tokens": 37372800} +{"current_steps": 11875, "total_steps": 15621, "loss": 0.3172, "lr": 3.3057051586053443e-07, "epoch": 0.760194609820114, "percentage": 76.02, "elapsed_time": "1:17:26", "remaining_time": "0:24:25", "throughput": 8046.13, "total_tokens": 37388608} +{"current_steps": 11880, "total_steps": 15621, "loss": 0.4503, "lr": 3.297408682912329e-07, "epoch": 0.7605146917610909, "percentage": 76.05, "elapsed_time": "1:17:27", "remaining_time": "0:24:23", "throughput": 8048.49, "total_tokens": 37405184} +{"current_steps": 11885, "total_steps": 15621, "loss": 0.2743, "lr": 3.289120575483271e-07, "epoch": 0.7608347737020678, "percentage": 76.08, "elapsed_time": "1:17:28", "remaining_time": "0:24:21", "throughput": 8050.58, "total_tokens": 37420096} +{"current_steps": 11890, "total_steps": 15621, "loss": 0.4177, "lr": 3.280840846665969e-07, "epoch": 0.7611548556430446, "percentage": 76.12, "elapsed_time": "1:17:28", "remaining_time": "0:24:18", "throughput": 8052.56, "total_tokens": 37434368} +{"current_steps": 11895, "total_steps": 15621, "loss": 0.3019, "lr": 3.272569506797761e-07, "epoch": 0.7614749375840215, "percentage": 76.15, "elapsed_time": "1:17:29", "remaining_time": "0:24:16", "throughput": 8054.64, "total_tokens": 37449344} +{"current_steps": 11900, "total_steps": 15621, "loss": 0.3364, "lr": 3.2643065662055136e-07, "epoch": 0.7617950195249984, "percentage": 76.18, "elapsed_time": "1:17:30", "remaining_time": "0:24:14", "throughput": 8056.76, "total_tokens": 37464448} +{"current_steps": 11905, "total_steps": 15621, "loss": 0.2844, "lr": 3.2560520352056033e-07, "epoch": 0.7621151014659753, "percentage": 76.21, "elapsed_time": "1:17:30", "remaining_time": "0:24:11", "throughput": 8059.25, "total_tokens": 37481856} +{"current_steps": 11910, "total_steps": 15621, "loss": 0.3952, "lr": 3.24780592410391e-07, "epoch": 0.7624351834069522, "percentage": 76.24, "elapsed_time": "1:17:31", "remaining_time": "0:24:09", "throughput": 8061.5, "total_tokens": 37497856} +{"current_steps": 11915, "total_steps": 15621, "loss": 0.4545, "lr": 3.2395682431957994e-07, "epoch": 0.762755265347929, "percentage": 76.28, "elapsed_time": "1:17:32", "remaining_time": "0:24:06", "throughput": 8063.71, "total_tokens": 37513600} +{"current_steps": 11920, "total_steps": 15621, "loss": 0.3272, "lr": 3.231339002766115e-07, "epoch": 0.7630753472889059, "percentage": 76.31, "elapsed_time": "1:17:32", "remaining_time": "0:24:04", "throughput": 8065.92, "total_tokens": 37529408} +{"current_steps": 11925, "total_steps": 15621, "loss": 0.3396, "lr": 3.2231182130891564e-07, "epoch": 0.7633954292298829, "percentage": 76.34, "elapsed_time": "1:17:33", "remaining_time": "0:24:02", "throughput": 8068.29, "total_tokens": 37545984} +{"current_steps": 11930, "total_steps": 15621, "loss": 0.3342, "lr": 3.214905884428679e-07, "epoch": 0.7637155111708598, "percentage": 76.37, "elapsed_time": "1:17:34", "remaining_time": "0:23:59", "throughput": 8070.52, "total_tokens": 37561856} +{"current_steps": 11935, "total_steps": 15621, "loss": 0.3292, "lr": 3.206702027037868e-07, "epoch": 0.7640355931118367, "percentage": 76.4, "elapsed_time": "1:17:34", "remaining_time": "0:23:57", "throughput": 8072.9, "total_tokens": 37578624} +{"current_steps": 11940, "total_steps": 15621, "loss": 0.3962, "lr": 3.198506651159344e-07, "epoch": 0.7643556750528135, "percentage": 76.44, "elapsed_time": "1:17:35", "remaining_time": "0:23:55", "throughput": 8075.04, "total_tokens": 37593920} +{"current_steps": 11945, "total_steps": 15621, "loss": 0.3658, "lr": 3.190319767025121e-07, "epoch": 0.7646757569937904, "percentage": 76.47, "elapsed_time": "1:17:36", "remaining_time": "0:23:52", "throughput": 8077.24, "total_tokens": 37609664} +{"current_steps": 11950, "total_steps": 15621, "loss": 0.4959, "lr": 3.1821413848566213e-07, "epoch": 0.7649958389347673, "percentage": 76.5, "elapsed_time": "1:17:36", "remaining_time": "0:23:50", "throughput": 8079.54, "total_tokens": 37626048} +{"current_steps": 11955, "total_steps": 15621, "loss": 0.3753, "lr": 3.1739715148646564e-07, "epoch": 0.7653159208757442, "percentage": 76.53, "elapsed_time": "1:17:37", "remaining_time": "0:23:48", "throughput": 8081.75, "total_tokens": 37641792} +{"current_steps": 11960, "total_steps": 15621, "loss": 0.4534, "lr": 3.1658101672494043e-07, "epoch": 0.7656360028167211, "percentage": 76.56, "elapsed_time": "1:17:38", "remaining_time": "0:23:45", "throughput": 8083.79, "total_tokens": 37656512} +{"current_steps": 11965, "total_steps": 15621, "loss": 0.3377, "lr": 3.157657352200397e-07, "epoch": 0.7659560847576979, "percentage": 76.6, "elapsed_time": "1:17:38", "remaining_time": "0:23:43", "throughput": 8085.94, "total_tokens": 37672000} +{"current_steps": 11970, "total_steps": 15621, "loss": 0.3278, "lr": 3.149513079896521e-07, "epoch": 0.7662761666986748, "percentage": 76.63, "elapsed_time": "1:17:39", "remaining_time": "0:23:41", "throughput": 8088.07, "total_tokens": 37687232} +{"current_steps": 11975, "total_steps": 15621, "loss": 0.3237, "lr": 3.1413773605060034e-07, "epoch": 0.7665962486396517, "percentage": 76.66, "elapsed_time": "1:17:40", "remaining_time": "0:23:38", "throughput": 8090.23, "total_tokens": 37702656} +{"current_steps": 11980, "total_steps": 15621, "loss": 0.4234, "lr": 3.1332502041863783e-07, "epoch": 0.7669163305806287, "percentage": 76.69, "elapsed_time": "1:17:40", "remaining_time": "0:23:36", "throughput": 8092.39, "total_tokens": 37718080} +{"current_steps": 11985, "total_steps": 15621, "loss": 0.3181, "lr": 3.1251316210844946e-07, "epoch": 0.7672364125216056, "percentage": 76.72, "elapsed_time": "1:17:41", "remaining_time": "0:23:34", "throughput": 8094.89, "total_tokens": 37735680} +{"current_steps": 11990, "total_steps": 15621, "loss": 0.2871, "lr": 3.1170216213365055e-07, "epoch": 0.7675564944625825, "percentage": 76.76, "elapsed_time": "1:17:42", "remaining_time": "0:23:31", "throughput": 8096.87, "total_tokens": 37749952} +{"current_steps": 11995, "total_steps": 15621, "loss": 0.4582, "lr": 3.1089202150678397e-07, "epoch": 0.7678765764035593, "percentage": 76.79, "elapsed_time": "1:17:42", "remaining_time": "0:23:29", "throughput": 8099.0, "total_tokens": 37765312} +{"current_steps": 12000, "total_steps": 15621, "loss": 0.4919, "lr": 3.1008274123931886e-07, "epoch": 0.7681966583445362, "percentage": 76.82, "elapsed_time": "1:17:43", "remaining_time": "0:23:27", "throughput": 8101.06, "total_tokens": 37780160} +{"current_steps": 12005, "total_steps": 15621, "loss": 0.2657, "lr": 3.092743223416523e-07, "epoch": 0.7685167402855131, "percentage": 76.85, "elapsed_time": "1:17:44", "remaining_time": "0:23:24", "throughput": 8103.34, "total_tokens": 37796352} +{"current_steps": 12010, "total_steps": 15621, "loss": 0.3551, "lr": 3.0846676582310413e-07, "epoch": 0.76883682222649, "percentage": 76.88, "elapsed_time": "1:17:44", "remaining_time": "0:23:22", "throughput": 8105.67, "total_tokens": 37812864} +{"current_steps": 12015, "total_steps": 15621, "loss": 0.3818, "lr": 3.076600726919185e-07, "epoch": 0.7691569041674668, "percentage": 76.92, "elapsed_time": "1:17:45", "remaining_time": "0:23:20", "throughput": 8107.75, "total_tokens": 37827840} +{"current_steps": 12020, "total_steps": 15621, "loss": 0.3599, "lr": 3.0685424395526106e-07, "epoch": 0.7694769861084437, "percentage": 76.95, "elapsed_time": "1:17:46", "remaining_time": "0:23:17", "throughput": 8110.52, "total_tokens": 37847040} +{"current_steps": 12025, "total_steps": 15621, "loss": 0.2875, "lr": 3.060492806192184e-07, "epoch": 0.7697970680494206, "percentage": 76.98, "elapsed_time": "1:17:47", "remaining_time": "0:23:15", "throughput": 8112.68, "total_tokens": 37862464} +{"current_steps": 12030, "total_steps": 15621, "loss": 0.3826, "lr": 3.052451836887968e-07, "epoch": 0.7701171499903975, "percentage": 77.01, "elapsed_time": "1:17:47", "remaining_time": "0:23:13", "throughput": 8114.81, "total_tokens": 37877760} +{"current_steps": 12035, "total_steps": 15621, "loss": 0.2867, "lr": 3.044419541679207e-07, "epoch": 0.7704372319313745, "percentage": 77.04, "elapsed_time": "1:17:48", "remaining_time": "0:23:11", "throughput": 8116.89, "total_tokens": 37892800} +{"current_steps": 12040, "total_steps": 15621, "loss": 0.4353, "lr": 3.0363959305943153e-07, "epoch": 0.7707573138723514, "percentage": 77.08, "elapsed_time": "1:17:49", "remaining_time": "0:23:08", "throughput": 8119.17, "total_tokens": 37909056} +{"current_steps": 12045, "total_steps": 15621, "loss": 0.3447, "lr": 3.028381013650867e-07, "epoch": 0.7710773958133282, "percentage": 77.11, "elapsed_time": "1:17:49", "remaining_time": "0:23:06", "throughput": 8121.46, "total_tokens": 37925376} +{"current_steps": 12050, "total_steps": 15621, "loss": 0.3705, "lr": 3.0203748008555783e-07, "epoch": 0.7713974777543051, "percentage": 77.14, "elapsed_time": "1:17:50", "remaining_time": "0:23:04", "throughput": 8123.74, "total_tokens": 37941632} +{"current_steps": 12055, "total_steps": 15621, "loss": 0.374, "lr": 3.012377302204301e-07, "epoch": 0.771717559695282, "percentage": 77.17, "elapsed_time": "1:17:51", "remaining_time": "0:23:01", "throughput": 8125.88, "total_tokens": 37957056} +{"current_steps": 12060, "total_steps": 15621, "loss": 0.3959, "lr": 3.0043885276820046e-07, "epoch": 0.7720376416362589, "percentage": 77.2, "elapsed_time": "1:17:51", "remaining_time": "0:22:59", "throughput": 8128.12, "total_tokens": 37973184} +{"current_steps": 12065, "total_steps": 15621, "loss": 0.3027, "lr": 2.99640848726277e-07, "epoch": 0.7723577235772358, "percentage": 77.24, "elapsed_time": "1:17:52", "remaining_time": "0:22:57", "throughput": 8130.21, "total_tokens": 37988288} +{"current_steps": 12070, "total_steps": 15621, "loss": 0.3723, "lr": 2.9884371909097704e-07, "epoch": 0.7726778055182126, "percentage": 77.27, "elapsed_time": "1:17:53", "remaining_time": "0:22:54", "throughput": 8132.42, "total_tokens": 38004224} +{"current_steps": 12075, "total_steps": 15621, "loss": 0.3721, "lr": 2.9804746485752616e-07, "epoch": 0.7729978874591895, "percentage": 77.3, "elapsed_time": "1:17:53", "remaining_time": "0:22:52", "throughput": 8134.55, "total_tokens": 38019456} +{"current_steps": 12080, "total_steps": 15621, "loss": 0.4237, "lr": 2.972520870200573e-07, "epoch": 0.7733179694001664, "percentage": 77.33, "elapsed_time": "1:17:54", "remaining_time": "0:22:50", "throughput": 8136.74, "total_tokens": 38035264} +{"current_steps": 12085, "total_steps": 15621, "loss": 0.4166, "lr": 2.9645758657160904e-07, "epoch": 0.7736380513411434, "percentage": 77.36, "elapsed_time": "1:17:55", "remaining_time": "0:22:47", "throughput": 8138.95, "total_tokens": 38051072} +{"current_steps": 12090, "total_steps": 15621, "loss": 0.3573, "lr": 2.9566396450412444e-07, "epoch": 0.7739581332821203, "percentage": 77.4, "elapsed_time": "1:17:55", "remaining_time": "0:22:45", "throughput": 8141.11, "total_tokens": 38066688} +{"current_steps": 12095, "total_steps": 15621, "loss": 0.3237, "lr": 2.9487122180844957e-07, "epoch": 0.7742782152230971, "percentage": 77.43, "elapsed_time": "1:17:56", "remaining_time": "0:22:43", "throughput": 8143.25, "total_tokens": 38082048} +{"current_steps": 12100, "total_steps": 15621, "loss": 0.3143, "lr": 2.9407935947433406e-07, "epoch": 0.774598297164074, "percentage": 77.46, "elapsed_time": "1:17:57", "remaining_time": "0:22:41", "throughput": 8145.36, "total_tokens": 38097344} +{"current_steps": 12105, "total_steps": 15621, "loss": 0.4448, "lr": 2.932883784904264e-07, "epoch": 0.7749183791050509, "percentage": 77.49, "elapsed_time": "1:17:57", "remaining_time": "0:22:38", "throughput": 8147.42, "total_tokens": 38112320} +{"current_steps": 12110, "total_steps": 15621, "loss": 0.244, "lr": 2.9249827984427555e-07, "epoch": 0.7752384610460278, "percentage": 77.52, "elapsed_time": "1:17:58", "remaining_time": "0:22:36", "throughput": 8149.6, "total_tokens": 38128000} +{"current_steps": 12115, "total_steps": 15621, "loss": 0.3049, "lr": 2.917090645223297e-07, "epoch": 0.7755585429870047, "percentage": 77.56, "elapsed_time": "1:17:59", "remaining_time": "0:22:34", "throughput": 8151.7, "total_tokens": 38143168} +{"current_steps": 12120, "total_steps": 15621, "loss": 0.301, "lr": 2.909207335099332e-07, "epoch": 0.7758786249279815, "percentage": 77.59, "elapsed_time": "1:17:59", "remaining_time": "0:22:31", "throughput": 8153.71, "total_tokens": 38157824} +{"current_steps": 12125, "total_steps": 15621, "loss": 0.3329, "lr": 2.9013328779132595e-07, "epoch": 0.7761987068689584, "percentage": 77.62, "elapsed_time": "1:18:00", "remaining_time": "0:22:29", "throughput": 8155.79, "total_tokens": 38172864} +{"current_steps": 12130, "total_steps": 15621, "loss": 0.4221, "lr": 2.893467283496439e-07, "epoch": 0.7765187888099353, "percentage": 77.65, "elapsed_time": "1:18:01", "remaining_time": "0:22:27", "throughput": 8157.76, "total_tokens": 38187264} +{"current_steps": 12135, "total_steps": 15621, "loss": 0.3534, "lr": 2.885610561669155e-07, "epoch": 0.7768388707509122, "percentage": 77.68, "elapsed_time": "1:18:01", "remaining_time": "0:22:24", "throughput": 8160.17, "total_tokens": 38204288} +{"current_steps": 12140, "total_steps": 15621, "loss": 0.3447, "lr": 2.8777627222406163e-07, "epoch": 0.7771589526918892, "percentage": 77.72, "elapsed_time": "1:18:02", "remaining_time": "0:22:22", "throughput": 8162.23, "total_tokens": 38219264} +{"current_steps": 12145, "total_steps": 15621, "loss": 0.3845, "lr": 2.869923775008943e-07, "epoch": 0.777479034632866, "percentage": 77.75, "elapsed_time": "1:18:03", "remaining_time": "0:22:20", "throughput": 8164.33, "total_tokens": 38234496} +{"current_steps": 12150, "total_steps": 15621, "loss": 0.2729, "lr": 2.862093729761155e-07, "epoch": 0.7777991165738429, "percentage": 77.78, "elapsed_time": "1:18:03", "remaining_time": "0:22:18", "throughput": 8166.66, "total_tokens": 38251072} +{"current_steps": 12155, "total_steps": 15621, "loss": 0.3971, "lr": 2.854272596273152e-07, "epoch": 0.7781191985148198, "percentage": 77.81, "elapsed_time": "1:18:04", "remaining_time": "0:22:15", "throughput": 8168.8, "total_tokens": 38266560} +{"current_steps": 12160, "total_steps": 15621, "loss": 0.331, "lr": 2.8464603843097134e-07, "epoch": 0.7784392804557967, "percentage": 77.84, "elapsed_time": "1:18:05", "remaining_time": "0:22:13", "throughput": 8171.09, "total_tokens": 38282944} +{"current_steps": 12165, "total_steps": 15621, "loss": 0.3274, "lr": 2.8386571036244764e-07, "epoch": 0.7787593623967736, "percentage": 77.88, "elapsed_time": "1:18:05", "remaining_time": "0:22:11", "throughput": 8173.38, "total_tokens": 38299264} +{"current_steps": 12170, "total_steps": 15621, "loss": 0.3866, "lr": 2.830862763959929e-07, "epoch": 0.7790794443377504, "percentage": 77.91, "elapsed_time": "1:18:06", "remaining_time": "0:22:08", "throughput": 8175.46, "total_tokens": 38314368} +{"current_steps": 12175, "total_steps": 15621, "loss": 0.3108, "lr": 2.8230773750473956e-07, "epoch": 0.7793995262787273, "percentage": 77.94, "elapsed_time": "1:18:07", "remaining_time": "0:22:06", "throughput": 8177.58, "total_tokens": 38329664} +{"current_steps": 12180, "total_steps": 15621, "loss": 0.3067, "lr": 2.8153009466070267e-07, "epoch": 0.7797196082197042, "percentage": 77.97, "elapsed_time": "1:18:07", "remaining_time": "0:22:04", "throughput": 8179.75, "total_tokens": 38345408} +{"current_steps": 12185, "total_steps": 15621, "loss": 0.2959, "lr": 2.807533488347783e-07, "epoch": 0.7800396901606811, "percentage": 78.0, "elapsed_time": "1:18:08", "remaining_time": "0:22:02", "throughput": 8182.19, "total_tokens": 38362688} +{"current_steps": 12190, "total_steps": 15621, "loss": 0.2508, "lr": 2.7997750099674277e-07, "epoch": 0.7803597721016581, "percentage": 78.04, "elapsed_time": "1:18:09", "remaining_time": "0:21:59", "throughput": 8184.24, "total_tokens": 38377600} +{"current_steps": 12195, "total_steps": 15621, "loss": 0.5263, "lr": 2.792025521152512e-07, "epoch": 0.780679854042635, "percentage": 78.07, "elapsed_time": "1:18:09", "remaining_time": "0:21:57", "throughput": 8186.3, "total_tokens": 38392640} +{"current_steps": 12200, "total_steps": 15621, "loss": 0.4457, "lr": 2.784285031578365e-07, "epoch": 0.7809999359836118, "percentage": 78.1, "elapsed_time": "1:18:10", "remaining_time": "0:21:55", "throughput": 8188.48, "total_tokens": 38408448} +{"current_steps": 12205, "total_steps": 15621, "loss": 0.3649, "lr": 2.7765535509090786e-07, "epoch": 0.7813200179245887, "percentage": 78.13, "elapsed_time": "1:18:11", "remaining_time": "0:21:53", "throughput": 8190.7, "total_tokens": 38424512} +{"current_steps": 12210, "total_steps": 15621, "loss": 0.4661, "lr": 2.768831088797495e-07, "epoch": 0.7816400998655656, "percentage": 78.16, "elapsed_time": "1:18:11", "remaining_time": "0:21:50", "throughput": 8192.73, "total_tokens": 38439296} +{"current_steps": 12215, "total_steps": 15621, "loss": 0.247, "lr": 2.761117654885201e-07, "epoch": 0.7819601818065425, "percentage": 78.2, "elapsed_time": "1:18:12", "remaining_time": "0:21:48", "throughput": 8194.97, "total_tokens": 38455424} +{"current_steps": 12220, "total_steps": 15621, "loss": 0.3314, "lr": 2.7534132588025063e-07, "epoch": 0.7822802637475194, "percentage": 78.23, "elapsed_time": "1:18:13", "remaining_time": "0:21:46", "throughput": 8197.11, "total_tokens": 38470976} +{"current_steps": 12225, "total_steps": 15621, "loss": 0.5088, "lr": 2.7457179101684483e-07, "epoch": 0.7826003456884962, "percentage": 78.26, "elapsed_time": "1:18:13", "remaining_time": "0:21:43", "throughput": 8199.18, "total_tokens": 38486016} +{"current_steps": 12230, "total_steps": 15621, "loss": 0.2958, "lr": 2.7380316185907506e-07, "epoch": 0.7829204276294731, "percentage": 78.29, "elapsed_time": "1:18:14", "remaining_time": "0:21:41", "throughput": 8201.27, "total_tokens": 38501248} +{"current_steps": 12235, "total_steps": 15621, "loss": 0.3508, "lr": 2.730354393665839e-07, "epoch": 0.78324050957045, "percentage": 78.32, "elapsed_time": "1:18:15", "remaining_time": "0:21:39", "throughput": 8203.44, "total_tokens": 38516992} +{"current_steps": 12240, "total_steps": 15621, "loss": 0.3871, "lr": 2.7226862449788245e-07, "epoch": 0.7835605915114269, "percentage": 78.36, "elapsed_time": "1:18:15", "remaining_time": "0:21:37", "throughput": 8205.41, "total_tokens": 38531456} +{"current_steps": 12245, "total_steps": 15621, "loss": 0.3283, "lr": 2.715027182103482e-07, "epoch": 0.7838806734524039, "percentage": 78.39, "elapsed_time": "1:18:16", "remaining_time": "0:21:34", "throughput": 8207.55, "total_tokens": 38546880} +{"current_steps": 12250, "total_steps": 15621, "loss": 0.3104, "lr": 2.707377214602232e-07, "epoch": 0.7842007553933807, "percentage": 78.42, "elapsed_time": "1:18:17", "remaining_time": "0:21:32", "throughput": 8209.65, "total_tokens": 38562176} +{"current_steps": 12255, "total_steps": 15621, "loss": 0.4304, "lr": 2.699736352026157e-07, "epoch": 0.7845208373343576, "percentage": 78.45, "elapsed_time": "1:18:17", "remaining_time": "0:21:30", "throughput": 8211.75, "total_tokens": 38577472} +{"current_steps": 12260, "total_steps": 15621, "loss": 0.3265, "lr": 2.6921046039149645e-07, "epoch": 0.7848409192753345, "percentage": 78.48, "elapsed_time": "1:18:18", "remaining_time": "0:21:28", "throughput": 8213.9, "total_tokens": 38593088} +{"current_steps": 12265, "total_steps": 15621, "loss": 0.3378, "lr": 2.6844819797969744e-07, "epoch": 0.7851610012163114, "percentage": 78.52, "elapsed_time": "1:18:19", "remaining_time": "0:21:25", "throughput": 8215.93, "total_tokens": 38607936} +{"current_steps": 12270, "total_steps": 15621, "loss": 0.2504, "lr": 2.6768684891891236e-07, "epoch": 0.7854810831572883, "percentage": 78.55, "elapsed_time": "1:18:19", "remaining_time": "0:21:23", "throughput": 8218.31, "total_tokens": 38625024} +{"current_steps": 12275, "total_steps": 15621, "loss": 0.3268, "lr": 2.6692641415969497e-07, "epoch": 0.7858011650982651, "percentage": 78.58, "elapsed_time": "1:18:20", "remaining_time": "0:21:21", "throughput": 8220.64, "total_tokens": 38641792} +{"current_steps": 12280, "total_steps": 15621, "loss": 0.4112, "lr": 2.66166894651457e-07, "epoch": 0.786121247039242, "percentage": 78.61, "elapsed_time": "1:18:21", "remaining_time": "0:21:19", "throughput": 8222.72, "total_tokens": 38656896} +{"current_steps": 12285, "total_steps": 15621, "loss": 0.343, "lr": 2.654082913424668e-07, "epoch": 0.7864413289802189, "percentage": 78.64, "elapsed_time": "1:18:21", "remaining_time": "0:21:16", "throughput": 8224.87, "total_tokens": 38672448} +{"current_steps": 12290, "total_steps": 15621, "loss": 0.305, "lr": 2.6465060517985003e-07, "epoch": 0.7867614109211958, "percentage": 78.68, "elapsed_time": "1:18:22", "remaining_time": "0:21:14", "throughput": 8227.1, "total_tokens": 38688576} +{"current_steps": 12295, "total_steps": 15621, "loss": 0.5196, "lr": 2.638938371095867e-07, "epoch": 0.7870814928621728, "percentage": 78.71, "elapsed_time": "1:18:23", "remaining_time": "0:21:12", "throughput": 8229.23, "total_tokens": 38704064} +{"current_steps": 12300, "total_steps": 15621, "loss": 0.3756, "lr": 2.6313798807651065e-07, "epoch": 0.7874015748031497, "percentage": 78.74, "elapsed_time": "1:18:23", "remaining_time": "0:21:10", "throughput": 8231.27, "total_tokens": 38718976} +{"current_steps": 12305, "total_steps": 15621, "loss": 0.3578, "lr": 2.6238305902430813e-07, "epoch": 0.7877216567441265, "percentage": 78.77, "elapsed_time": "1:18:24", "remaining_time": "0:21:07", "throughput": 8233.36, "total_tokens": 38734272} +{"current_steps": 12310, "total_steps": 15621, "loss": 0.3147, "lr": 2.61629050895517e-07, "epoch": 0.7880417386851034, "percentage": 78.8, "elapsed_time": "1:18:25", "remaining_time": "0:21:05", "throughput": 8235.44, "total_tokens": 38749504} +{"current_steps": 12315, "total_steps": 15621, "loss": 0.3237, "lr": 2.608759646315253e-07, "epoch": 0.7883618206260803, "percentage": 78.84, "elapsed_time": "1:18:25", "remaining_time": "0:21:03", "throughput": 8237.47, "total_tokens": 38764352} +{"current_steps": 12320, "total_steps": 15621, "loss": 0.3771, "lr": 2.6012380117257005e-07, "epoch": 0.7886819025670572, "percentage": 78.87, "elapsed_time": "1:18:26", "remaining_time": "0:21:01", "throughput": 8239.64, "total_tokens": 38780096} +{"current_steps": 12325, "total_steps": 15621, "loss": 0.3853, "lr": 2.5937256145773613e-07, "epoch": 0.789001984508034, "percentage": 78.9, "elapsed_time": "1:18:27", "remaining_time": "0:20:58", "throughput": 8241.78, "total_tokens": 38795712} +{"current_steps": 12330, "total_steps": 15621, "loss": 0.3191, "lr": 2.586222464249551e-07, "epoch": 0.7893220664490109, "percentage": 78.93, "elapsed_time": "1:18:27", "remaining_time": "0:20:56", "throughput": 8243.93, "total_tokens": 38811328} +{"current_steps": 12335, "total_steps": 15621, "loss": 0.2067, "lr": 2.5787285701100413e-07, "epoch": 0.7896421483899878, "percentage": 78.96, "elapsed_time": "1:18:28", "remaining_time": "0:20:54", "throughput": 8245.97, "total_tokens": 38826240} +{"current_steps": 12340, "total_steps": 15621, "loss": 0.3655, "lr": 2.571243941515048e-07, "epoch": 0.7899622303309647, "percentage": 79.0, "elapsed_time": "1:18:29", "remaining_time": "0:20:52", "throughput": 8248.23, "total_tokens": 38842624} +{"current_steps": 12345, "total_steps": 15621, "loss": 0.278, "lr": 2.563768587809213e-07, "epoch": 0.7902823122719416, "percentage": 79.03, "elapsed_time": "1:18:29", "remaining_time": "0:20:49", "throughput": 8250.24, "total_tokens": 38857472} +{"current_steps": 12350, "total_steps": 15621, "loss": 0.4174, "lr": 2.5563025183256137e-07, "epoch": 0.7906023942129186, "percentage": 79.06, "elapsed_time": "1:18:30", "remaining_time": "0:20:47", "throughput": 8252.25, "total_tokens": 38872256} +{"current_steps": 12355, "total_steps": 15621, "loss": 0.5513, "lr": 2.548845742385717e-07, "epoch": 0.7909224761538954, "percentage": 79.09, "elapsed_time": "1:18:31", "remaining_time": "0:20:45", "throughput": 8254.75, "total_tokens": 38890048} +{"current_steps": 12360, "total_steps": 15621, "loss": 0.2424, "lr": 2.541398269299393e-07, "epoch": 0.7912425580948723, "percentage": 79.12, "elapsed_time": "1:18:31", "remaining_time": "0:20:43", "throughput": 8256.9, "total_tokens": 38905664} +{"current_steps": 12365, "total_steps": 15621, "loss": 0.3106, "lr": 2.5339601083649063e-07, "epoch": 0.7915626400358492, "percentage": 79.16, "elapsed_time": "1:18:32", "remaining_time": "0:20:40", "throughput": 8259.8, "total_tokens": 38926144} +{"current_steps": 12370, "total_steps": 15621, "loss": 0.5144, "lr": 2.526531268868889e-07, "epoch": 0.7918827219768261, "percentage": 79.19, "elapsed_time": "1:18:33", "remaining_time": "0:20:38", "throughput": 8262.08, "total_tokens": 38942720} +{"current_steps": 12375, "total_steps": 15621, "loss": 0.3388, "lr": 2.5191117600863266e-07, "epoch": 0.792202803917803, "percentage": 79.22, "elapsed_time": "1:18:34", "remaining_time": "0:20:36", "throughput": 8264.19, "total_tokens": 38958144} +{"current_steps": 12380, "total_steps": 15621, "loss": 0.2559, "lr": 2.511701591280565e-07, "epoch": 0.7925228858587798, "percentage": 79.25, "elapsed_time": "1:18:34", "remaining_time": "0:20:34", "throughput": 8266.28, "total_tokens": 38973376} +{"current_steps": 12385, "total_steps": 15621, "loss": 0.3501, "lr": 2.504300771703295e-07, "epoch": 0.7928429677997567, "percentage": 79.28, "elapsed_time": "1:18:35", "remaining_time": "0:20:32", "throughput": 8268.5, "total_tokens": 38989504} +{"current_steps": 12390, "total_steps": 15621, "loss": 0.3819, "lr": 2.496909310594517e-07, "epoch": 0.7931630497407336, "percentage": 79.32, "elapsed_time": "1:18:36", "remaining_time": "0:20:29", "throughput": 8270.64, "total_tokens": 39005056} +{"current_steps": 12395, "total_steps": 15621, "loss": 0.4581, "lr": 2.4895272171825587e-07, "epoch": 0.7934831316817105, "percentage": 79.35, "elapsed_time": "1:18:36", "remaining_time": "0:20:27", "throughput": 8272.77, "total_tokens": 39020608} +{"current_steps": 12400, "total_steps": 15621, "loss": 0.4464, "lr": 2.482154500684055e-07, "epoch": 0.7938032136226874, "percentage": 79.38, "elapsed_time": "1:18:37", "remaining_time": "0:20:25", "throughput": 8274.82, "total_tokens": 39035712} +{"current_steps": 12405, "total_steps": 15621, "loss": 0.3431, "lr": 2.4747911703039293e-07, "epoch": 0.7941232955636643, "percentage": 79.41, "elapsed_time": "1:18:38", "remaining_time": "0:20:23", "throughput": 8276.89, "total_tokens": 39050880} +{"current_steps": 12410, "total_steps": 15621, "loss": 0.3737, "lr": 2.467437235235378e-07, "epoch": 0.7944433775046412, "percentage": 79.44, "elapsed_time": "1:18:38", "remaining_time": "0:20:20", "throughput": 8278.92, "total_tokens": 39065792} +{"current_steps": 12415, "total_steps": 15621, "loss": 0.3441, "lr": 2.460092704659883e-07, "epoch": 0.7947634594456181, "percentage": 79.48, "elapsed_time": "1:18:39", "remaining_time": "0:20:18", "throughput": 8281.0, "total_tokens": 39080960} +{"current_steps": 12420, "total_steps": 15621, "loss": 0.2641, "lr": 2.452757587747174e-07, "epoch": 0.795083541386595, "percentage": 79.51, "elapsed_time": "1:18:40", "remaining_time": "0:20:16", "throughput": 8283.23, "total_tokens": 39097216} +{"current_steps": 12425, "total_steps": 15621, "loss": 0.182, "lr": 2.445431893655232e-07, "epoch": 0.7954036233275719, "percentage": 79.54, "elapsed_time": "1:18:40", "remaining_time": "0:20:14", "throughput": 8285.42, "total_tokens": 39113152} +{"current_steps": 12430, "total_steps": 15621, "loss": 0.3652, "lr": 2.438115631530271e-07, "epoch": 0.7957237052685487, "percentage": 79.57, "elapsed_time": "1:18:41", "remaining_time": "0:20:12", "throughput": 8287.77, "total_tokens": 39130176} +{"current_steps": 12435, "total_steps": 15621, "loss": 0.2338, "lr": 2.4308088105067305e-07, "epoch": 0.7960437872095256, "percentage": 79.6, "elapsed_time": "1:18:42", "remaining_time": "0:20:09", "throughput": 8289.9, "total_tokens": 39145792} +{"current_steps": 12440, "total_steps": 15621, "loss": 0.4227, "lr": 2.423511439707262e-07, "epoch": 0.7963638691505025, "percentage": 79.64, "elapsed_time": "1:18:42", "remaining_time": "0:20:07", "throughput": 8292.02, "total_tokens": 39161280} +{"current_steps": 12445, "total_steps": 15621, "loss": 0.2807, "lr": 2.4162235282427177e-07, "epoch": 0.7966839510914794, "percentage": 79.67, "elapsed_time": "1:18:43", "remaining_time": "0:20:05", "throughput": 8294.09, "total_tokens": 39176512} +{"current_steps": 12450, "total_steps": 15621, "loss": 0.353, "lr": 2.408945085212144e-07, "epoch": 0.7970040330324563, "percentage": 79.7, "elapsed_time": "1:18:44", "remaining_time": "0:20:03", "throughput": 8296.16, "total_tokens": 39191808} +{"current_steps": 12455, "total_steps": 15621, "loss": 0.2507, "lr": 2.401676119702759e-07, "epoch": 0.7973241149734333, "percentage": 79.73, "elapsed_time": "1:18:44", "remaining_time": "0:20:01", "throughput": 8298.48, "total_tokens": 39208640} +{"current_steps": 12460, "total_steps": 15621, "loss": 0.3667, "lr": 2.394416640789952e-07, "epoch": 0.7976441969144101, "percentage": 79.76, "elapsed_time": "1:18:45", "remaining_time": "0:19:58", "throughput": 8300.45, "total_tokens": 39223232} +{"current_steps": 12465, "total_steps": 15621, "loss": 0.3149, "lr": 2.3871666575372696e-07, "epoch": 0.797964278855387, "percentage": 79.8, "elapsed_time": "1:18:46", "remaining_time": "0:19:56", "throughput": 8302.54, "total_tokens": 39238656} +{"current_steps": 12470, "total_steps": 15621, "loss": 0.5348, "lr": 2.3799261789963964e-07, "epoch": 0.7982843607963639, "percentage": 79.83, "elapsed_time": "1:18:46", "remaining_time": "0:19:54", "throughput": 8304.93, "total_tokens": 39255872} +{"current_steps": 12475, "total_steps": 15621, "loss": 0.269, "lr": 2.3726952142071644e-07, "epoch": 0.7986044427373408, "percentage": 79.86, "elapsed_time": "1:18:47", "remaining_time": "0:19:52", "throughput": 8306.93, "total_tokens": 39270784} +{"current_steps": 12480, "total_steps": 15621, "loss": 0.3524, "lr": 2.365473772197508e-07, "epoch": 0.7989245246783176, "percentage": 79.89, "elapsed_time": "1:18:48", "remaining_time": "0:19:49", "throughput": 8309.02, "total_tokens": 39286080} +{"current_steps": 12485, "total_steps": 15621, "loss": 0.3557, "lr": 2.3582618619834883e-07, "epoch": 0.7992446066192945, "percentage": 79.92, "elapsed_time": "1:18:48", "remaining_time": "0:19:47", "throughput": 8311.1, "total_tokens": 39301312} +{"current_steps": 12490, "total_steps": 15621, "loss": 0.2214, "lr": 2.3510594925692528e-07, "epoch": 0.7995646885602714, "percentage": 79.96, "elapsed_time": "1:18:49", "remaining_time": "0:19:45", "throughput": 8313.19, "total_tokens": 39316736} +{"current_steps": 12495, "total_steps": 15621, "loss": 0.3518, "lr": 2.343866672947057e-07, "epoch": 0.7998847705012483, "percentage": 79.99, "elapsed_time": "1:18:50", "remaining_time": "0:19:43", "throughput": 8315.15, "total_tokens": 39331264} +{"current_steps": 12500, "total_steps": 15621, "loss": 0.2711, "lr": 2.336683412097209e-07, "epoch": 0.8002048524422252, "percentage": 80.02, "elapsed_time": "1:18:50", "remaining_time": "0:19:41", "throughput": 8317.12, "total_tokens": 39345856} +{"current_steps": 12505, "total_steps": 15621, "loss": 0.3662, "lr": 2.329509718988095e-07, "epoch": 0.800524934383202, "percentage": 80.05, "elapsed_time": "1:18:51", "remaining_time": "0:19:38", "throughput": 8319.22, "total_tokens": 39361280} +{"current_steps": 12510, "total_steps": 15621, "loss": 0.3395, "lr": 2.3223456025761645e-07, "epoch": 0.800845016324179, "percentage": 80.08, "elapsed_time": "1:18:52", "remaining_time": "0:19:36", "throughput": 8321.18, "total_tokens": 39375872} +{"current_steps": 12512, "total_steps": 15621, "eval_loss": 0.3655269742012024, "epoch": 0.8009730491005698, "percentage": 80.1, "elapsed_time": "1:19:42", "remaining_time": "0:19:48", "throughput": 8234.01, "total_tokens": 39382144} +{"current_steps": 12515, "total_steps": 15621, "loss": 0.3043, "lr": 2.315191071805892e-07, "epoch": 0.8011650982651559, "percentage": 80.12, "elapsed_time": "1:23:39", "remaining_time": "0:20:45", "throughput": 7847.49, "total_tokens": 39392320} +{"current_steps": 12520, "total_steps": 15621, "loss": 0.3619, "lr": 2.3080461356097937e-07, "epoch": 0.8014851802061328, "percentage": 80.15, "elapsed_time": "1:23:40", "remaining_time": "0:20:43", "throughput": 7849.52, "total_tokens": 39407680} +{"current_steps": 12525, "total_steps": 15621, "loss": 0.2951, "lr": 2.30091080290841e-07, "epoch": 0.8018052621471097, "percentage": 80.18, "elapsed_time": "1:23:41", "remaining_time": "0:20:41", "throughput": 7851.76, "total_tokens": 39424512} +{"current_steps": 12530, "total_steps": 15621, "loss": 0.3417, "lr": 2.29378508261029e-07, "epoch": 0.8021253440880866, "percentage": 80.21, "elapsed_time": "1:23:41", "remaining_time": "0:20:38", "throughput": 7853.69, "total_tokens": 39439296} +{"current_steps": 12535, "total_steps": 15621, "loss": 0.3672, "lr": 2.2866689836119702e-07, "epoch": 0.8024454260290634, "percentage": 80.24, "elapsed_time": "1:23:42", "remaining_time": "0:20:36", "throughput": 7856.0, "total_tokens": 39456576} +{"current_steps": 12540, "total_steps": 15621, "loss": 0.3553, "lr": 2.2795625147979913e-07, "epoch": 0.8027655079700403, "percentage": 80.28, "elapsed_time": "1:23:43", "remaining_time": "0:20:34", "throughput": 7858.13, "total_tokens": 39472512} +{"current_steps": 12545, "total_steps": 15621, "loss": 0.2351, "lr": 2.2724656850408597e-07, "epoch": 0.8030855899110172, "percentage": 80.31, "elapsed_time": "1:23:43", "remaining_time": "0:20:31", "throughput": 7860.19, "total_tokens": 39488192} +{"current_steps": 12550, "total_steps": 15621, "loss": 0.3808, "lr": 2.2653785032010532e-07, "epoch": 0.8034056718519941, "percentage": 80.34, "elapsed_time": "1:23:44", "remaining_time": "0:20:29", "throughput": 7862.2, "total_tokens": 39503552} +{"current_steps": 12555, "total_steps": 15621, "loss": 0.3368, "lr": 2.258300978126999e-07, "epoch": 0.803725753792971, "percentage": 80.37, "elapsed_time": "1:23:45", "remaining_time": "0:20:27", "throughput": 7864.35, "total_tokens": 39519744} +{"current_steps": 12560, "total_steps": 15621, "loss": 0.4903, "lr": 2.2512331186550715e-07, "epoch": 0.804045835733948, "percentage": 80.4, "elapsed_time": "1:23:45", "remaining_time": "0:20:24", "throughput": 7866.38, "total_tokens": 39535232} +{"current_steps": 12565, "total_steps": 15621, "loss": 0.3867, "lr": 2.244174933609575e-07, "epoch": 0.8043659176749248, "percentage": 80.44, "elapsed_time": "1:23:46", "remaining_time": "0:20:22", "throughput": 7868.25, "total_tokens": 39549568} +{"current_steps": 12570, "total_steps": 15621, "loss": 0.2726, "lr": 2.2371264318027383e-07, "epoch": 0.8046859996159017, "percentage": 80.47, "elapsed_time": "1:23:47", "remaining_time": "0:20:20", "throughput": 7870.44, "total_tokens": 39566016} +{"current_steps": 12575, "total_steps": 15621, "loss": 0.2337, "lr": 2.2300876220346975e-07, "epoch": 0.8050060815568786, "percentage": 80.5, "elapsed_time": "1:23:47", "remaining_time": "0:20:17", "throughput": 7872.52, "total_tokens": 39581760} +{"current_steps": 12580, "total_steps": 15621, "loss": 0.2888, "lr": 2.2230585130934897e-07, "epoch": 0.8053261634978555, "percentage": 80.53, "elapsed_time": "1:23:48", "remaining_time": "0:20:15", "throughput": 7874.65, "total_tokens": 39597888} +{"current_steps": 12585, "total_steps": 15621, "loss": 0.4469, "lr": 2.2160391137550394e-07, "epoch": 0.8056462454388323, "percentage": 80.56, "elapsed_time": "1:23:49", "remaining_time": "0:20:13", "throughput": 7876.71, "total_tokens": 39613568} +{"current_steps": 12590, "total_steps": 15621, "loss": 0.4226, "lr": 2.2090294327831494e-07, "epoch": 0.8059663273798092, "percentage": 80.6, "elapsed_time": "1:23:49", "remaining_time": "0:20:10", "throughput": 7878.61, "total_tokens": 39628096} +{"current_steps": 12595, "total_steps": 15621, "loss": 0.2881, "lr": 2.202029478929488e-07, "epoch": 0.8062864093207861, "percentage": 80.63, "elapsed_time": "1:23:50", "remaining_time": "0:20:08", "throughput": 7880.48, "total_tokens": 39642560} +{"current_steps": 12600, "total_steps": 15621, "loss": 0.2958, "lr": 2.195039260933581e-07, "epoch": 0.806606491261763, "percentage": 80.66, "elapsed_time": "1:23:51", "remaining_time": "0:20:06", "throughput": 7882.53, "total_tokens": 39658112} +{"current_steps": 12605, "total_steps": 15621, "loss": 0.2724, "lr": 2.1880587875227973e-07, "epoch": 0.8069265732027399, "percentage": 80.69, "elapsed_time": "1:23:51", "remaining_time": "0:20:03", "throughput": 7884.65, "total_tokens": 39674112} +{"current_steps": 12610, "total_steps": 15621, "loss": 0.3308, "lr": 2.18108806741234e-07, "epoch": 0.8072466551437167, "percentage": 80.72, "elapsed_time": "1:23:52", "remaining_time": "0:20:01", "throughput": 7886.81, "total_tokens": 39690432} +{"current_steps": 12615, "total_steps": 15621, "loss": 0.3547, "lr": 2.1741271093052315e-07, "epoch": 0.8075667370846937, "percentage": 80.76, "elapsed_time": "1:23:53", "remaining_time": "0:19:59", "throughput": 7888.83, "total_tokens": 39705792} +{"current_steps": 12620, "total_steps": 15621, "loss": 0.4658, "lr": 2.167175921892318e-07, "epoch": 0.8078868190256706, "percentage": 80.79, "elapsed_time": "1:23:53", "remaining_time": "0:19:57", "throughput": 7890.97, "total_tokens": 39722048} +{"current_steps": 12625, "total_steps": 15621, "loss": 0.4219, "lr": 2.1602345138522314e-07, "epoch": 0.8082069009666475, "percentage": 80.82, "elapsed_time": "1:23:54", "remaining_time": "0:19:54", "throughput": 7893.12, "total_tokens": 39738304} +{"current_steps": 12630, "total_steps": 15621, "loss": 0.3551, "lr": 2.1533028938514008e-07, "epoch": 0.8085269829076244, "percentage": 80.85, "elapsed_time": "1:23:55", "remaining_time": "0:19:52", "throughput": 7895.14, "total_tokens": 39753728} +{"current_steps": 12635, "total_steps": 15621, "loss": 0.3441, "lr": 2.1463810705440433e-07, "epoch": 0.8088470648486012, "percentage": 80.88, "elapsed_time": "1:23:55", "remaining_time": "0:19:50", "throughput": 7897.22, "total_tokens": 39769600} +{"current_steps": 12640, "total_steps": 15621, "loss": 0.3571, "lr": 2.139469052572127e-07, "epoch": 0.8091671467895781, "percentage": 80.92, "elapsed_time": "1:23:56", "remaining_time": "0:19:47", "throughput": 7899.09, "total_tokens": 39784000} +{"current_steps": 12645, "total_steps": 15621, "loss": 0.3587, "lr": 2.1325668485653891e-07, "epoch": 0.809487228730555, "percentage": 80.95, "elapsed_time": "1:23:57", "remaining_time": "0:19:45", "throughput": 7901.23, "total_tokens": 39800320} +{"current_steps": 12650, "total_steps": 15621, "loss": 0.4617, "lr": 2.1256744671413173e-07, "epoch": 0.8098073106715319, "percentage": 80.98, "elapsed_time": "1:23:57", "remaining_time": "0:19:43", "throughput": 7903.2, "total_tokens": 39815360} +{"current_steps": 12655, "total_steps": 15621, "loss": 0.3819, "lr": 2.1187919169051316e-07, "epoch": 0.8101273926125088, "percentage": 81.01, "elapsed_time": "1:23:58", "remaining_time": "0:19:40", "throughput": 7905.08, "total_tokens": 39829952} +{"current_steps": 12660, "total_steps": 15621, "loss": 0.3505, "lr": 2.111919206449767e-07, "epoch": 0.8104474745534856, "percentage": 81.04, "elapsed_time": "1:23:59", "remaining_time": "0:19:38", "throughput": 7907.1, "total_tokens": 39845376} +{"current_steps": 12665, "total_steps": 15621, "loss": 0.4955, "lr": 2.1050563443558922e-07, "epoch": 0.8107675564944626, "percentage": 81.08, "elapsed_time": "1:23:59", "remaining_time": "0:19:36", "throughput": 7909.25, "total_tokens": 39861696} +{"current_steps": 12670, "total_steps": 15621, "loss": 0.3, "lr": 2.0982033391918697e-07, "epoch": 0.8110876384354395, "percentage": 81.11, "elapsed_time": "1:24:00", "remaining_time": "0:19:34", "throughput": 7911.32, "total_tokens": 39877440} +{"current_steps": 12675, "total_steps": 15621, "loss": 0.3292, "lr": 2.0913601995137543e-07, "epoch": 0.8114077203764164, "percentage": 81.14, "elapsed_time": "1:24:01", "remaining_time": "0:19:31", "throughput": 7913.46, "total_tokens": 39893760} +{"current_steps": 12680, "total_steps": 15621, "loss": 0.2889, "lr": 2.084526933865287e-07, "epoch": 0.8117278023173933, "percentage": 81.17, "elapsed_time": "1:24:01", "remaining_time": "0:19:29", "throughput": 7915.54, "total_tokens": 39909568} +{"current_steps": 12685, "total_steps": 15621, "loss": 0.4667, "lr": 2.0777035507778817e-07, "epoch": 0.8120478842583702, "percentage": 81.2, "elapsed_time": "1:24:02", "remaining_time": "0:19:27", "throughput": 7917.35, "total_tokens": 39923648} +{"current_steps": 12690, "total_steps": 15621, "loss": 0.4268, "lr": 2.0708900587706135e-07, "epoch": 0.812367966199347, "percentage": 81.24, "elapsed_time": "1:24:03", "remaining_time": "0:19:24", "throughput": 7919.34, "total_tokens": 39939008} +{"current_steps": 12695, "total_steps": 15621, "loss": 0.3356, "lr": 2.0640864663502e-07, "epoch": 0.8126880481403239, "percentage": 81.27, "elapsed_time": "1:24:03", "remaining_time": "0:19:22", "throughput": 7921.44, "total_tokens": 39955072} +{"current_steps": 12700, "total_steps": 15621, "loss": 0.4563, "lr": 2.057292782011013e-07, "epoch": 0.8130081300813008, "percentage": 81.3, "elapsed_time": "1:24:04", "remaining_time": "0:19:20", "throughput": 7923.51, "total_tokens": 39970880} +{"current_steps": 12705, "total_steps": 15621, "loss": 0.3045, "lr": 2.0505090142350468e-07, "epoch": 0.8133282120222777, "percentage": 81.33, "elapsed_time": "1:24:05", "remaining_time": "0:19:17", "throughput": 7925.5, "total_tokens": 39986240} +{"current_steps": 12710, "total_steps": 15621, "loss": 0.3426, "lr": 2.0437351714919127e-07, "epoch": 0.8136482939632546, "percentage": 81.36, "elapsed_time": "1:24:05", "remaining_time": "0:19:15", "throughput": 7927.54, "total_tokens": 40001856} +{"current_steps": 12715, "total_steps": 15621, "loss": 0.3084, "lr": 2.0369712622388336e-07, "epoch": 0.8139683759042314, "percentage": 81.4, "elapsed_time": "1:24:06", "remaining_time": "0:19:13", "throughput": 7929.68, "total_tokens": 40018112} +{"current_steps": 12720, "total_steps": 15621, "loss": 0.2869, "lr": 2.0302172949206298e-07, "epoch": 0.8142884578452084, "percentage": 81.43, "elapsed_time": "1:24:07", "remaining_time": "0:19:11", "throughput": 7931.69, "total_tokens": 40033664} +{"current_steps": 12725, "total_steps": 15621, "loss": 0.3069, "lr": 2.0234732779697094e-07, "epoch": 0.8146085397861853, "percentage": 81.46, "elapsed_time": "1:24:07", "remaining_time": "0:19:08", "throughput": 7933.65, "total_tokens": 40048768} +{"current_steps": 12730, "total_steps": 15621, "loss": 0.3267, "lr": 2.016739219806056e-07, "epoch": 0.8149286217271622, "percentage": 81.49, "elapsed_time": "1:24:08", "remaining_time": "0:19:06", "throughput": 7935.51, "total_tokens": 40063232} +{"current_steps": 12735, "total_steps": 15621, "loss": 0.3839, "lr": 2.0100151288372215e-07, "epoch": 0.8152487036681391, "percentage": 81.52, "elapsed_time": "1:24:09", "remaining_time": "0:19:04", "throughput": 7937.61, "total_tokens": 40079296} +{"current_steps": 12740, "total_steps": 15621, "loss": 0.5609, "lr": 2.0033010134583084e-07, "epoch": 0.8155687856091159, "percentage": 81.56, "elapsed_time": "1:24:09", "remaining_time": "0:19:01", "throughput": 7939.66, "total_tokens": 40094976} +{"current_steps": 12745, "total_steps": 15621, "loss": 0.314, "lr": 1.9965968820519763e-07, "epoch": 0.8158888675500928, "percentage": 81.59, "elapsed_time": "1:24:10", "remaining_time": "0:18:59", "throughput": 7941.68, "total_tokens": 40110464} +{"current_steps": 12750, "total_steps": 15621, "loss": 0.4042, "lr": 1.9899027429884042e-07, "epoch": 0.8162089494910697, "percentage": 81.62, "elapsed_time": "1:24:11", "remaining_time": "0:18:57", "throughput": 7943.63, "total_tokens": 40125568} +{"current_steps": 12755, "total_steps": 15621, "loss": 0.4302, "lr": 1.983218604625305e-07, "epoch": 0.8165290314320466, "percentage": 81.65, "elapsed_time": "1:24:11", "remaining_time": "0:18:55", "throughput": 7945.71, "total_tokens": 40141440} +{"current_steps": 12760, "total_steps": 15621, "loss": 0.3288, "lr": 1.9765444753079096e-07, "epoch": 0.8168491133730235, "percentage": 81.68, "elapsed_time": "1:24:12", "remaining_time": "0:18:52", "throughput": 7947.64, "total_tokens": 40156416} +{"current_steps": 12765, "total_steps": 15621, "loss": 0.3985, "lr": 1.9698803633689408e-07, "epoch": 0.8171691953140003, "percentage": 81.72, "elapsed_time": "1:24:13", "remaining_time": "0:18:50", "throughput": 7949.8, "total_tokens": 40172928} +{"current_steps": 12770, "total_steps": 15621, "loss": 0.2404, "lr": 1.963226277128619e-07, "epoch": 0.8174892772549772, "percentage": 81.75, "elapsed_time": "1:24:13", "remaining_time": "0:18:48", "throughput": 7951.77, "total_tokens": 40188096} +{"current_steps": 12775, "total_steps": 15621, "loss": 0.3559, "lr": 1.956582224894655e-07, "epoch": 0.8178093591959542, "percentage": 81.78, "elapsed_time": "1:24:14", "remaining_time": "0:18:46", "throughput": 7953.85, "total_tokens": 40204032} +{"current_steps": 12780, "total_steps": 15621, "loss": 0.369, "lr": 1.949948214962227e-07, "epoch": 0.8181294411369311, "percentage": 81.81, "elapsed_time": "1:24:15", "remaining_time": "0:18:43", "throughput": 7955.78, "total_tokens": 40218944} +{"current_steps": 12785, "total_steps": 15621, "loss": 0.358, "lr": 1.943324255613964e-07, "epoch": 0.818449523077908, "percentage": 81.84, "elapsed_time": "1:24:16", "remaining_time": "0:18:41", "throughput": 7957.94, "total_tokens": 40235456} +{"current_steps": 12790, "total_steps": 15621, "loss": 0.4564, "lr": 1.936710355119967e-07, "epoch": 0.8187696050188848, "percentage": 81.88, "elapsed_time": "1:24:16", "remaining_time": "0:18:39", "throughput": 7959.83, "total_tokens": 40250176} +{"current_steps": 12795, "total_steps": 15621, "loss": 0.3312, "lr": 1.9301065217377655e-07, "epoch": 0.8190896869598617, "percentage": 81.91, "elapsed_time": "1:24:17", "remaining_time": "0:18:36", "throughput": 7961.81, "total_tokens": 40265472} +{"current_steps": 12800, "total_steps": 15621, "loss": 0.3995, "lr": 1.9235127637123249e-07, "epoch": 0.8194097689008386, "percentage": 81.94, "elapsed_time": "1:24:18", "remaining_time": "0:18:34", "throughput": 7963.93, "total_tokens": 40281728} +{"current_steps": 12805, "total_steps": 15621, "loss": 0.3221, "lr": 1.9169290892760225e-07, "epoch": 0.8197298508418155, "percentage": 81.97, "elapsed_time": "1:24:18", "remaining_time": "0:18:32", "throughput": 7965.87, "total_tokens": 40296768} +{"current_steps": 12810, "total_steps": 15621, "loss": 0.3295, "lr": 1.91035550664866e-07, "epoch": 0.8200499327827924, "percentage": 82.0, "elapsed_time": "1:24:19", "remaining_time": "0:18:30", "throughput": 7967.74, "total_tokens": 40311488} +{"current_steps": 12815, "total_steps": 15621, "loss": 0.3238, "lr": 1.903792024037433e-07, "epoch": 0.8203700147237692, "percentage": 82.04, "elapsed_time": "1:24:20", "remaining_time": "0:18:27", "throughput": 7969.78, "total_tokens": 40327232} +{"current_steps": 12820, "total_steps": 15621, "loss": 0.4338, "lr": 1.8972386496369185e-07, "epoch": 0.8206900966647461, "percentage": 82.07, "elapsed_time": "1:24:20", "remaining_time": "0:18:25", "throughput": 7971.99, "total_tokens": 40344064} +{"current_steps": 12825, "total_steps": 15621, "loss": 0.3917, "lr": 1.89069539162909e-07, "epoch": 0.8210101786057231, "percentage": 82.1, "elapsed_time": "1:24:21", "remaining_time": "0:18:23", "throughput": 7973.92, "total_tokens": 40359040} +{"current_steps": 12830, "total_steps": 15621, "loss": 0.4034, "lr": 1.8841622581832783e-07, "epoch": 0.8213302605467, "percentage": 82.13, "elapsed_time": "1:24:22", "remaining_time": "0:18:21", "throughput": 7976.21, "total_tokens": 40376384} +{"current_steps": 12835, "total_steps": 15621, "loss": 0.5928, "lr": 1.8776392574561783e-07, "epoch": 0.8216503424876769, "percentage": 82.17, "elapsed_time": "1:24:22", "remaining_time": "0:18:18", "throughput": 7978.23, "total_tokens": 40391936} +{"current_steps": 12840, "total_steps": 15621, "loss": 0.4702, "lr": 1.8711263975918322e-07, "epoch": 0.8219704244286538, "percentage": 82.2, "elapsed_time": "1:24:23", "remaining_time": "0:18:16", "throughput": 7980.46, "total_tokens": 40408832} +{"current_steps": 12845, "total_steps": 15621, "loss": 0.4516, "lr": 1.8646236867216215e-07, "epoch": 0.8222905063696306, "percentage": 82.23, "elapsed_time": "1:24:24", "remaining_time": "0:18:14", "throughput": 7982.61, "total_tokens": 40425280} +{"current_steps": 12850, "total_steps": 15621, "loss": 0.3451, "lr": 1.8581311329642591e-07, "epoch": 0.8226105883106075, "percentage": 82.26, "elapsed_time": "1:24:24", "remaining_time": "0:18:12", "throughput": 7984.62, "total_tokens": 40440832} +{"current_steps": 12855, "total_steps": 15621, "loss": 0.2711, "lr": 1.8516487444257723e-07, "epoch": 0.8229306702515844, "percentage": 82.29, "elapsed_time": "1:24:25", "remaining_time": "0:18:09", "throughput": 7986.98, "total_tokens": 40458624} +{"current_steps": 12860, "total_steps": 15621, "loss": 0.4068, "lr": 1.8451765291995004e-07, "epoch": 0.8232507521925613, "percentage": 82.33, "elapsed_time": "1:24:26", "remaining_time": "0:18:07", "throughput": 7989.08, "total_tokens": 40474688} +{"current_steps": 12865, "total_steps": 15621, "loss": 0.3591, "lr": 1.8387144953660806e-07, "epoch": 0.8235708341335382, "percentage": 82.36, "elapsed_time": "1:24:26", "remaining_time": "0:18:05", "throughput": 7991.19, "total_tokens": 40490816} +{"current_steps": 12870, "total_steps": 15621, "loss": 0.4492, "lr": 1.832262650993437e-07, "epoch": 0.823890916074515, "percentage": 82.39, "elapsed_time": "1:24:27", "remaining_time": "0:18:03", "throughput": 7993.16, "total_tokens": 40506112} +{"current_steps": 12875, "total_steps": 15621, "loss": 0.2973, "lr": 1.825821004136774e-07, "epoch": 0.8242109980154919, "percentage": 82.42, "elapsed_time": "1:24:28", "remaining_time": "0:18:00", "throughput": 7995.13, "total_tokens": 40521344} +{"current_steps": 12880, "total_steps": 15621, "loss": 0.2799, "lr": 1.819389562838559e-07, "epoch": 0.8245310799564689, "percentage": 82.45, "elapsed_time": "1:24:28", "remaining_time": "0:17:58", "throughput": 7997.17, "total_tokens": 40537024} +{"current_steps": 12885, "total_steps": 15621, "loss": 0.3058, "lr": 1.8129683351285319e-07, "epoch": 0.8248511618974458, "percentage": 82.49, "elapsed_time": "1:24:29", "remaining_time": "0:17:56", "throughput": 7999.2, "total_tokens": 40552640} +{"current_steps": 12890, "total_steps": 15621, "loss": 0.3209, "lr": 1.8065573290236626e-07, "epoch": 0.8251712438384227, "percentage": 82.52, "elapsed_time": "1:24:30", "remaining_time": "0:17:54", "throughput": 8001.18, "total_tokens": 40568000} +{"current_steps": 12895, "total_steps": 15621, "loss": 0.3806, "lr": 1.8001565525281682e-07, "epoch": 0.8254913257793995, "percentage": 82.55, "elapsed_time": "1:24:30", "remaining_time": "0:17:52", "throughput": 8003.4, "total_tokens": 40584960} +{"current_steps": 12900, "total_steps": 15621, "loss": 0.3707, "lr": 1.793766013633493e-07, "epoch": 0.8258114077203764, "percentage": 82.58, "elapsed_time": "1:24:31", "remaining_time": "0:17:49", "throughput": 8005.43, "total_tokens": 40600704} +{"current_steps": 12905, "total_steps": 15621, "loss": 0.3865, "lr": 1.7873857203183074e-07, "epoch": 0.8261314896613533, "percentage": 82.61, "elapsed_time": "1:24:32", "remaining_time": "0:17:47", "throughput": 8007.39, "total_tokens": 40615872} +{"current_steps": 12910, "total_steps": 15621, "loss": 0.4632, "lr": 1.7810156805484733e-07, "epoch": 0.8264515716023302, "percentage": 82.65, "elapsed_time": "1:24:32", "remaining_time": "0:17:45", "throughput": 8009.61, "total_tokens": 40632640} +{"current_steps": 12915, "total_steps": 15621, "loss": 0.3007, "lr": 1.7746559022770612e-07, "epoch": 0.8267716535433071, "percentage": 82.68, "elapsed_time": "1:24:33", "remaining_time": "0:17:43", "throughput": 8011.6, "total_tokens": 40648064} +{"current_steps": 12920, "total_steps": 15621, "loss": 0.3833, "lr": 1.7683063934443342e-07, "epoch": 0.8270917354842839, "percentage": 82.71, "elapsed_time": "1:24:34", "remaining_time": "0:17:40", "throughput": 8013.78, "total_tokens": 40664704} +{"current_steps": 12925, "total_steps": 15621, "loss": 0.4074, "lr": 1.7619671619777277e-07, "epoch": 0.8274118174252608, "percentage": 82.74, "elapsed_time": "1:24:35", "remaining_time": "0:17:38", "throughput": 8015.9, "total_tokens": 40681024} +{"current_steps": 12930, "total_steps": 15621, "loss": 0.4121, "lr": 1.7556382157918404e-07, "epoch": 0.8277318993662378, "percentage": 82.77, "elapsed_time": "1:24:35", "remaining_time": "0:17:36", "throughput": 8017.81, "total_tokens": 40695936} +{"current_steps": 12935, "total_steps": 15621, "loss": 0.3177, "lr": 1.7493195627884427e-07, "epoch": 0.8280519813072147, "percentage": 82.81, "elapsed_time": "1:24:36", "remaining_time": "0:17:34", "throughput": 8020.11, "total_tokens": 40713472} +{"current_steps": 12940, "total_steps": 15621, "loss": 0.3141, "lr": 1.7430112108564465e-07, "epoch": 0.8283720632481916, "percentage": 82.84, "elapsed_time": "1:24:37", "remaining_time": "0:17:31", "throughput": 8022.16, "total_tokens": 40729344} +{"current_steps": 12945, "total_steps": 15621, "loss": 0.3983, "lr": 1.736713167871896e-07, "epoch": 0.8286921451891684, "percentage": 82.87, "elapsed_time": "1:24:37", "remaining_time": "0:17:29", "throughput": 8024.3, "total_tokens": 40745856} +{"current_steps": 12950, "total_steps": 15621, "loss": 0.2973, "lr": 1.7304254416979803e-07, "epoch": 0.8290122271301453, "percentage": 82.9, "elapsed_time": "1:24:38", "remaining_time": "0:17:27", "throughput": 8026.39, "total_tokens": 40761920} +{"current_steps": 12955, "total_steps": 15621, "loss": 0.263, "lr": 1.7241480401849963e-07, "epoch": 0.8293323090711222, "percentage": 82.93, "elapsed_time": "1:24:39", "remaining_time": "0:17:25", "throughput": 8028.33, "total_tokens": 40776960} +{"current_steps": 12960, "total_steps": 15621, "loss": 0.3413, "lr": 1.7178809711703524e-07, "epoch": 0.8296523910120991, "percentage": 82.97, "elapsed_time": "1:24:39", "remaining_time": "0:17:23", "throughput": 8030.28, "total_tokens": 40792192} +{"current_steps": 12965, "total_steps": 15621, "loss": 0.36, "lr": 1.7116242424785599e-07, "epoch": 0.829972472953076, "percentage": 83.0, "elapsed_time": "1:24:40", "remaining_time": "0:17:20", "throughput": 8032.36, "total_tokens": 40808256} +{"current_steps": 12970, "total_steps": 15621, "loss": 0.4272, "lr": 1.7053778619212166e-07, "epoch": 0.8302925548940528, "percentage": 83.03, "elapsed_time": "1:24:41", "remaining_time": "0:17:18", "throughput": 8034.3, "total_tokens": 40823424} +{"current_steps": 12975, "total_steps": 15621, "loss": 0.4132, "lr": 1.6991418372970022e-07, "epoch": 0.8306126368350297, "percentage": 83.06, "elapsed_time": "1:24:41", "remaining_time": "0:17:16", "throughput": 8036.6, "total_tokens": 40840960} +{"current_steps": 12980, "total_steps": 15621, "loss": 0.3849, "lr": 1.6929161763916666e-07, "epoch": 0.8309327187760066, "percentage": 83.09, "elapsed_time": "1:24:42", "remaining_time": "0:17:14", "throughput": 8038.76, "total_tokens": 40857536} +{"current_steps": 12985, "total_steps": 15621, "loss": 0.3582, "lr": 1.686700886978021e-07, "epoch": 0.8312528007169836, "percentage": 83.13, "elapsed_time": "1:24:43", "remaining_time": "0:17:11", "throughput": 8040.94, "total_tokens": 40874240} +{"current_steps": 12990, "total_steps": 15621, "loss": 0.3579, "lr": 1.6804959768159266e-07, "epoch": 0.8315728826579605, "percentage": 83.16, "elapsed_time": "1:24:43", "remaining_time": "0:17:09", "throughput": 8042.81, "total_tokens": 40888960} +{"current_steps": 12995, "total_steps": 15621, "loss": 0.5373, "lr": 1.674301453652287e-07, "epoch": 0.8318929645989374, "percentage": 83.19, "elapsed_time": "1:24:44", "remaining_time": "0:17:07", "throughput": 8044.8, "total_tokens": 40904512} +{"current_steps": 13000, "total_steps": 15621, "loss": 0.2969, "lr": 1.6681173252210378e-07, "epoch": 0.8322130465399142, "percentage": 83.22, "elapsed_time": "1:24:45", "remaining_time": "0:17:05", "throughput": 8047.07, "total_tokens": 40921856} +{"current_steps": 13005, "total_steps": 15621, "loss": 0.3801, "lr": 1.6619435992431342e-07, "epoch": 0.8325331284808911, "percentage": 83.25, "elapsed_time": "1:24:46", "remaining_time": "0:17:03", "throughput": 8049.28, "total_tokens": 40938752} +{"current_steps": 13010, "total_steps": 15621, "loss": 0.3026, "lr": 1.6557802834265466e-07, "epoch": 0.832853210421868, "percentage": 83.29, "elapsed_time": "1:24:46", "remaining_time": "0:17:00", "throughput": 8051.23, "total_tokens": 40954048} +{"current_steps": 13015, "total_steps": 15621, "loss": 0.3634, "lr": 1.649627385466248e-07, "epoch": 0.8331732923628449, "percentage": 83.32, "elapsed_time": "1:24:47", "remaining_time": "0:16:58", "throughput": 8053.69, "total_tokens": 40972672} +{"current_steps": 13020, "total_steps": 15621, "loss": 0.2467, "lr": 1.643484913044202e-07, "epoch": 0.8334933743038218, "percentage": 83.35, "elapsed_time": "1:24:48", "remaining_time": "0:16:56", "throughput": 8055.61, "total_tokens": 40987648} +{"current_steps": 13025, "total_steps": 15621, "loss": 0.3171, "lr": 1.6373528738293564e-07, "epoch": 0.8338134562447986, "percentage": 83.38, "elapsed_time": "1:24:48", "remaining_time": "0:16:54", "throughput": 8057.64, "total_tokens": 41003328} +{"current_steps": 13030, "total_steps": 15621, "loss": 0.2939, "lr": 1.6312312754776404e-07, "epoch": 0.8341335381857755, "percentage": 83.41, "elapsed_time": "1:24:49", "remaining_time": "0:16:52", "throughput": 8059.6, "total_tokens": 41018624} +{"current_steps": 13035, "total_steps": 15621, "loss": 0.3318, "lr": 1.6251201256319357e-07, "epoch": 0.8344536201267524, "percentage": 83.45, "elapsed_time": "1:24:50", "remaining_time": "0:16:49", "throughput": 8061.66, "total_tokens": 41034624} +{"current_steps": 13040, "total_steps": 15621, "loss": 0.3699, "lr": 1.619019431922083e-07, "epoch": 0.8347737020677294, "percentage": 83.48, "elapsed_time": "1:24:50", "remaining_time": "0:16:47", "throughput": 8063.58, "total_tokens": 41049664} +{"current_steps": 13045, "total_steps": 15621, "loss": 0.3494, "lr": 1.6129292019648754e-07, "epoch": 0.8350937840087063, "percentage": 83.51, "elapsed_time": "1:24:51", "remaining_time": "0:16:45", "throughput": 8065.75, "total_tokens": 41066368} +{"current_steps": 13050, "total_steps": 15621, "loss": 0.2975, "lr": 1.606849443364038e-07, "epoch": 0.8354138659496831, "percentage": 83.54, "elapsed_time": "1:24:52", "remaining_time": "0:16:43", "throughput": 8067.76, "total_tokens": 41082048} +{"current_steps": 13055, "total_steps": 15621, "loss": 0.3425, "lr": 1.6007801637102104e-07, "epoch": 0.83573394789066, "percentage": 83.57, "elapsed_time": "1:24:52", "remaining_time": "0:16:41", "throughput": 8069.82, "total_tokens": 41098048} +{"current_steps": 13060, "total_steps": 15621, "loss": 0.3858, "lr": 1.594721370580969e-07, "epoch": 0.8360540298316369, "percentage": 83.61, "elapsed_time": "1:24:53", "remaining_time": "0:16:38", "throughput": 8071.69, "total_tokens": 41112768} +{"current_steps": 13065, "total_steps": 15621, "loss": 0.4241, "lr": 1.588673071540788e-07, "epoch": 0.8363741117726138, "percentage": 83.64, "elapsed_time": "1:24:54", "remaining_time": "0:16:36", "throughput": 8073.57, "total_tokens": 41127488} +{"current_steps": 13070, "total_steps": 15621, "loss": 0.3195, "lr": 1.5826352741410332e-07, "epoch": 0.8366941937135907, "percentage": 83.67, "elapsed_time": "1:24:54", "remaining_time": "0:16:34", "throughput": 8075.44, "total_tokens": 41142272} +{"current_steps": 13075, "total_steps": 15621, "loss": 0.2947, "lr": 1.576607985919971e-07, "epoch": 0.8370142756545675, "percentage": 83.7, "elapsed_time": "1:24:55", "remaining_time": "0:16:32", "throughput": 8077.45, "total_tokens": 41157952} +{"current_steps": 13080, "total_steps": 15621, "loss": 0.3547, "lr": 1.57059121440274e-07, "epoch": 0.8373343575955444, "percentage": 83.73, "elapsed_time": "1:24:56", "remaining_time": "0:16:29", "throughput": 8079.36, "total_tokens": 41172992} +{"current_steps": 13085, "total_steps": 15621, "loss": 0.3823, "lr": 1.56458496710135e-07, "epoch": 0.8376544395365213, "percentage": 83.77, "elapsed_time": "1:24:56", "remaining_time": "0:16:27", "throughput": 8081.22, "total_tokens": 41187776} +{"current_steps": 13090, "total_steps": 15621, "loss": 0.3403, "lr": 1.5585892515146716e-07, "epoch": 0.8379745214774983, "percentage": 83.8, "elapsed_time": "1:24:57", "remaining_time": "0:16:25", "throughput": 8083.38, "total_tokens": 41204416} +{"current_steps": 13095, "total_steps": 15621, "loss": 0.4214, "lr": 1.5526040751284253e-07, "epoch": 0.8382946034184752, "percentage": 83.83, "elapsed_time": "1:24:58", "remaining_time": "0:16:23", "throughput": 8085.4, "total_tokens": 41220032} +{"current_steps": 13100, "total_steps": 15621, "loss": 0.3168, "lr": 1.546629445415174e-07, "epoch": 0.838614685359452, "percentage": 83.86, "elapsed_time": "1:24:58", "remaining_time": "0:16:21", "throughput": 8087.42, "total_tokens": 41235776} +{"current_steps": 13105, "total_steps": 15621, "loss": 0.3724, "lr": 1.5406653698343141e-07, "epoch": 0.8389347673004289, "percentage": 83.89, "elapsed_time": "1:24:59", "remaining_time": "0:16:19", "throughput": 8089.54, "total_tokens": 41252160} +{"current_steps": 13110, "total_steps": 15621, "loss": 0.3591, "lr": 1.5347118558320637e-07, "epoch": 0.8392548492414058, "percentage": 83.93, "elapsed_time": "1:25:00", "remaining_time": "0:16:16", "throughput": 8091.74, "total_tokens": 41269056} +{"current_steps": 13115, "total_steps": 15621, "loss": 0.3632, "lr": 1.5287689108414558e-07, "epoch": 0.8395749311823827, "percentage": 83.96, "elapsed_time": "1:25:00", "remaining_time": "0:16:14", "throughput": 8093.83, "total_tokens": 41285312} +{"current_steps": 13120, "total_steps": 15621, "loss": 0.3374, "lr": 1.5228365422823242e-07, "epoch": 0.8398950131233596, "percentage": 83.99, "elapsed_time": "1:25:01", "remaining_time": "0:16:12", "throughput": 8095.84, "total_tokens": 41300992} +{"current_steps": 13125, "total_steps": 15621, "loss": 0.2637, "lr": 1.5169147575613038e-07, "epoch": 0.8402150950643364, "percentage": 84.02, "elapsed_time": "1:25:02", "remaining_time": "0:16:10", "throughput": 8098.04, "total_tokens": 41317952} +{"current_steps": 13130, "total_steps": 15621, "loss": 0.297, "lr": 1.5110035640718098e-07, "epoch": 0.8405351770053133, "percentage": 84.05, "elapsed_time": "1:25:02", "remaining_time": "0:16:08", "throughput": 8100.02, "total_tokens": 41333440} +{"current_steps": 13135, "total_steps": 15621, "loss": 0.3665, "lr": 1.5051029691940387e-07, "epoch": 0.8408552589462902, "percentage": 84.09, "elapsed_time": "1:25:03", "remaining_time": "0:16:05", "throughput": 8102.05, "total_tokens": 41349312} +{"current_steps": 13140, "total_steps": 15621, "loss": 0.356, "lr": 1.4992129802949515e-07, "epoch": 0.8411753408872671, "percentage": 84.12, "elapsed_time": "1:25:04", "remaining_time": "0:16:03", "throughput": 8103.94, "total_tokens": 41364288} +{"current_steps": 13145, "total_steps": 15621, "loss": 0.2884, "lr": 1.4933336047282696e-07, "epoch": 0.8414954228282441, "percentage": 84.15, "elapsed_time": "1:25:04", "remaining_time": "0:16:01", "throughput": 8105.93, "total_tokens": 41379904} +{"current_steps": 13150, "total_steps": 15621, "loss": 0.3481, "lr": 1.4874648498344579e-07, "epoch": 0.841815504769221, "percentage": 84.18, "elapsed_time": "1:25:05", "remaining_time": "0:15:59", "throughput": 8107.77, "total_tokens": 41394432} +{"current_steps": 13155, "total_steps": 15621, "loss": 0.3485, "lr": 1.4816067229407348e-07, "epoch": 0.8421355867101978, "percentage": 84.21, "elapsed_time": "1:25:06", "remaining_time": "0:15:57", "throughput": 8109.74, "total_tokens": 41409984} +{"current_steps": 13160, "total_steps": 15621, "loss": 0.3051, "lr": 1.4757592313610322e-07, "epoch": 0.8424556686511747, "percentage": 84.25, "elapsed_time": "1:25:06", "remaining_time": "0:15:55", "throughput": 8111.78, "total_tokens": 41425984} +{"current_steps": 13165, "total_steps": 15621, "loss": 0.3312, "lr": 1.4699223823960128e-07, "epoch": 0.8427757505921516, "percentage": 84.28, "elapsed_time": "1:25:07", "remaining_time": "0:15:52", "throughput": 8113.8, "total_tokens": 41441920} +{"current_steps": 13170, "total_steps": 15621, "loss": 0.3389, "lr": 1.4640961833330579e-07, "epoch": 0.8430958325331285, "percentage": 84.31, "elapsed_time": "1:25:08", "remaining_time": "0:15:50", "throughput": 8115.81, "total_tokens": 41457664} +{"current_steps": 13175, "total_steps": 15621, "loss": 0.2518, "lr": 1.4582806414462378e-07, "epoch": 0.8434159144741054, "percentage": 84.34, "elapsed_time": "1:25:08", "remaining_time": "0:15:48", "throughput": 8117.74, "total_tokens": 41472832} +{"current_steps": 13180, "total_steps": 15621, "loss": 0.33, "lr": 1.4524757639963258e-07, "epoch": 0.8437359964150822, "percentage": 84.37, "elapsed_time": "1:25:09", "remaining_time": "0:15:46", "throughput": 8120.03, "total_tokens": 41490368} +{"current_steps": 13185, "total_steps": 15621, "loss": 0.4397, "lr": 1.4466815582307845e-07, "epoch": 0.8440560783560591, "percentage": 84.41, "elapsed_time": "1:25:10", "remaining_time": "0:15:44", "throughput": 8122.12, "total_tokens": 41506624} +{"current_steps": 13190, "total_steps": 15621, "loss": 0.251, "lr": 1.440898031383746e-07, "epoch": 0.844376160297036, "percentage": 84.44, "elapsed_time": "1:25:11", "remaining_time": "0:15:41", "throughput": 8124.26, "total_tokens": 41523264} +{"current_steps": 13195, "total_steps": 15621, "loss": 0.3803, "lr": 1.4351251906760064e-07, "epoch": 0.844696242238013, "percentage": 84.47, "elapsed_time": "1:25:11", "remaining_time": "0:15:39", "throughput": 8126.25, "total_tokens": 41538944} +{"current_steps": 13200, "total_steps": 15621, "loss": 0.3939, "lr": 1.4293630433150317e-07, "epoch": 0.8450163241789899, "percentage": 84.5, "elapsed_time": "1:25:12", "remaining_time": "0:15:37", "throughput": 8128.29, "total_tokens": 41554880} +{"current_steps": 13205, "total_steps": 15621, "loss": 0.4367, "lr": 1.423611596494927e-07, "epoch": 0.8453364061199667, "percentage": 84.53, "elapsed_time": "1:25:13", "remaining_time": "0:15:35", "throughput": 8130.1, "total_tokens": 41569280} +{"current_steps": 13210, "total_steps": 15621, "loss": 0.3546, "lr": 1.4178708573964438e-07, "epoch": 0.8456564880609436, "percentage": 84.57, "elapsed_time": "1:25:13", "remaining_time": "0:15:33", "throughput": 8132.03, "total_tokens": 41584576} +{"current_steps": 13215, "total_steps": 15621, "loss": 0.3589, "lr": 1.4121408331869566e-07, "epoch": 0.8459765700019205, "percentage": 84.6, "elapsed_time": "1:25:14", "remaining_time": "0:15:31", "throughput": 8133.99, "total_tokens": 41600000} +{"current_steps": 13220, "total_steps": 15621, "loss": 0.3603, "lr": 1.406421531020474e-07, "epoch": 0.8462966519428974, "percentage": 84.63, "elapsed_time": "1:25:14", "remaining_time": "0:15:28", "throughput": 8135.89, "total_tokens": 41615040} +{"current_steps": 13225, "total_steps": 15621, "loss": 0.3551, "lr": 1.4007129580376097e-07, "epoch": 0.8466167338838743, "percentage": 84.66, "elapsed_time": "1:25:15", "remaining_time": "0:15:26", "throughput": 8137.81, "total_tokens": 41630208} +{"current_steps": 13230, "total_steps": 15621, "loss": 0.3672, "lr": 1.3950151213655847e-07, "epoch": 0.8469368158248511, "percentage": 84.69, "elapsed_time": "1:25:16", "remaining_time": "0:15:24", "throughput": 8139.75, "total_tokens": 41645440} +{"current_steps": 13235, "total_steps": 15621, "loss": 0.3281, "lr": 1.389328028118214e-07, "epoch": 0.847256897765828, "percentage": 84.73, "elapsed_time": "1:25:16", "remaining_time": "0:15:22", "throughput": 8141.75, "total_tokens": 41661184} +{"current_steps": 13240, "total_steps": 15621, "loss": 0.358, "lr": 1.3836516853959e-07, "epoch": 0.8475769797068049, "percentage": 84.76, "elapsed_time": "1:25:17", "remaining_time": "0:15:20", "throughput": 8143.66, "total_tokens": 41676224} +{"current_steps": 13245, "total_steps": 15621, "loss": 0.308, "lr": 1.3779861002856242e-07, "epoch": 0.8478970616477818, "percentage": 84.79, "elapsed_time": "1:25:18", "remaining_time": "0:15:18", "throughput": 8145.49, "total_tokens": 41690816} +{"current_steps": 13250, "total_steps": 15621, "loss": 0.3357, "lr": 1.3723312798609366e-07, "epoch": 0.8482171435887588, "percentage": 84.82, "elapsed_time": "1:25:18", "remaining_time": "0:15:16", "throughput": 8147.52, "total_tokens": 41706688} +{"current_steps": 13255, "total_steps": 15621, "loss": 0.349, "lr": 1.3666872311819455e-07, "epoch": 0.8485372255297357, "percentage": 84.85, "elapsed_time": "1:25:19", "remaining_time": "0:15:13", "throughput": 8149.45, "total_tokens": 41721920} +{"current_steps": 13260, "total_steps": 15621, "loss": 0.285, "lr": 1.361053961295312e-07, "epoch": 0.8488573074707125, "percentage": 84.89, "elapsed_time": "1:25:20", "remaining_time": "0:15:11", "throughput": 8151.52, "total_tokens": 41738112} +{"current_steps": 13265, "total_steps": 15621, "loss": 0.3445, "lr": 1.3554314772342412e-07, "epoch": 0.8491773894116894, "percentage": 84.92, "elapsed_time": "1:25:20", "remaining_time": "0:15:09", "throughput": 8153.51, "total_tokens": 41753792} +{"current_steps": 13270, "total_steps": 15621, "loss": 0.3294, "lr": 1.349819786018469e-07, "epoch": 0.8494974713526663, "percentage": 84.95, "elapsed_time": "1:25:21", "remaining_time": "0:15:07", "throughput": 8155.79, "total_tokens": 41771328} +{"current_steps": 13275, "total_steps": 15621, "loss": 0.3734, "lr": 1.3442188946542566e-07, "epoch": 0.8498175532936432, "percentage": 84.98, "elapsed_time": "1:25:22", "remaining_time": "0:15:05", "throughput": 8157.9, "total_tokens": 41787712} +{"current_steps": 13280, "total_steps": 15621, "loss": 0.3099, "lr": 1.338628810134388e-07, "epoch": 0.85013763523462, "percentage": 85.01, "elapsed_time": "1:25:23", "remaining_time": "0:15:03", "throughput": 8159.83, "total_tokens": 41803072} +{"current_steps": 13285, "total_steps": 15621, "loss": 0.3624, "lr": 1.3330495394381435e-07, "epoch": 0.8504577171755969, "percentage": 85.05, "elapsed_time": "1:25:23", "remaining_time": "0:15:00", "throughput": 8161.82, "total_tokens": 41818688} +{"current_steps": 13290, "total_steps": 15621, "loss": 0.2868, "lr": 1.3274810895313083e-07, "epoch": 0.8507777991165738, "percentage": 85.08, "elapsed_time": "1:25:24", "remaining_time": "0:14:58", "throughput": 8163.73, "total_tokens": 41833792} +{"current_steps": 13294, "total_steps": 15621, "eval_loss": 0.35909759998321533, "epoch": 0.8510338646693554, "percentage": 85.1, "elapsed_time": "1:26:15", "remaining_time": "0:15:05", "throughput": 8085.49, "total_tokens": 41847872} +{"current_steps": 13295, "total_steps": 15621, "loss": 0.3846, "lr": 1.321923467366164e-07, "epoch": 0.8510978810575507, "percentage": 85.11, "elapsed_time": "1:29:30", "remaining_time": "0:15:39", "throughput": 7792.44, "total_tokens": 41850880} +{"current_steps": 13300, "total_steps": 15621, "loss": 0.183, "lr": 1.3163766798814603e-07, "epoch": 0.8514179629985277, "percentage": 85.14, "elapsed_time": "1:29:31", "remaining_time": "0:15:37", "throughput": 7794.36, "total_tokens": 41866560} +{"current_steps": 13305, "total_steps": 15621, "loss": 0.3041, "lr": 1.3108407340024264e-07, "epoch": 0.8517380449395046, "percentage": 85.17, "elapsed_time": "1:29:32", "remaining_time": "0:15:35", "throughput": 7796.3, "total_tokens": 41882240} +{"current_steps": 13310, "total_steps": 15621, "loss": 0.3421, "lr": 1.3053156366407613e-07, "epoch": 0.8520581268804814, "percentage": 85.21, "elapsed_time": "1:29:32", "remaining_time": "0:15:32", "throughput": 7798.38, "total_tokens": 41898880} +{"current_steps": 13315, "total_steps": 15621, "loss": 0.2428, "lr": 1.2998013946946119e-07, "epoch": 0.8523782088214583, "percentage": 85.24, "elapsed_time": "1:29:33", "remaining_time": "0:15:30", "throughput": 7800.52, "total_tokens": 41915968} +{"current_steps": 13320, "total_steps": 15621, "loss": 0.3499, "lr": 1.2942980150485706e-07, "epoch": 0.8526982907624352, "percentage": 85.27, "elapsed_time": "1:29:34", "remaining_time": "0:15:28", "throughput": 7802.35, "total_tokens": 41930816} +{"current_steps": 13325, "total_steps": 15621, "loss": 0.3192, "lr": 1.2888055045736723e-07, "epoch": 0.8530183727034121, "percentage": 85.3, "elapsed_time": "1:29:34", "remaining_time": "0:15:26", "throughput": 7804.38, "total_tokens": 41947200} +{"current_steps": 13330, "total_steps": 15621, "loss": 0.301, "lr": 1.283323870127384e-07, "epoch": 0.853338454644389, "percentage": 85.33, "elapsed_time": "1:29:35", "remaining_time": "0:15:23", "throughput": 7806.23, "total_tokens": 41962240} +{"current_steps": 13335, "total_steps": 15621, "loss": 0.3015, "lr": 1.2778531185535911e-07, "epoch": 0.8536585365853658, "percentage": 85.37, "elapsed_time": "1:29:36", "remaining_time": "0:15:21", "throughput": 7808.28, "total_tokens": 41978752} +{"current_steps": 13340, "total_steps": 15621, "loss": 0.3288, "lr": 1.2723932566825844e-07, "epoch": 0.8539786185263427, "percentage": 85.4, "elapsed_time": "1:29:36", "remaining_time": "0:15:19", "throughput": 7810.18, "total_tokens": 41994112} +{"current_steps": 13345, "total_steps": 15621, "loss": 0.294, "lr": 1.2669442913310723e-07, "epoch": 0.8542987004673196, "percentage": 85.43, "elapsed_time": "1:29:37", "remaining_time": "0:15:17", "throughput": 7812.2, "total_tokens": 42010432} +{"current_steps": 13350, "total_steps": 15621, "loss": 0.2745, "lr": 1.2615062293021506e-07, "epoch": 0.8546187824082965, "percentage": 85.46, "elapsed_time": "1:29:38", "remaining_time": "0:15:14", "throughput": 7814.11, "total_tokens": 42025984} +{"current_steps": 13355, "total_steps": 15621, "loss": 0.3147, "lr": 1.2560790773853025e-07, "epoch": 0.8549388643492735, "percentage": 85.49, "elapsed_time": "1:29:38", "remaining_time": "0:15:12", "throughput": 7815.94, "total_tokens": 42040832} +{"current_steps": 13360, "total_steps": 15621, "loss": 0.4083, "lr": 1.2506628423563915e-07, "epoch": 0.8552589462902503, "percentage": 85.53, "elapsed_time": "1:29:39", "remaining_time": "0:15:10", "throughput": 7818.02, "total_tokens": 42057536} +{"current_steps": 13365, "total_steps": 15621, "loss": 0.2828, "lr": 1.2452575309776493e-07, "epoch": 0.8555790282312272, "percentage": 85.56, "elapsed_time": "1:29:40", "remaining_time": "0:15:08", "throughput": 7819.96, "total_tokens": 42073152} +{"current_steps": 13370, "total_steps": 15621, "loss": 0.3032, "lr": 1.2398631499976732e-07, "epoch": 0.8558991101722041, "percentage": 85.59, "elapsed_time": "1:29:40", "remaining_time": "0:15:05", "throughput": 7821.85, "total_tokens": 42088512} +{"current_steps": 13375, "total_steps": 15621, "loss": 0.4253, "lr": 1.234479706151409e-07, "epoch": 0.856219192113181, "percentage": 85.62, "elapsed_time": "1:29:41", "remaining_time": "0:15:03", "throughput": 7823.7, "total_tokens": 42103552} +{"current_steps": 13380, "total_steps": 15621, "loss": 0.3577, "lr": 1.2291072061601503e-07, "epoch": 0.8565392740541579, "percentage": 85.65, "elapsed_time": "1:29:42", "remaining_time": "0:15:01", "throughput": 7825.73, "total_tokens": 42119872} +{"current_steps": 13385, "total_steps": 15621, "loss": 0.4374, "lr": 1.2237456567315264e-07, "epoch": 0.8568593559951347, "percentage": 85.69, "elapsed_time": "1:29:42", "remaining_time": "0:14:59", "throughput": 7827.86, "total_tokens": 42136832} +{"current_steps": 13390, "total_steps": 15621, "loss": 0.3158, "lr": 1.2183950645594944e-07, "epoch": 0.8571794379361116, "percentage": 85.72, "elapsed_time": "1:29:43", "remaining_time": "0:14:57", "throughput": 7829.84, "total_tokens": 42152896} +{"current_steps": 13395, "total_steps": 15621, "loss": 0.3555, "lr": 1.2130554363243318e-07, "epoch": 0.8574995198770885, "percentage": 85.75, "elapsed_time": "1:29:44", "remaining_time": "0:14:54", "throughput": 7831.69, "total_tokens": 42168064} +{"current_steps": 13400, "total_steps": 15621, "loss": 0.3738, "lr": 1.207726778692625e-07, "epoch": 0.8578196018180654, "percentage": 85.78, "elapsed_time": "1:29:44", "remaining_time": "0:14:52", "throughput": 7833.49, "total_tokens": 42182784} +{"current_steps": 13405, "total_steps": 15621, "loss": 0.3228, "lr": 1.2024090983172718e-07, "epoch": 0.8581396837590423, "percentage": 85.81, "elapsed_time": "1:29:45", "remaining_time": "0:14:50", "throughput": 7835.6, "total_tokens": 42199744} +{"current_steps": 13410, "total_steps": 15621, "loss": 0.3631, "lr": 1.1971024018374532e-07, "epoch": 0.8584597657000193, "percentage": 85.85, "elapsed_time": "1:29:46", "remaining_time": "0:14:48", "throughput": 7837.48, "total_tokens": 42215040} +{"current_steps": 13415, "total_steps": 15621, "loss": 0.3079, "lr": 1.1918066958786432e-07, "epoch": 0.8587798476409961, "percentage": 85.88, "elapsed_time": "1:29:46", "remaining_time": "0:14:45", "throughput": 7839.32, "total_tokens": 42230144} +{"current_steps": 13420, "total_steps": 15621, "loss": 0.3677, "lr": 1.1865219870525922e-07, "epoch": 0.859099929581973, "percentage": 85.91, "elapsed_time": "1:29:47", "remaining_time": "0:14:43", "throughput": 7841.34, "total_tokens": 42246528} +{"current_steps": 13425, "total_steps": 15621, "loss": 0.4245, "lr": 1.1812482819573222e-07, "epoch": 0.8594200115229499, "percentage": 85.94, "elapsed_time": "1:29:48", "remaining_time": "0:14:41", "throughput": 7843.4, "total_tokens": 42263168} +{"current_steps": 13430, "total_steps": 15621, "loss": 0.3877, "lr": 1.1759855871771163e-07, "epoch": 0.8597400934639268, "percentage": 85.97, "elapsed_time": "1:29:49", "remaining_time": "0:14:39", "throughput": 7845.34, "total_tokens": 42278912} +{"current_steps": 13435, "total_steps": 15621, "loss": 0.387, "lr": 1.1707339092825075e-07, "epoch": 0.8600601754049036, "percentage": 86.01, "elapsed_time": "1:29:49", "remaining_time": "0:14:36", "throughput": 7847.29, "total_tokens": 42294656} +{"current_steps": 13440, "total_steps": 15621, "loss": 0.3927, "lr": 1.1654932548302842e-07, "epoch": 0.8603802573458805, "percentage": 86.04, "elapsed_time": "1:29:50", "remaining_time": "0:14:34", "throughput": 7849.38, "total_tokens": 42311552} +{"current_steps": 13445, "total_steps": 15621, "loss": 0.365, "lr": 1.1602636303634595e-07, "epoch": 0.8607003392868574, "percentage": 86.07, "elapsed_time": "1:29:51", "remaining_time": "0:14:32", "throughput": 7851.36, "total_tokens": 42327552} +{"current_steps": 13450, "total_steps": 15621, "loss": 0.3526, "lr": 1.1550450424112801e-07, "epoch": 0.8610204212278343, "percentage": 86.1, "elapsed_time": "1:29:51", "remaining_time": "0:14:30", "throughput": 7853.31, "total_tokens": 42343360} +{"current_steps": 13455, "total_steps": 15621, "loss": 0.3455, "lr": 1.1498374974892178e-07, "epoch": 0.8613405031688112, "percentage": 86.13, "elapsed_time": "1:29:52", "remaining_time": "0:14:28", "throughput": 7855.39, "total_tokens": 42360064} +{"current_steps": 13460, "total_steps": 15621, "loss": 0.4371, "lr": 1.144641002098955e-07, "epoch": 0.8616605851097882, "percentage": 86.17, "elapsed_time": "1:29:53", "remaining_time": "0:14:25", "throughput": 7857.19, "total_tokens": 42374976} +{"current_steps": 13465, "total_steps": 15621, "loss": 0.3502, "lr": 1.1394555627283697e-07, "epoch": 0.861980667050765, "percentage": 86.2, "elapsed_time": "1:29:53", "remaining_time": "0:14:23", "throughput": 7859.27, "total_tokens": 42391616} +{"current_steps": 13470, "total_steps": 15621, "loss": 0.3075, "lr": 1.134281185851551e-07, "epoch": 0.8623007489917419, "percentage": 86.23, "elapsed_time": "1:29:54", "remaining_time": "0:14:21", "throughput": 7861.08, "total_tokens": 42406528} +{"current_steps": 13475, "total_steps": 15621, "loss": 0.2948, "lr": 1.1291178779287691e-07, "epoch": 0.8626208309327188, "percentage": 86.26, "elapsed_time": "1:29:55", "remaining_time": "0:14:19", "throughput": 7863.31, "total_tokens": 42424320} +{"current_steps": 13480, "total_steps": 15621, "loss": 0.3616, "lr": 1.1239656454064683e-07, "epoch": 0.8629409128736957, "percentage": 86.29, "elapsed_time": "1:29:55", "remaining_time": "0:14:17", "throughput": 7865.37, "total_tokens": 42440960} +{"current_steps": 13485, "total_steps": 15621, "loss": 0.2464, "lr": 1.1188244947172776e-07, "epoch": 0.8632609948146726, "percentage": 86.33, "elapsed_time": "1:29:56", "remaining_time": "0:14:14", "throughput": 7867.27, "total_tokens": 42456448} +{"current_steps": 13490, "total_steps": 15621, "loss": 0.3201, "lr": 1.1136944322799812e-07, "epoch": 0.8635810767556494, "percentage": 86.36, "elapsed_time": "1:29:57", "remaining_time": "0:14:12", "throughput": 7869.23, "total_tokens": 42472448} +{"current_steps": 13495, "total_steps": 15621, "loss": 0.3177, "lr": 1.1085754644995227e-07, "epoch": 0.8639011586966263, "percentage": 86.39, "elapsed_time": "1:29:57", "remaining_time": "0:14:10", "throughput": 7871.11, "total_tokens": 42487808} +{"current_steps": 13500, "total_steps": 15621, "loss": 0.3577, "lr": 1.1034675977669938e-07, "epoch": 0.8642212406376032, "percentage": 86.42, "elapsed_time": "1:29:58", "remaining_time": "0:14:08", "throughput": 7873.06, "total_tokens": 42503744} +{"current_steps": 13505, "total_steps": 15621, "loss": 0.6111, "lr": 1.0983708384596258e-07, "epoch": 0.8645413225785801, "percentage": 86.45, "elapsed_time": "1:29:59", "remaining_time": "0:14:05", "throughput": 7875.19, "total_tokens": 42520768} +{"current_steps": 13510, "total_steps": 15621, "loss": 0.3703, "lr": 1.0932851929407827e-07, "epoch": 0.864861404519557, "percentage": 86.49, "elapsed_time": "1:30:00", "remaining_time": "0:14:03", "throughput": 7877.24, "total_tokens": 42537408} +{"current_steps": 13515, "total_steps": 15621, "loss": 0.3583, "lr": 1.0882106675599534e-07, "epoch": 0.8651814864605339, "percentage": 86.52, "elapsed_time": "1:30:00", "remaining_time": "0:14:01", "throughput": 7879.25, "total_tokens": 42553728} +{"current_steps": 13520, "total_steps": 15621, "loss": 0.3226, "lr": 1.0831472686527409e-07, "epoch": 0.8655015684015108, "percentage": 86.55, "elapsed_time": "1:30:01", "remaining_time": "0:13:59", "throughput": 7881.1, "total_tokens": 42568896} +{"current_steps": 13525, "total_steps": 15621, "loss": 0.2985, "lr": 1.0780950025408586e-07, "epoch": 0.8658216503424877, "percentage": 86.58, "elapsed_time": "1:30:02", "remaining_time": "0:13:57", "throughput": 7882.94, "total_tokens": 42584000} +{"current_steps": 13530, "total_steps": 15621, "loss": 0.3884, "lr": 1.0730538755321217e-07, "epoch": 0.8661417322834646, "percentage": 86.61, "elapsed_time": "1:30:02", "remaining_time": "0:13:54", "throughput": 7884.92, "total_tokens": 42600192} +{"current_steps": 13535, "total_steps": 15621, "loss": 0.2997, "lr": 1.0680238939204334e-07, "epoch": 0.8664618142244415, "percentage": 86.65, "elapsed_time": "1:30:03", "remaining_time": "0:13:52", "throughput": 7886.66, "total_tokens": 42614656} +{"current_steps": 13540, "total_steps": 15621, "loss": 0.402, "lr": 1.0630050639857879e-07, "epoch": 0.8667818961654183, "percentage": 86.68, "elapsed_time": "1:30:04", "remaining_time": "0:13:50", "throughput": 7888.46, "total_tokens": 42629504} +{"current_steps": 13545, "total_steps": 15621, "loss": 0.3165, "lr": 1.0579973919942508e-07, "epoch": 0.8671019781063952, "percentage": 86.71, "elapsed_time": "1:30:04", "remaining_time": "0:13:48", "throughput": 7890.24, "total_tokens": 42644224} +{"current_steps": 13550, "total_steps": 15621, "loss": 0.2452, "lr": 1.0530008841979621e-07, "epoch": 0.8674220600473721, "percentage": 86.74, "elapsed_time": "1:30:05", "remaining_time": "0:13:46", "throughput": 7892.1, "total_tokens": 42659584} +{"current_steps": 13555, "total_steps": 15621, "loss": 0.272, "lr": 1.048015546835117e-07, "epoch": 0.867742141988349, "percentage": 86.77, "elapsed_time": "1:30:06", "remaining_time": "0:13:43", "throughput": 7894.1, "total_tokens": 42675776} +{"current_steps": 13560, "total_steps": 15621, "loss": 0.388, "lr": 1.0430413861299691e-07, "epoch": 0.8680622239293259, "percentage": 86.81, "elapsed_time": "1:30:06", "remaining_time": "0:13:41", "throughput": 7896.26, "total_tokens": 42693184} +{"current_steps": 13565, "total_steps": 15621, "loss": 0.4564, "lr": 1.0380784082928196e-07, "epoch": 0.8683823058703029, "percentage": 86.84, "elapsed_time": "1:30:07", "remaining_time": "0:13:39", "throughput": 7898.45, "total_tokens": 42710784} +{"current_steps": 13570, "total_steps": 15621, "loss": 0.3905, "lr": 1.0331266195200006e-07, "epoch": 0.8687023878112797, "percentage": 86.87, "elapsed_time": "1:30:08", "remaining_time": "0:13:37", "throughput": 7900.45, "total_tokens": 42727040} +{"current_steps": 13575, "total_steps": 15621, "loss": 0.3189, "lr": 1.0281860259938779e-07, "epoch": 0.8690224697522566, "percentage": 86.9, "elapsed_time": "1:30:08", "remaining_time": "0:13:35", "throughput": 7902.3, "total_tokens": 42742208} +{"current_steps": 13580, "total_steps": 15621, "loss": 0.3634, "lr": 1.0232566338828452e-07, "epoch": 0.8693425516932335, "percentage": 86.93, "elapsed_time": "1:30:09", "remaining_time": "0:13:33", "throughput": 7904.29, "total_tokens": 42758464} +{"current_steps": 13585, "total_steps": 15621, "loss": 0.4021, "lr": 1.018338449341305e-07, "epoch": 0.8696626336342104, "percentage": 86.97, "elapsed_time": "1:30:10", "remaining_time": "0:13:30", "throughput": 7906.19, "total_tokens": 42774016} +{"current_steps": 13590, "total_steps": 15621, "loss": 0.3924, "lr": 1.0134314785096632e-07, "epoch": 0.8699827155751872, "percentage": 87.0, "elapsed_time": "1:30:10", "remaining_time": "0:13:28", "throughput": 7908.05, "total_tokens": 42789248} +{"current_steps": 13595, "total_steps": 15621, "loss": 0.3446, "lr": 1.0085357275143359e-07, "epoch": 0.8703027975161641, "percentage": 87.03, "elapsed_time": "1:30:11", "remaining_time": "0:13:26", "throughput": 7909.92, "total_tokens": 42804608} +{"current_steps": 13600, "total_steps": 15621, "loss": 0.495, "lr": 1.0036512024677268e-07, "epoch": 0.870622879457141, "percentage": 87.06, "elapsed_time": "1:30:12", "remaining_time": "0:13:24", "throughput": 7911.73, "total_tokens": 42819584} +{"current_steps": 13605, "total_steps": 15621, "loss": 0.2823, "lr": 9.98777909468217e-08, "epoch": 0.8709429613981179, "percentage": 87.09, "elapsed_time": "1:30:12", "remaining_time": "0:13:22", "throughput": 7913.63, "total_tokens": 42835200} +{"current_steps": 13610, "total_steps": 15621, "loss": 0.4072, "lr": 9.939158546001736e-08, "epoch": 0.8712630433390948, "percentage": 87.13, "elapsed_time": "1:30:13", "remaining_time": "0:13:19", "throughput": 7915.8, "total_tokens": 42852672} +{"current_steps": 13615, "total_steps": 15621, "loss": 0.3252, "lr": 9.890650439339299e-08, "epoch": 0.8715831252800716, "percentage": 87.16, "elapsed_time": "1:30:14", "remaining_time": "0:13:17", "throughput": 7917.76, "total_tokens": 42868672} +{"current_steps": 13620, "total_steps": 15621, "loss": 0.412, "lr": 9.842254835257791e-08, "epoch": 0.8719032072210486, "percentage": 87.19, "elapsed_time": "1:30:14", "remaining_time": "0:13:15", "throughput": 7919.64, "total_tokens": 42884096} +{"current_steps": 13625, "total_steps": 15621, "loss": 0.374, "lr": 9.793971794179679e-08, "epoch": 0.8722232891620255, "percentage": 87.22, "elapsed_time": "1:30:15", "remaining_time": "0:13:13", "throughput": 7921.41, "total_tokens": 42898752} +{"current_steps": 13630, "total_steps": 15621, "loss": 0.3535, "lr": 9.745801376386931e-08, "epoch": 0.8725433711030024, "percentage": 87.25, "elapsed_time": "1:30:16", "remaining_time": "0:13:11", "throughput": 7923.36, "total_tokens": 42914688} +{"current_steps": 13635, "total_steps": 15621, "loss": 0.3186, "lr": 9.697743642020861e-08, "epoch": 0.8728634530439793, "percentage": 87.29, "elapsed_time": "1:30:16", "remaining_time": "0:13:08", "throughput": 7925.33, "total_tokens": 42930688} +{"current_steps": 13640, "total_steps": 15621, "loss": 0.3329, "lr": 9.649798651082119e-08, "epoch": 0.8731835349849562, "percentage": 87.32, "elapsed_time": "1:30:17", "remaining_time": "0:13:06", "throughput": 7927.34, "total_tokens": 42947008} +{"current_steps": 13645, "total_steps": 15621, "loss": 0.3973, "lr": 9.601966463430588e-08, "epoch": 0.873503616925933, "percentage": 87.35, "elapsed_time": "1:30:18", "remaining_time": "0:13:04", "throughput": 7929.26, "total_tokens": 42962816} +{"current_steps": 13650, "total_steps": 15621, "loss": 0.3428, "lr": 9.554247138785321e-08, "epoch": 0.8738236988669099, "percentage": 87.38, "elapsed_time": "1:30:18", "remaining_time": "0:13:02", "throughput": 7931.05, "total_tokens": 42977664} +{"current_steps": 13655, "total_steps": 15621, "loss": 0.4653, "lr": 9.506640736724447e-08, "epoch": 0.8741437808078868, "percentage": 87.41, "elapsed_time": "1:30:19", "remaining_time": "0:13:00", "throughput": 7932.97, "total_tokens": 42993472} +{"current_steps": 13660, "total_steps": 15621, "loss": 0.3973, "lr": 9.459147316685123e-08, "epoch": 0.8744638627488637, "percentage": 87.45, "elapsed_time": "1:30:20", "remaining_time": "0:12:58", "throughput": 7935.11, "total_tokens": 43010688} +{"current_steps": 13665, "total_steps": 15621, "loss": 0.3411, "lr": 9.41176693796345e-08, "epoch": 0.8747839446898406, "percentage": 87.48, "elapsed_time": "1:30:20", "remaining_time": "0:12:55", "throughput": 7937.17, "total_tokens": 43027392} +{"current_steps": 13670, "total_steps": 15621, "loss": 0.4175, "lr": 9.364499659714364e-08, "epoch": 0.8751040266308175, "percentage": 87.51, "elapsed_time": "1:30:21", "remaining_time": "0:12:53", "throughput": 7939.07, "total_tokens": 43043008} +{"current_steps": 13675, "total_steps": 15621, "loss": 0.3438, "lr": 9.31734554095165e-08, "epoch": 0.8754241085717944, "percentage": 87.54, "elapsed_time": "1:30:22", "remaining_time": "0:12:51", "throughput": 7941.04, "total_tokens": 43059072} +{"current_steps": 13680, "total_steps": 15621, "loss": 0.3456, "lr": 9.270304640547744e-08, "epoch": 0.8757441905127713, "percentage": 87.57, "elapsed_time": "1:30:23", "remaining_time": "0:12:49", "throughput": 7942.93, "total_tokens": 43074624} +{"current_steps": 13685, "total_steps": 15621, "loss": 0.3922, "lr": 9.223377017233768e-08, "epoch": 0.8760642724537482, "percentage": 87.61, "elapsed_time": "1:30:23", "remaining_time": "0:12:47", "throughput": 7944.74, "total_tokens": 43089536} +{"current_steps": 13690, "total_steps": 15621, "loss": 0.361, "lr": 9.176562729599458e-08, "epoch": 0.8763843543947251, "percentage": 87.64, "elapsed_time": "1:30:24", "remaining_time": "0:12:45", "throughput": 7946.55, "total_tokens": 43104512} +{"current_steps": 13695, "total_steps": 15621, "loss": 0.3434, "lr": 9.129861836092944e-08, "epoch": 0.8767044363357019, "percentage": 87.67, "elapsed_time": "1:30:24", "remaining_time": "0:12:42", "throughput": 7948.52, "total_tokens": 43120640} +{"current_steps": 13700, "total_steps": 15621, "loss": 0.4433, "lr": 9.083274395020845e-08, "epoch": 0.8770245182766788, "percentage": 87.7, "elapsed_time": "1:30:25", "remaining_time": "0:12:40", "throughput": 7950.43, "total_tokens": 43136384} +{"current_steps": 13705, "total_steps": 15621, "loss": 0.4021, "lr": 9.036800464548156e-08, "epoch": 0.8773446002176557, "percentage": 87.73, "elapsed_time": "1:30:26", "remaining_time": "0:12:38", "throughput": 7952.51, "total_tokens": 43153216} +{"current_steps": 13710, "total_steps": 15621, "loss": 0.3506, "lr": 8.990440102698138e-08, "epoch": 0.8776646821586326, "percentage": 87.77, "elapsed_time": "1:30:27", "remaining_time": "0:12:36", "throughput": 7954.28, "total_tokens": 43167936} +{"current_steps": 13715, "total_steps": 15621, "loss": 0.2722, "lr": 8.944193367352182e-08, "epoch": 0.8779847640996095, "percentage": 87.8, "elapsed_time": "1:30:27", "remaining_time": "0:12:34", "throughput": 7956.21, "total_tokens": 43183872} +{"current_steps": 13720, "total_steps": 15621, "loss": 0.408, "lr": 8.898060316249944e-08, "epoch": 0.8783048460405863, "percentage": 87.83, "elapsed_time": "1:30:28", "remaining_time": "0:12:32", "throughput": 7958.21, "total_tokens": 43200256} +{"current_steps": 13725, "total_steps": 15621, "loss": 0.3606, "lr": 8.852041006989064e-08, "epoch": 0.8786249279815633, "percentage": 87.86, "elapsed_time": "1:30:29", "remaining_time": "0:12:29", "throughput": 7960.34, "total_tokens": 43217600} +{"current_steps": 13730, "total_steps": 15621, "loss": 0.3858, "lr": 8.80613549702518e-08, "epoch": 0.8789450099225402, "percentage": 87.89, "elapsed_time": "1:30:29", "remaining_time": "0:12:27", "throughput": 7962.26, "total_tokens": 43233344} +{"current_steps": 13735, "total_steps": 15621, "loss": 0.5397, "lr": 8.760343843671824e-08, "epoch": 0.8792650918635171, "percentage": 87.93, "elapsed_time": "1:30:30", "remaining_time": "0:12:25", "throughput": 7964.19, "total_tokens": 43249280} +{"current_steps": 13740, "total_steps": 15621, "loss": 0.4595, "lr": 8.714666104100487e-08, "epoch": 0.879585173804494, "percentage": 87.96, "elapsed_time": "1:30:31", "remaining_time": "0:12:23", "throughput": 7966.11, "total_tokens": 43265024} +{"current_steps": 13745, "total_steps": 15621, "loss": 0.3597, "lr": 8.66910233534034e-08, "epoch": 0.8799052557454708, "percentage": 87.99, "elapsed_time": "1:30:31", "remaining_time": "0:12:21", "throughput": 7968.0, "total_tokens": 43280576} +{"current_steps": 13750, "total_steps": 15621, "loss": 0.3074, "lr": 8.62365259427823e-08, "epoch": 0.8802253376864477, "percentage": 88.02, "elapsed_time": "1:30:32", "remaining_time": "0:12:19", "throughput": 7969.87, "total_tokens": 43296064} +{"current_steps": 13755, "total_steps": 15621, "loss": 0.292, "lr": 8.578316937658758e-08, "epoch": 0.8805454196274246, "percentage": 88.05, "elapsed_time": "1:30:33", "remaining_time": "0:12:17", "throughput": 7971.74, "total_tokens": 43311552} +{"current_steps": 13760, "total_steps": 15621, "loss": 0.3216, "lr": 8.533095422083992e-08, "epoch": 0.8808655015684015, "percentage": 88.09, "elapsed_time": "1:30:33", "remaining_time": "0:12:14", "throughput": 7973.52, "total_tokens": 43326272} +{"current_steps": 13765, "total_steps": 15621, "loss": 0.2926, "lr": 8.487988104013533e-08, "epoch": 0.8811855835093784, "percentage": 88.12, "elapsed_time": "1:30:34", "remaining_time": "0:12:12", "throughput": 7975.5, "total_tokens": 43342592} +{"current_steps": 13770, "total_steps": 15621, "loss": 0.3183, "lr": 8.4429950397644e-08, "epoch": 0.8815056654503552, "percentage": 88.15, "elapsed_time": "1:30:35", "remaining_time": "0:12:10", "throughput": 7977.34, "total_tokens": 43357888} +{"current_steps": 13775, "total_steps": 15621, "loss": 0.272, "lr": 8.398116285510948e-08, "epoch": 0.8818257473913321, "percentage": 88.18, "elapsed_time": "1:30:35", "remaining_time": "0:12:08", "throughput": 7979.34, "total_tokens": 43374272} +{"current_steps": 13780, "total_steps": 15621, "loss": 0.2715, "lr": 8.353351897284844e-08, "epoch": 0.8821458293323091, "percentage": 88.21, "elapsed_time": "1:30:36", "remaining_time": "0:12:06", "throughput": 7981.69, "total_tokens": 43393280} +{"current_steps": 13785, "total_steps": 15621, "loss": 0.4713, "lr": 8.308701930974949e-08, "epoch": 0.882465911273286, "percentage": 88.25, "elapsed_time": "1:30:37", "remaining_time": "0:12:04", "throughput": 7983.69, "total_tokens": 43409600} +{"current_steps": 13790, "total_steps": 15621, "loss": 0.4144, "lr": 8.264166442327269e-08, "epoch": 0.8827859932142629, "percentage": 88.28, "elapsed_time": "1:30:37", "remaining_time": "0:12:02", "throughput": 7985.47, "total_tokens": 43424384} +{"current_steps": 13795, "total_steps": 15621, "loss": 0.2591, "lr": 8.219745486944885e-08, "epoch": 0.8831060751552398, "percentage": 88.31, "elapsed_time": "1:30:38", "remaining_time": "0:11:59", "throughput": 7987.37, "total_tokens": 43440128} +{"current_steps": 13800, "total_steps": 15621, "loss": 0.4706, "lr": 8.175439120287875e-08, "epoch": 0.8834261570962166, "percentage": 88.34, "elapsed_time": "1:30:39", "remaining_time": "0:11:57", "throughput": 7989.17, "total_tokens": 43455168} +{"current_steps": 13805, "total_steps": 15621, "loss": 0.3454, "lr": 8.131247397673269e-08, "epoch": 0.8837462390371935, "percentage": 88.37, "elapsed_time": "1:30:39", "remaining_time": "0:11:55", "throughput": 7991.24, "total_tokens": 43472064} +{"current_steps": 13810, "total_steps": 15621, "loss": 0.4261, "lr": 8.087170374274921e-08, "epoch": 0.8840663209781704, "percentage": 88.41, "elapsed_time": "1:30:40", "remaining_time": "0:11:53", "throughput": 7993.17, "total_tokens": 43488000} +{"current_steps": 13815, "total_steps": 15621, "loss": 0.2942, "lr": 8.043208105123578e-08, "epoch": 0.8843864029191473, "percentage": 88.44, "elapsed_time": "1:30:41", "remaining_time": "0:11:51", "throughput": 7995.04, "total_tokens": 43503488} +{"current_steps": 13820, "total_steps": 15621, "loss": 0.3418, "lr": 7.999360645106579e-08, "epoch": 0.8847064848601242, "percentage": 88.47, "elapsed_time": "1:30:41", "remaining_time": "0:11:49", "throughput": 7996.83, "total_tokens": 43518336} +{"current_steps": 13825, "total_steps": 15621, "loss": 0.2716, "lr": 7.955628048968011e-08, "epoch": 0.885026566801101, "percentage": 88.5, "elapsed_time": "1:30:42", "remaining_time": "0:11:47", "throughput": 7998.55, "total_tokens": 43532800} +{"current_steps": 13830, "total_steps": 15621, "loss": 0.2586, "lr": 7.912010371308564e-08, "epoch": 0.885346648742078, "percentage": 88.53, "elapsed_time": "1:30:43", "remaining_time": "0:11:44", "throughput": 8000.34, "total_tokens": 43547648} +{"current_steps": 13835, "total_steps": 15621, "loss": 0.2934, "lr": 7.868507666585422e-08, "epoch": 0.8856667306830549, "percentage": 88.57, "elapsed_time": "1:30:43", "remaining_time": "0:11:42", "throughput": 8002.13, "total_tokens": 43562688} +{"current_steps": 13840, "total_steps": 15621, "loss": 0.4174, "lr": 7.825119989112172e-08, "epoch": 0.8859868126240318, "percentage": 88.6, "elapsed_time": "1:30:44", "remaining_time": "0:11:40", "throughput": 8004.01, "total_tokens": 43578176} +{"current_steps": 13845, "total_steps": 15621, "loss": 0.2904, "lr": 7.78184739305886e-08, "epoch": 0.8863068945650087, "percentage": 88.63, "elapsed_time": "1:30:45", "remaining_time": "0:11:38", "throughput": 8005.91, "total_tokens": 43593920} +{"current_steps": 13850, "total_steps": 15621, "loss": 0.3606, "lr": 7.73868993245187e-08, "epoch": 0.8866269765059855, "percentage": 88.66, "elapsed_time": "1:30:45", "remaining_time": "0:11:36", "throughput": 8007.99, "total_tokens": 43610944} +{"current_steps": 13855, "total_steps": 15621, "loss": 0.3406, "lr": 7.695647661173754e-08, "epoch": 0.8869470584469624, "percentage": 88.69, "elapsed_time": "1:30:46", "remaining_time": "0:11:34", "throughput": 8009.94, "total_tokens": 43627008} +{"current_steps": 13860, "total_steps": 15621, "loss": 0.3843, "lr": 7.652720632963284e-08, "epoch": 0.8872671403879393, "percentage": 88.73, "elapsed_time": "1:30:47", "remaining_time": "0:11:32", "throughput": 8011.84, "total_tokens": 43642752} +{"current_steps": 13865, "total_steps": 15621, "loss": 0.3506, "lr": 7.609908901415396e-08, "epoch": 0.8875872223289162, "percentage": 88.76, "elapsed_time": "1:30:47", "remaining_time": "0:11:29", "throughput": 8013.73, "total_tokens": 43658496} +{"current_steps": 13870, "total_steps": 15621, "loss": 0.3988, "lr": 7.567212519981047e-08, "epoch": 0.8879073042698931, "percentage": 88.79, "elapsed_time": "1:30:48", "remaining_time": "0:11:27", "throughput": 8015.64, "total_tokens": 43674304} +{"current_steps": 13875, "total_steps": 15621, "loss": 0.3315, "lr": 7.524631541967108e-08, "epoch": 0.8882273862108699, "percentage": 88.82, "elapsed_time": "1:30:49", "remaining_time": "0:11:25", "throughput": 8017.48, "total_tokens": 43689536} +{"current_steps": 13880, "total_steps": 15621, "loss": 0.2984, "lr": 7.482166020536485e-08, "epoch": 0.8885474681518468, "percentage": 88.85, "elapsed_time": "1:30:50", "remaining_time": "0:11:23", "throughput": 8019.54, "total_tokens": 43706496} +{"current_steps": 13885, "total_steps": 15621, "loss": 0.3097, "lr": 7.439816008707877e-08, "epoch": 0.8888675500928238, "percentage": 88.89, "elapsed_time": "1:30:50", "remaining_time": "0:11:21", "throughput": 8021.31, "total_tokens": 43721408} +{"current_steps": 13890, "total_steps": 15621, "loss": 0.3397, "lr": 7.397581559355748e-08, "epoch": 0.8891876320338007, "percentage": 88.92, "elapsed_time": "1:30:51", "remaining_time": "0:11:19", "throughput": 8023.27, "total_tokens": 43737536} +{"current_steps": 13895, "total_steps": 15621, "loss": 0.4171, "lr": 7.355462725210315e-08, "epoch": 0.8895077139747776, "percentage": 88.95, "elapsed_time": "1:30:51", "remaining_time": "0:11:17", "throughput": 8025.08, "total_tokens": 43752640} +{"current_steps": 13900, "total_steps": 15621, "loss": 0.4097, "lr": 7.313459558857438e-08, "epoch": 0.8898277959157544, "percentage": 88.98, "elapsed_time": "1:30:52", "remaining_time": "0:11:15", "throughput": 8026.97, "total_tokens": 43768384} +{"current_steps": 13905, "total_steps": 15621, "loss": 0.3141, "lr": 7.271572112738566e-08, "epoch": 0.8901478778567313, "percentage": 89.01, "elapsed_time": "1:30:53", "remaining_time": "0:11:12", "throughput": 8028.89, "total_tokens": 43784320} +{"current_steps": 13910, "total_steps": 15621, "loss": 0.3635, "lr": 7.229800439150657e-08, "epoch": 0.8904679597977082, "percentage": 89.05, "elapsed_time": "1:30:53", "remaining_time": "0:11:10", "throughput": 8030.67, "total_tokens": 43799232} +{"current_steps": 13915, "total_steps": 15621, "loss": 0.3806, "lr": 7.188144590246148e-08, "epoch": 0.8907880417386851, "percentage": 89.08, "elapsed_time": "1:30:54", "remaining_time": "0:11:08", "throughput": 8032.63, "total_tokens": 43815360} +{"current_steps": 13920, "total_steps": 15621, "loss": 0.3317, "lr": 7.146604618032848e-08, "epoch": 0.891108123679662, "percentage": 89.11, "elapsed_time": "1:30:55", "remaining_time": "0:11:06", "throughput": 8034.4, "total_tokens": 43830336} +{"current_steps": 13925, "total_steps": 15621, "loss": 0.4062, "lr": 7.105180574373904e-08, "epoch": 0.8914282056206388, "percentage": 89.14, "elapsed_time": "1:30:56", "remaining_time": "0:11:04", "throughput": 8036.38, "total_tokens": 43846656} +{"current_steps": 13930, "total_steps": 15621, "loss": 0.3279, "lr": 7.063872510987712e-08, "epoch": 0.8917482875616157, "percentage": 89.17, "elapsed_time": "1:30:56", "remaining_time": "0:11:02", "throughput": 8038.31, "total_tokens": 43862720} +{"current_steps": 13935, "total_steps": 15621, "loss": 0.3541, "lr": 7.022680479447874e-08, "epoch": 0.8920683695025927, "percentage": 89.21, "elapsed_time": "1:30:57", "remaining_time": "0:11:00", "throughput": 8039.97, "total_tokens": 43876800} +{"current_steps": 13940, "total_steps": 15621, "loss": 0.3046, "lr": 6.98160453118316e-08, "epoch": 0.8923884514435696, "percentage": 89.24, "elapsed_time": "1:30:57", "remaining_time": "0:10:58", "throughput": 8041.81, "total_tokens": 43892160} +{"current_steps": 13945, "total_steps": 15621, "loss": 0.3444, "lr": 6.940644717477328e-08, "epoch": 0.8927085333845465, "percentage": 89.27, "elapsed_time": "1:30:58", "remaining_time": "0:10:56", "throughput": 8043.77, "total_tokens": 43908416} +{"current_steps": 13950, "total_steps": 15621, "loss": 0.4553, "lr": 6.899801089469204e-08, "epoch": 0.8930286153255234, "percentage": 89.3, "elapsed_time": "1:30:59", "remaining_time": "0:10:53", "throughput": 8045.59, "total_tokens": 43923712} +{"current_steps": 13955, "total_steps": 15621, "loss": 0.3491, "lr": 6.85907369815254e-08, "epoch": 0.8933486972665002, "percentage": 89.33, "elapsed_time": "1:31:00", "remaining_time": "0:10:51", "throughput": 8047.49, "total_tokens": 43939520} +{"current_steps": 13960, "total_steps": 15621, "loss": 0.3771, "lr": 6.81846259437595e-08, "epoch": 0.8936687792074771, "percentage": 89.37, "elapsed_time": "1:31:00", "remaining_time": "0:10:49", "throughput": 8049.29, "total_tokens": 43954688} +{"current_steps": 13965, "total_steps": 15621, "loss": 0.3246, "lr": 6.77796782884289e-08, "epoch": 0.893988861148454, "percentage": 89.4, "elapsed_time": "1:31:01", "remaining_time": "0:10:47", "throughput": 8051.07, "total_tokens": 43969600} +{"current_steps": 13970, "total_steps": 15621, "loss": 0.3885, "lr": 6.737589452111526e-08, "epoch": 0.8943089430894309, "percentage": 89.43, "elapsed_time": "1:31:02", "remaining_time": "0:10:45", "throughput": 8052.97, "total_tokens": 43985472} +{"current_steps": 13975, "total_steps": 15621, "loss": 0.4012, "lr": 6.697327514594786e-08, "epoch": 0.8946290250304078, "percentage": 89.46, "elapsed_time": "1:31:02", "remaining_time": "0:10:43", "throughput": 8054.8, "total_tokens": 44000768} +{"current_steps": 13980, "total_steps": 15621, "loss": 0.4538, "lr": 6.657182066560118e-08, "epoch": 0.8949491069713846, "percentage": 89.49, "elapsed_time": "1:31:03", "remaining_time": "0:10:41", "throughput": 8056.76, "total_tokens": 44017088} +{"current_steps": 13985, "total_steps": 15621, "loss": 0.3715, "lr": 6.617153158129596e-08, "epoch": 0.8952691889123615, "percentage": 89.53, "elapsed_time": "1:31:04", "remaining_time": "0:10:39", "throughput": 8058.46, "total_tokens": 44031488} +{"current_steps": 13990, "total_steps": 15621, "loss": 0.3356, "lr": 6.577240839279807e-08, "epoch": 0.8955892708533385, "percentage": 89.56, "elapsed_time": "1:31:04", "remaining_time": "0:10:37", "throughput": 8060.36, "total_tokens": 44047296} +{"current_steps": 13995, "total_steps": 15621, "loss": 0.3162, "lr": 6.537445159841748e-08, "epoch": 0.8959093527943154, "percentage": 89.59, "elapsed_time": "1:31:05", "remaining_time": "0:10:34", "throughput": 8062.33, "total_tokens": 44063744} +{"current_steps": 14000, "total_steps": 15621, "loss": 0.3898, "lr": 6.497766169500752e-08, "epoch": 0.8962294347352923, "percentage": 89.62, "elapsed_time": "1:31:06", "remaining_time": "0:10:32", "throughput": 8064.19, "total_tokens": 44079168} +{"current_steps": 14005, "total_steps": 15621, "loss": 0.2716, "lr": 6.458203917796546e-08, "epoch": 0.8965495166762691, "percentage": 89.65, "elapsed_time": "1:31:06", "remaining_time": "0:10:30", "throughput": 8065.92, "total_tokens": 44093824} +{"current_steps": 14010, "total_steps": 15621, "loss": 0.4511, "lr": 6.418758454123041e-08, "epoch": 0.896869598617246, "percentage": 89.69, "elapsed_time": "1:31:07", "remaining_time": "0:10:28", "throughput": 8068.04, "total_tokens": 44111296} +{"current_steps": 14015, "total_steps": 15621, "loss": 0.3912, "lr": 6.379429827728377e-08, "epoch": 0.8971896805582229, "percentage": 89.72, "elapsed_time": "1:31:08", "remaining_time": "0:10:26", "throughput": 8070.07, "total_tokens": 44128000} +{"current_steps": 14020, "total_steps": 15621, "loss": 0.3795, "lr": 6.340218087714799e-08, "epoch": 0.8975097624991998, "percentage": 89.75, "elapsed_time": "1:31:08", "remaining_time": "0:10:24", "throughput": 8071.92, "total_tokens": 44143488} +{"current_steps": 14025, "total_steps": 15621, "loss": 0.347, "lr": 6.301123283038634e-08, "epoch": 0.8978298444401767, "percentage": 89.78, "elapsed_time": "1:31:09", "remaining_time": "0:10:22", "throughput": 8073.77, "total_tokens": 44158976} +{"current_steps": 14030, "total_steps": 15621, "loss": 0.3207, "lr": 6.262145462510193e-08, "epoch": 0.8981499263811535, "percentage": 89.81, "elapsed_time": "1:31:10", "remaining_time": "0:10:20", "throughput": 8075.82, "total_tokens": 44175808} +{"current_steps": 14035, "total_steps": 15621, "loss": 0.2917, "lr": 6.223284674793738e-08, "epoch": 0.8984700083221304, "percentage": 89.85, "elapsed_time": "1:31:10", "remaining_time": "0:10:18", "throughput": 8077.53, "total_tokens": 44190336} +{"current_steps": 14040, "total_steps": 15621, "loss": 0.39, "lr": 6.184540968407437e-08, "epoch": 0.8987900902631074, "percentage": 89.88, "elapsed_time": "1:31:11", "remaining_time": "0:10:16", "throughput": 8079.36, "total_tokens": 44205696} +{"current_steps": 14045, "total_steps": 15621, "loss": 0.3515, "lr": 6.145914391723239e-08, "epoch": 0.8991101722040843, "percentage": 89.91, "elapsed_time": "1:31:12", "remaining_time": "0:10:14", "throughput": 8081.33, "total_tokens": 44222016} +{"current_steps": 14050, "total_steps": 15621, "loss": 0.327, "lr": 6.107404992966902e-08, "epoch": 0.8994302541450612, "percentage": 89.94, "elapsed_time": "1:31:12", "remaining_time": "0:10:11", "throughput": 8083.32, "total_tokens": 44238592} +{"current_steps": 14055, "total_steps": 15621, "loss": 0.2489, "lr": 6.069012820217856e-08, "epoch": 0.899750336086038, "percentage": 89.98, "elapsed_time": "1:31:13", "remaining_time": "0:10:09", "throughput": 8085.16, "total_tokens": 44254016} +{"current_steps": 14060, "total_steps": 15621, "loss": 0.3843, "lr": 6.030737921409168e-08, "epoch": 0.9000704180270149, "percentage": 90.01, "elapsed_time": "1:31:14", "remaining_time": "0:10:07", "throughput": 8087.0, "total_tokens": 44269376} +{"current_steps": 14065, "total_steps": 15621, "loss": 0.4579, "lr": 5.992580344327503e-08, "epoch": 0.9003904999679918, "percentage": 90.04, "elapsed_time": "1:31:14", "remaining_time": "0:10:05", "throughput": 8088.81, "total_tokens": 44284672} +{"current_steps": 14070, "total_steps": 15621, "loss": 0.346, "lr": 5.954540136613051e-08, "epoch": 0.9007105819089687, "percentage": 90.07, "elapsed_time": "1:31:15", "remaining_time": "0:10:03", "throughput": 8090.67, "total_tokens": 44300224} +{"current_steps": 14075, "total_steps": 15621, "loss": 0.3511, "lr": 5.916617345759456e-08, "epoch": 0.9010306638499456, "percentage": 90.1, "elapsed_time": "1:31:16", "remaining_time": "0:10:01", "throughput": 8092.45, "total_tokens": 44315264} +{"current_steps": 14076, "total_steps": 15621, "eval_loss": 0.35641103982925415, "epoch": 0.901094680238141, "percentage": 90.11, "elapsed_time": "1:32:06", "remaining_time": "0:10:06", "throughput": 8018.83, "total_tokens": 44318848} +{"current_steps": 14080, "total_steps": 15621, "loss": 0.4212, "lr": 5.878812019113766e-08, "epoch": 0.9013507457909224, "percentage": 90.14, "elapsed_time": "1:32:40", "remaining_time": "0:10:08", "throughput": 7971.94, "total_tokens": 44330176} +{"current_steps": 14085, "total_steps": 15621, "loss": 0.3065, "lr": 5.84112420387638e-08, "epoch": 0.9016708277318993, "percentage": 90.17, "elapsed_time": "1:32:41", "remaining_time": "0:10:06", "throughput": 7973.7, "total_tokens": 44345152} +{"current_steps": 14090, "total_steps": 15621, "loss": 0.3625, "lr": 5.8035539471009697e-08, "epoch": 0.9019909096728762, "percentage": 90.2, "elapsed_time": "1:32:42", "remaining_time": "0:10:04", "throughput": 7975.59, "total_tokens": 44361152} +{"current_steps": 14095, "total_steps": 15621, "loss": 0.4095, "lr": 5.7661012956944253e-08, "epoch": 0.9023109916138532, "percentage": 90.23, "elapsed_time": "1:32:42", "remaining_time": "0:10:02", "throughput": 7977.36, "total_tokens": 44376128} +{"current_steps": 14100, "total_steps": 15621, "loss": 0.2917, "lr": 5.728766296416876e-08, "epoch": 0.9026310735548301, "percentage": 90.26, "elapsed_time": "1:32:43", "remaining_time": "0:10:00", "throughput": 7979.28, "total_tokens": 44392192} +{"current_steps": 14105, "total_steps": 15621, "loss": 0.4205, "lr": 5.6915489958814453e-08, "epoch": 0.902951155495807, "percentage": 90.3, "elapsed_time": "1:32:44", "remaining_time": "0:09:58", "throughput": 7981.11, "total_tokens": 44407680} +{"current_steps": 14110, "total_steps": 15621, "loss": 0.4106, "lr": 5.654449440554399e-08, "epoch": 0.9032712374367838, "percentage": 90.33, "elapsed_time": "1:32:44", "remaining_time": "0:09:55", "throughput": 7983.11, "total_tokens": 44424384} +{"current_steps": 14115, "total_steps": 15621, "loss": 0.3803, "lr": 5.617467676754972e-08, "epoch": 0.9035913193777607, "percentage": 90.36, "elapsed_time": "1:32:45", "remaining_time": "0:09:53", "throughput": 7984.91, "total_tokens": 44439744} +{"current_steps": 14120, "total_steps": 15621, "loss": 0.296, "lr": 5.580603750655344e-08, "epoch": 0.9039114013187376, "percentage": 90.39, "elapsed_time": "1:32:46", "remaining_time": "0:09:51", "throughput": 7986.61, "total_tokens": 44454272} +{"current_steps": 14125, "total_steps": 15621, "loss": 0.3739, "lr": 5.543857708280497e-08, "epoch": 0.9042314832597145, "percentage": 90.42, "elapsed_time": "1:32:46", "remaining_time": "0:09:49", "throughput": 7988.34, "total_tokens": 44468992} +{"current_steps": 14130, "total_steps": 15621, "loss": 0.4703, "lr": 5.507229595508367e-08, "epoch": 0.9045515652006914, "percentage": 90.46, "elapsed_time": "1:32:47", "remaining_time": "0:09:47", "throughput": 7990.22, "total_tokens": 44484864} +{"current_steps": 14135, "total_steps": 15621, "loss": 0.2887, "lr": 5.4707194580695504e-08, "epoch": 0.9048716471416682, "percentage": 90.49, "elapsed_time": "1:32:48", "remaining_time": "0:09:45", "throughput": 7991.99, "total_tokens": 44499968} +{"current_steps": 14140, "total_steps": 15621, "loss": 0.4279, "lr": 5.4343273415473846e-08, "epoch": 0.9051917290826451, "percentage": 90.52, "elapsed_time": "1:32:48", "remaining_time": "0:09:43", "throughput": 7994.15, "total_tokens": 44517952} +{"current_steps": 14145, "total_steps": 15621, "loss": 0.3413, "lr": 5.3980532913778576e-08, "epoch": 0.905511811023622, "percentage": 90.55, "elapsed_time": "1:32:49", "remaining_time": "0:09:41", "throughput": 7995.91, "total_tokens": 44532928} +{"current_steps": 14150, "total_steps": 15621, "loss": 0.3928, "lr": 5.361897352849554e-08, "epoch": 0.905831892964599, "percentage": 90.58, "elapsed_time": "1:32:50", "remaining_time": "0:09:39", "throughput": 7997.72, "total_tokens": 44548288} +{"current_steps": 14155, "total_steps": 15621, "loss": 0.3204, "lr": 5.325859571103586e-08, "epoch": 0.9061519749055759, "percentage": 90.62, "elapsed_time": "1:32:50", "remaining_time": "0:09:36", "throughput": 7999.53, "total_tokens": 44563712} +{"current_steps": 14160, "total_steps": 15621, "loss": 0.3376, "lr": 5.289939991133508e-08, "epoch": 0.9064720568465527, "percentage": 90.65, "elapsed_time": "1:32:51", "remaining_time": "0:09:34", "throughput": 8001.36, "total_tokens": 44579264} +{"current_steps": 14165, "total_steps": 15621, "loss": 0.2387, "lr": 5.2541386577853895e-08, "epoch": 0.9067921387875296, "percentage": 90.68, "elapsed_time": "1:32:52", "remaining_time": "0:09:32", "throughput": 8003.11, "total_tokens": 44594176} +{"current_steps": 14170, "total_steps": 15621, "loss": 0.2536, "lr": 5.2184556157576e-08, "epoch": 0.9071122207285065, "percentage": 90.71, "elapsed_time": "1:32:52", "remaining_time": "0:09:30", "throughput": 8004.93, "total_tokens": 44609664} +{"current_steps": 14175, "total_steps": 15621, "loss": 0.3807, "lr": 5.1828909096008234e-08, "epoch": 0.9074323026694834, "percentage": 90.74, "elapsed_time": "1:32:53", "remaining_time": "0:09:28", "throughput": 8007.0, "total_tokens": 44626944} +{"current_steps": 14180, "total_steps": 15621, "loss": 0.2294, "lr": 5.14744458371803e-08, "epoch": 0.9077523846104603, "percentage": 90.78, "elapsed_time": "1:32:54", "remaining_time": "0:09:26", "throughput": 8008.97, "total_tokens": 44643520} +{"current_steps": 14185, "total_steps": 15621, "loss": 0.4922, "lr": 5.1121166823643646e-08, "epoch": 0.9080724665514371, "percentage": 90.81, "elapsed_time": "1:32:54", "remaining_time": "0:09:24", "throughput": 8010.65, "total_tokens": 44657984} +{"current_steps": 14190, "total_steps": 15621, "loss": 0.3841, "lr": 5.076907249647122e-08, "epoch": 0.908392548492414, "percentage": 90.84, "elapsed_time": "1:32:55", "remaining_time": "0:09:22", "throughput": 8012.41, "total_tokens": 44673024} +{"current_steps": 14195, "total_steps": 15621, "loss": 0.4111, "lr": 5.0418163295257055e-08, "epoch": 0.9087126304333909, "percentage": 90.87, "elapsed_time": "1:32:56", "remaining_time": "0:09:20", "throughput": 8014.07, "total_tokens": 44687424} +{"current_steps": 14200, "total_steps": 15621, "loss": 0.2901, "lr": 5.006843965811536e-08, "epoch": 0.9090327123743679, "percentage": 90.9, "elapsed_time": "1:32:56", "remaining_time": "0:09:18", "throughput": 8015.9, "total_tokens": 44702976} +{"current_steps": 14205, "total_steps": 15621, "loss": 0.4813, "lr": 4.971990202168008e-08, "epoch": 0.9093527943153448, "percentage": 90.94, "elapsed_time": "1:32:57", "remaining_time": "0:09:15", "throughput": 8017.69, "total_tokens": 44718144} +{"current_steps": 14210, "total_steps": 15621, "loss": 0.3209, "lr": 4.9372550821104697e-08, "epoch": 0.9096728762563216, "percentage": 90.97, "elapsed_time": "1:32:58", "remaining_time": "0:09:13", "throughput": 8019.68, "total_tokens": 44734912} +{"current_steps": 14215, "total_steps": 15621, "loss": 0.3205, "lr": 4.902638649006119e-08, "epoch": 0.9099929581972985, "percentage": 91.0, "elapsed_time": "1:32:58", "remaining_time": "0:09:11", "throughput": 8021.44, "total_tokens": 44749888} +{"current_steps": 14220, "total_steps": 15621, "loss": 0.3289, "lr": 4.868140946073973e-08, "epoch": 0.9103130401382754, "percentage": 91.03, "elapsed_time": "1:32:59", "remaining_time": "0:09:09", "throughput": 8023.14, "total_tokens": 44764544} +{"current_steps": 14225, "total_steps": 15621, "loss": 0.3017, "lr": 4.833762016384857e-08, "epoch": 0.9106331220792523, "percentage": 91.06, "elapsed_time": "1:33:00", "remaining_time": "0:09:07", "throughput": 8025.08, "total_tokens": 44780992} +{"current_steps": 14230, "total_steps": 15621, "loss": 0.3869, "lr": 4.799501902861214e-08, "epoch": 0.9109532040202292, "percentage": 91.1, "elapsed_time": "1:33:00", "remaining_time": "0:09:05", "throughput": 8026.91, "total_tokens": 44796672} +{"current_steps": 14235, "total_steps": 15621, "loss": 0.4287, "lr": 4.765360648277217e-08, "epoch": 0.911273285961206, "percentage": 91.13, "elapsed_time": "1:33:01", "remaining_time": "0:09:03", "throughput": 8028.73, "total_tokens": 44812224} +{"current_steps": 14240, "total_steps": 15621, "loss": 0.4228, "lr": 4.7313382952586465e-08, "epoch": 0.9115933679021829, "percentage": 91.16, "elapsed_time": "1:33:02", "remaining_time": "0:09:01", "throughput": 8030.47, "total_tokens": 44827136} +{"current_steps": 14245, "total_steps": 15621, "loss": 0.3649, "lr": 4.6974348862828027e-08, "epoch": 0.9119134498431598, "percentage": 91.19, "elapsed_time": "1:33:02", "remaining_time": "0:08:59", "throughput": 8032.22, "total_tokens": 44842176} +{"current_steps": 14250, "total_steps": 15621, "loss": 0.4412, "lr": 4.663650463678448e-08, "epoch": 0.9122335317841367, "percentage": 91.22, "elapsed_time": "1:33:03", "remaining_time": "0:08:57", "throughput": 8034.19, "total_tokens": 44858880} +{"current_steps": 14255, "total_steps": 15621, "loss": 0.4434, "lr": 4.629985069625875e-08, "epoch": 0.9125536137251137, "percentage": 91.26, "elapsed_time": "1:33:04", "remaining_time": "0:08:55", "throughput": 8036.15, "total_tokens": 44875328} +{"current_steps": 14260, "total_steps": 15621, "loss": 0.3751, "lr": 4.596438746156728e-08, "epoch": 0.9128736956660906, "percentage": 91.29, "elapsed_time": "1:33:04", "remaining_time": "0:08:53", "throughput": 8038.13, "total_tokens": 44892032} +{"current_steps": 14265, "total_steps": 15621, "loss": 0.36, "lr": 4.563011535153949e-08, "epoch": 0.9131937776070674, "percentage": 91.32, "elapsed_time": "1:33:05", "remaining_time": "0:08:50", "throughput": 8039.91, "total_tokens": 44907328} +{"current_steps": 14270, "total_steps": 15621, "loss": 0.2689, "lr": 4.52970347835181e-08, "epoch": 0.9135138595480443, "percentage": 91.35, "elapsed_time": "1:33:06", "remaining_time": "0:08:48", "throughput": 8041.68, "total_tokens": 44922560} +{"current_steps": 14275, "total_steps": 15621, "loss": 0.327, "lr": 4.496514617335845e-08, "epoch": 0.9138339414890212, "percentage": 91.38, "elapsed_time": "1:33:06", "remaining_time": "0:08:46", "throughput": 8043.45, "total_tokens": 44937728} +{"current_steps": 14280, "total_steps": 15621, "loss": 0.3603, "lr": 4.4634449935427197e-08, "epoch": 0.9141540234299981, "percentage": 91.42, "elapsed_time": "1:33:07", "remaining_time": "0:08:44", "throughput": 8045.43, "total_tokens": 44954560} +{"current_steps": 14285, "total_steps": 15621, "loss": 0.3096, "lr": 4.430494648260219e-08, "epoch": 0.914474105370975, "percentage": 91.45, "elapsed_time": "1:33:08", "remaining_time": "0:08:42", "throughput": 8047.43, "total_tokens": 44971520} +{"current_steps": 14290, "total_steps": 15621, "loss": 0.4524, "lr": 4.397663622627279e-08, "epoch": 0.9147941873119518, "percentage": 91.48, "elapsed_time": "1:33:08", "remaining_time": "0:08:40", "throughput": 8049.29, "total_tokens": 44987392} +{"current_steps": 14295, "total_steps": 15621, "loss": 0.3122, "lr": 4.364951957633789e-08, "epoch": 0.9151142692529287, "percentage": 91.51, "elapsed_time": "1:33:09", "remaining_time": "0:08:38", "throughput": 8051.07, "total_tokens": 45002688} +{"current_steps": 14300, "total_steps": 15621, "loss": 0.2953, "lr": 4.332359694120669e-08, "epoch": 0.9154343511939056, "percentage": 91.54, "elapsed_time": "1:33:10", "remaining_time": "0:08:36", "throughput": 8052.82, "total_tokens": 45017792} +{"current_steps": 14305, "total_steps": 15621, "loss": 0.3571, "lr": 4.299886872779734e-08, "epoch": 0.9157544331348826, "percentage": 91.58, "elapsed_time": "1:33:10", "remaining_time": "0:08:34", "throughput": 8054.53, "total_tokens": 45032640} +{"current_steps": 14310, "total_steps": 15621, "loss": 0.2975, "lr": 4.267533534153678e-08, "epoch": 0.9160745150758595, "percentage": 91.61, "elapsed_time": "1:33:11", "remaining_time": "0:08:32", "throughput": 8056.36, "total_tokens": 45048256} +{"current_steps": 14315, "total_steps": 15621, "loss": 0.3218, "lr": 4.2352997186360316e-08, "epoch": 0.9163945970168363, "percentage": 91.64, "elapsed_time": "1:33:12", "remaining_time": "0:08:30", "throughput": 8058.23, "total_tokens": 45064192} +{"current_steps": 14320, "total_steps": 15621, "loss": 0.3243, "lr": 4.203185466471082e-08, "epoch": 0.9167146789578132, "percentage": 91.67, "elapsed_time": "1:33:12", "remaining_time": "0:08:28", "throughput": 8059.99, "total_tokens": 45079488} +{"current_steps": 14325, "total_steps": 15621, "loss": 0.3984, "lr": 4.1711908177538556e-08, "epoch": 0.9170347608987901, "percentage": 91.7, "elapsed_time": "1:33:13", "remaining_time": "0:08:26", "throughput": 8061.88, "total_tokens": 45095616} +{"current_steps": 14330, "total_steps": 15621, "loss": 0.378, "lr": 4.139315812430055e-08, "epoch": 0.917354842839767, "percentage": 91.74, "elapsed_time": "1:33:14", "remaining_time": "0:08:23", "throughput": 8063.61, "total_tokens": 45110592} +{"current_steps": 14335, "total_steps": 15621, "loss": 0.3863, "lr": 4.1075604902959915e-08, "epoch": 0.9176749247807439, "percentage": 91.77, "elapsed_time": "1:33:15", "remaining_time": "0:08:21", "throughput": 8065.56, "total_tokens": 45127168} +{"current_steps": 14340, "total_steps": 15621, "loss": 0.3137, "lr": 4.07592489099855e-08, "epoch": 0.9179950067217207, "percentage": 91.8, "elapsed_time": "1:33:15", "remaining_time": "0:08:19", "throughput": 8067.3, "total_tokens": 45142208} +{"current_steps": 14345, "total_steps": 15621, "loss": 0.3934, "lr": 4.044409054035147e-08, "epoch": 0.9183150886626976, "percentage": 91.83, "elapsed_time": "1:33:16", "remaining_time": "0:08:17", "throughput": 8069.04, "total_tokens": 45157184} +{"current_steps": 14350, "total_steps": 15621, "loss": 0.3929, "lr": 4.0130130187537195e-08, "epoch": 0.9186351706036745, "percentage": 91.86, "elapsed_time": "1:33:17", "remaining_time": "0:08:15", "throughput": 8071.1, "total_tokens": 45174464} +{"current_steps": 14355, "total_steps": 15621, "loss": 0.3149, "lr": 3.981736824352522e-08, "epoch": 0.9189552525446514, "percentage": 91.9, "elapsed_time": "1:33:17", "remaining_time": "0:08:13", "throughput": 8072.77, "total_tokens": 45188992} +{"current_steps": 14360, "total_steps": 15621, "loss": 0.4703, "lr": 3.950580509880286e-08, "epoch": 0.9192753344856284, "percentage": 91.93, "elapsed_time": "1:33:18", "remaining_time": "0:08:11", "throughput": 8074.5, "total_tokens": 45204032} +{"current_steps": 14365, "total_steps": 15621, "loss": 0.3999, "lr": 3.9195441142360066e-08, "epoch": 0.9195954164266052, "percentage": 91.96, "elapsed_time": "1:33:19", "remaining_time": "0:08:09", "throughput": 8076.26, "total_tokens": 45219328} +{"current_steps": 14370, "total_steps": 15621, "loss": 0.321, "lr": 3.888627676169043e-08, "epoch": 0.9199154983675821, "percentage": 91.99, "elapsed_time": "1:33:19", "remaining_time": "0:08:07", "throughput": 8078.17, "total_tokens": 45235584} +{"current_steps": 14375, "total_steps": 15621, "loss": 0.3666, "lr": 3.857831234278886e-08, "epoch": 0.920235580308559, "percentage": 92.02, "elapsed_time": "1:33:20", "remaining_time": "0:08:05", "throughput": 8079.94, "total_tokens": 45250880} +{"current_steps": 14380, "total_steps": 15621, "loss": 0.4145, "lr": 3.827154827015255e-08, "epoch": 0.9205556622495359, "percentage": 92.06, "elapsed_time": "1:33:21", "remaining_time": "0:08:03", "throughput": 8081.79, "total_tokens": 45266752} +{"current_steps": 14385, "total_steps": 15621, "loss": 0.285, "lr": 3.7965984926780383e-08, "epoch": 0.9208757441905128, "percentage": 92.09, "elapsed_time": "1:33:21", "remaining_time": "0:08:01", "throughput": 8083.62, "total_tokens": 45282496} +{"current_steps": 14390, "total_steps": 15621, "loss": 0.3521, "lr": 3.766162269417139e-08, "epoch": 0.9211958261314896, "percentage": 92.12, "elapsed_time": "1:33:22", "remaining_time": "0:07:59", "throughput": 8085.28, "total_tokens": 45297024} +{"current_steps": 14395, "total_steps": 15621, "loss": 0.3723, "lr": 3.73584619523255e-08, "epoch": 0.9215159080724665, "percentage": 92.15, "elapsed_time": "1:33:23", "remaining_time": "0:07:57", "throughput": 8087.3, "total_tokens": 45314176} +{"current_steps": 14400, "total_steps": 15621, "loss": 0.352, "lr": 3.7056503079742616e-08, "epoch": 0.9218359900134434, "percentage": 92.18, "elapsed_time": "1:33:23", "remaining_time": "0:07:55", "throughput": 8089.05, "total_tokens": 45329344} +{"current_steps": 14405, "total_steps": 15621, "loss": 0.3452, "lr": 3.6755746453421945e-08, "epoch": 0.9221560719544203, "percentage": 92.22, "elapsed_time": "1:33:24", "remaining_time": "0:07:53", "throughput": 8090.8, "total_tokens": 45344384} +{"current_steps": 14410, "total_steps": 15621, "loss": 0.2969, "lr": 3.645619244886145e-08, "epoch": 0.9224761538953972, "percentage": 92.25, "elapsed_time": "1:33:25", "remaining_time": "0:07:51", "throughput": 8092.63, "total_tokens": 45360192} +{"current_steps": 14415, "total_steps": 15621, "loss": 0.3147, "lr": 3.615784144005796e-08, "epoch": 0.9227962358363742, "percentage": 92.28, "elapsed_time": "1:33:25", "remaining_time": "0:07:48", "throughput": 8094.46, "total_tokens": 45376000} +{"current_steps": 14420, "total_steps": 15621, "loss": 0.4197, "lr": 3.5860693799506184e-08, "epoch": 0.923116317777351, "percentage": 92.31, "elapsed_time": "1:33:26", "remaining_time": "0:07:46", "throughput": 8096.11, "total_tokens": 45390400} +{"current_steps": 14425, "total_steps": 15621, "loss": 0.4608, "lr": 3.5564749898198466e-08, "epoch": 0.9234363997183279, "percentage": 92.34, "elapsed_time": "1:33:27", "remaining_time": "0:07:44", "throughput": 8098.06, "total_tokens": 45406976} +{"current_steps": 14430, "total_steps": 15621, "loss": 0.3533, "lr": 3.527001010562425e-08, "epoch": 0.9237564816593048, "percentage": 92.38, "elapsed_time": "1:33:27", "remaining_time": "0:07:42", "throughput": 8099.8, "total_tokens": 45422080} +{"current_steps": 14435, "total_steps": 15621, "loss": 0.3585, "lr": 3.4976474789769504e-08, "epoch": 0.9240765636002817, "percentage": 92.41, "elapsed_time": "1:33:28", "remaining_time": "0:07:40", "throughput": 8101.84, "total_tokens": 45439296} +{"current_steps": 14440, "total_steps": 15621, "loss": 0.2994, "lr": 3.4684144317116636e-08, "epoch": 0.9243966455412586, "percentage": 92.44, "elapsed_time": "1:33:29", "remaining_time": "0:07:38", "throughput": 8103.57, "total_tokens": 45454208} +{"current_steps": 14445, "total_steps": 15621, "loss": 0.3015, "lr": 3.439301905264369e-08, "epoch": 0.9247167274822354, "percentage": 92.47, "elapsed_time": "1:33:29", "remaining_time": "0:07:36", "throughput": 8105.46, "total_tokens": 45470400} +{"current_steps": 14450, "total_steps": 15621, "loss": 0.324, "lr": 3.410309935982403e-08, "epoch": 0.9250368094232123, "percentage": 92.5, "elapsed_time": "1:33:30", "remaining_time": "0:07:34", "throughput": 8107.34, "total_tokens": 45486528} +{"current_steps": 14455, "total_steps": 15621, "loss": 0.3488, "lr": 3.381438560062555e-08, "epoch": 0.9253568913641892, "percentage": 92.54, "elapsed_time": "1:33:31", "remaining_time": "0:07:32", "throughput": 8109.06, "total_tokens": 45501440} +{"current_steps": 14460, "total_steps": 15621, "loss": 0.3167, "lr": 3.3526878135511025e-08, "epoch": 0.9256769733051661, "percentage": 92.57, "elapsed_time": "1:33:31", "remaining_time": "0:07:30", "throughput": 8110.97, "total_tokens": 45517760} +{"current_steps": 14465, "total_steps": 15621, "loss": 0.3751, "lr": 3.324057732343666e-08, "epoch": 0.9259970552461431, "percentage": 92.6, "elapsed_time": "1:33:32", "remaining_time": "0:07:28", "throughput": 8112.73, "total_tokens": 45533056} +{"current_steps": 14470, "total_steps": 15621, "loss": 0.421, "lr": 3.295548352185262e-08, "epoch": 0.9263171371871199, "percentage": 92.63, "elapsed_time": "1:33:33", "remaining_time": "0:07:26", "throughput": 8114.62, "total_tokens": 45549248} +{"current_steps": 14475, "total_steps": 15621, "loss": 0.3503, "lr": 3.2671597086701753e-08, "epoch": 0.9266372191280968, "percentage": 92.66, "elapsed_time": "1:33:33", "remaining_time": "0:07:24", "throughput": 8116.56, "total_tokens": 45565760} +{"current_steps": 14480, "total_steps": 15621, "loss": 0.3294, "lr": 3.238891837241964e-08, "epoch": 0.9269573010690737, "percentage": 92.7, "elapsed_time": "1:33:34", "remaining_time": "0:07:22", "throughput": 8118.4, "total_tokens": 45581568} +{"current_steps": 14485, "total_steps": 15621, "loss": 0.4179, "lr": 3.210744773193386e-08, "epoch": 0.9272773830100506, "percentage": 92.73, "elapsed_time": "1:33:35", "remaining_time": "0:07:20", "throughput": 8120.18, "total_tokens": 45596928} +{"current_steps": 14490, "total_steps": 15621, "loss": 0.3016, "lr": 3.182718551666386e-08, "epoch": 0.9275974649510275, "percentage": 92.76, "elapsed_time": "1:33:35", "remaining_time": "0:07:18", "throughput": 8122.03, "total_tokens": 45612800} +{"current_steps": 14495, "total_steps": 15621, "loss": 0.415, "lr": 3.154813207652063e-08, "epoch": 0.9279175468920043, "percentage": 92.79, "elapsed_time": "1:33:36", "remaining_time": "0:07:16", "throughput": 8123.74, "total_tokens": 45627584} +{"current_steps": 14500, "total_steps": 15621, "loss": 0.3294, "lr": 3.1270287759905143e-08, "epoch": 0.9282376288329812, "percentage": 92.82, "elapsed_time": "1:33:37", "remaining_time": "0:07:14", "throughput": 8125.63, "total_tokens": 45643840} +{"current_steps": 14505, "total_steps": 15621, "loss": 0.2947, "lr": 3.0993652913709476e-08, "epoch": 0.9285577107739581, "percentage": 92.86, "elapsed_time": "1:33:37", "remaining_time": "0:07:12", "throughput": 8127.39, "total_tokens": 45659072} +{"current_steps": 14510, "total_steps": 15621, "loss": 0.4243, "lr": 3.0718227883315796e-08, "epoch": 0.928877792714935, "percentage": 92.89, "elapsed_time": "1:33:38", "remaining_time": "0:07:10", "throughput": 8129.29, "total_tokens": 45675328} +{"current_steps": 14515, "total_steps": 15621, "loss": 0.3658, "lr": 3.044401301259503e-08, "epoch": 0.9291978746559119, "percentage": 92.92, "elapsed_time": "1:33:39", "remaining_time": "0:07:08", "throughput": 8131.08, "total_tokens": 45690816} +{"current_steps": 14520, "total_steps": 15621, "loss": 0.3301, "lr": 3.017100864390787e-08, "epoch": 0.9295179565968889, "percentage": 92.95, "elapsed_time": "1:33:39", "remaining_time": "0:07:06", "throughput": 8132.89, "total_tokens": 45706432} +{"current_steps": 14525, "total_steps": 15621, "loss": 0.3406, "lr": 2.9899215118103446e-08, "epoch": 0.9298380385378657, "percentage": 92.98, "elapsed_time": "1:33:40", "remaining_time": "0:07:04", "throughput": 8134.68, "total_tokens": 45721920} +{"current_steps": 14530, "total_steps": 15621, "loss": 0.3547, "lr": 2.9628632774519435e-08, "epoch": 0.9301581204788426, "percentage": 93.02, "elapsed_time": "1:33:41", "remaining_time": "0:07:02", "throughput": 8136.56, "total_tokens": 45738048} +{"current_steps": 14535, "total_steps": 15621, "loss": 0.3313, "lr": 2.9359261950980485e-08, "epoch": 0.9304782024198195, "percentage": 93.05, "elapsed_time": "1:33:41", "remaining_time": "0:07:00", "throughput": 8138.39, "total_tokens": 45753856} +{"current_steps": 14540, "total_steps": 15621, "loss": 0.2998, "lr": 2.90911029837998e-08, "epoch": 0.9307982843607964, "percentage": 93.08, "elapsed_time": "1:33:42", "remaining_time": "0:06:58", "throughput": 8140.1, "total_tokens": 45768704} +{"current_steps": 14545, "total_steps": 15621, "loss": 0.2851, "lr": 2.8824156207776673e-08, "epoch": 0.9311183663017732, "percentage": 93.11, "elapsed_time": "1:33:43", "remaining_time": "0:06:55", "throughput": 8141.86, "total_tokens": 45783936} +{"current_steps": 14550, "total_steps": 15621, "loss": 0.4491, "lr": 2.8558421956197397e-08, "epoch": 0.9314384482427501, "percentage": 93.14, "elapsed_time": "1:33:43", "remaining_time": "0:06:53", "throughput": 8143.77, "total_tokens": 45800320} +{"current_steps": 14555, "total_steps": 15621, "loss": 0.3872, "lr": 2.829390056083436e-08, "epoch": 0.931758530183727, "percentage": 93.18, "elapsed_time": "1:33:44", "remaining_time": "0:06:51", "throughput": 8145.65, "total_tokens": 45816512} +{"current_steps": 14560, "total_steps": 15621, "loss": 0.3173, "lr": 2.8030592351945492e-08, "epoch": 0.9320786121247039, "percentage": 93.21, "elapsed_time": "1:33:45", "remaining_time": "0:06:49", "throughput": 8147.43, "total_tokens": 45831936} +{"current_steps": 14565, "total_steps": 15621, "loss": 0.2995, "lr": 2.776849765827427e-08, "epoch": 0.9323986940656808, "percentage": 93.24, "elapsed_time": "1:33:45", "remaining_time": "0:06:47", "throughput": 8149.14, "total_tokens": 45846784} +{"current_steps": 14570, "total_steps": 15621, "loss": 0.4281, "lr": 2.750761680704905e-08, "epoch": 0.9327187760066578, "percentage": 93.27, "elapsed_time": "1:33:46", "remaining_time": "0:06:45", "throughput": 8150.89, "total_tokens": 45862080} +{"current_steps": 14575, "total_steps": 15621, "loss": 0.3977, "lr": 2.724795012398251e-08, "epoch": 0.9330388579476346, "percentage": 93.3, "elapsed_time": "1:33:47", "remaining_time": "0:06:43", "throughput": 8152.8, "total_tokens": 45878528} +{"current_steps": 14580, "total_steps": 15621, "loss": 0.3726, "lr": 2.6989497933271543e-08, "epoch": 0.9333589398886115, "percentage": 93.34, "elapsed_time": "1:33:47", "remaining_time": "0:06:41", "throughput": 8154.59, "total_tokens": 45894016} +{"current_steps": 14585, "total_steps": 15621, "loss": 0.3228, "lr": 2.673226055759692e-08, "epoch": 0.9336790218295884, "percentage": 93.37, "elapsed_time": "1:33:48", "remaining_time": "0:06:39", "throughput": 8156.37, "total_tokens": 45909504} +{"current_steps": 14590, "total_steps": 15621, "loss": 0.341, "lr": 2.6476238318122402e-08, "epoch": 0.9339991037705653, "percentage": 93.4, "elapsed_time": "1:33:49", "remaining_time": "0:06:37", "throughput": 8158.21, "total_tokens": 45925376} +{"current_steps": 14595, "total_steps": 15621, "loss": 0.3917, "lr": 2.6221431534494742e-08, "epoch": 0.9343191857115422, "percentage": 93.43, "elapsed_time": "1:33:49", "remaining_time": "0:06:35", "throughput": 8159.9, "total_tokens": 45940224} +{"current_steps": 14600, "total_steps": 15621, "loss": 0.3508, "lr": 2.5967840524843243e-08, "epoch": 0.934639267652519, "percentage": 93.46, "elapsed_time": "1:33:50", "remaining_time": "0:06:33", "throughput": 8161.6, "total_tokens": 45955072} +{"current_steps": 14605, "total_steps": 15621, "loss": 0.4243, "lr": 2.5715465605779195e-08, "epoch": 0.9349593495934959, "percentage": 93.5, "elapsed_time": "1:33:51", "remaining_time": "0:06:31", "throughput": 8163.35, "total_tokens": 45970240} +{"current_steps": 14610, "total_steps": 15621, "loss": 0.4145, "lr": 2.5464307092395777e-08, "epoch": 0.9352794315344728, "percentage": 93.53, "elapsed_time": "1:33:51", "remaining_time": "0:06:29", "throughput": 8165.14, "total_tokens": 45985856} +{"current_steps": 14615, "total_steps": 15621, "loss": 0.345, "lr": 2.5214365298267148e-08, "epoch": 0.9355995134754497, "percentage": 93.56, "elapsed_time": "1:33:52", "remaining_time": "0:06:27", "throughput": 8166.78, "total_tokens": 46000256} +{"current_steps": 14620, "total_steps": 15621, "loss": 0.3203, "lr": 2.4965640535448917e-08, "epoch": 0.9359195954164266, "percentage": 93.59, "elapsed_time": "1:33:53", "remaining_time": "0:06:25", "throughput": 8168.55, "total_tokens": 46015616} +{"current_steps": 14625, "total_steps": 15621, "loss": 0.3659, "lr": 2.471813311447657e-08, "epoch": 0.9362396773574035, "percentage": 93.62, "elapsed_time": "1:33:53", "remaining_time": "0:06:23", "throughput": 8170.32, "total_tokens": 46031040} +{"current_steps": 14630, "total_steps": 15621, "loss": 0.3221, "lr": 2.4471843344365915e-08, "epoch": 0.9365597592983804, "percentage": 93.66, "elapsed_time": "1:33:54", "remaining_time": "0:06:21", "throughput": 8172.05, "total_tokens": 46046016} +{"current_steps": 14635, "total_steps": 15621, "loss": 0.2701, "lr": 2.42267715326131e-08, "epoch": 0.9368798412393573, "percentage": 93.69, "elapsed_time": "1:33:55", "remaining_time": "0:06:19", "throughput": 8173.97, "total_tokens": 46062528} +{"current_steps": 14640, "total_steps": 15621, "loss": 0.3421, "lr": 2.3982917985192697e-08, "epoch": 0.9371999231803342, "percentage": 93.72, "elapsed_time": "1:33:55", "remaining_time": "0:06:17", "throughput": 8175.78, "total_tokens": 46078144} +{"current_steps": 14645, "total_steps": 15621, "loss": 0.3982, "lr": 2.3740283006558838e-08, "epoch": 0.9375200051213111, "percentage": 93.75, "elapsed_time": "1:33:56", "remaining_time": "0:06:15", "throughput": 8178.0, "total_tokens": 46096896} +{"current_steps": 14650, "total_steps": 15621, "loss": 0.3756, "lr": 2.349886689964431e-08, "epoch": 0.9378400870622879, "percentage": 93.78, "elapsed_time": "1:33:57", "remaining_time": "0:06:13", "throughput": 8179.71, "total_tokens": 46111808} +{"current_steps": 14655, "total_steps": 15621, "loss": 0.2836, "lr": 2.32586699658599e-08, "epoch": 0.9381601690032648, "percentage": 93.82, "elapsed_time": "1:33:58", "remaining_time": "0:06:11", "throughput": 8181.58, "total_tokens": 46127936} +{"current_steps": 14660, "total_steps": 15621, "loss": 0.3551, "lr": 2.3019692505094056e-08, "epoch": 0.9384802509442417, "percentage": 93.85, "elapsed_time": "1:33:58", "remaining_time": "0:06:09", "throughput": 8183.29, "total_tokens": 46142848} +{"current_steps": 14665, "total_steps": 15621, "loss": 0.5477, "lr": 2.2781934815713223e-08, "epoch": 0.9388003328852186, "percentage": 93.88, "elapsed_time": "1:33:59", "remaining_time": "0:06:07", "throughput": 8185.13, "total_tokens": 46158848} +{"current_steps": 14670, "total_steps": 15621, "loss": 0.3611, "lr": 2.254539719456061e-08, "epoch": 0.9391204148261955, "percentage": 93.91, "elapsed_time": "1:34:00", "remaining_time": "0:06:05", "throughput": 8186.98, "total_tokens": 46174912} +{"current_steps": 14675, "total_steps": 15621, "loss": 0.2694, "lr": 2.231007993695633e-08, "epoch": 0.9394404967671725, "percentage": 93.94, "elapsed_time": "1:34:00", "remaining_time": "0:06:03", "throughput": 8188.6, "total_tokens": 46189248} +{"current_steps": 14680, "total_steps": 15621, "loss": 0.3136, "lr": 2.2075983336696357e-08, "epoch": 0.9397605787081493, "percentage": 93.98, "elapsed_time": "1:34:01", "remaining_time": "0:06:01", "throughput": 8190.41, "total_tokens": 46204928} +{"current_steps": 14685, "total_steps": 15621, "loss": 0.3964, "lr": 2.1843107686053353e-08, "epoch": 0.9400806606491262, "percentage": 94.01, "elapsed_time": "1:34:02", "remaining_time": "0:05:59", "throughput": 8192.15, "total_tokens": 46220160} +{"current_steps": 14690, "total_steps": 15621, "loss": 0.4228, "lr": 2.1611453275775405e-08, "epoch": 0.9404007425901031, "percentage": 94.04, "elapsed_time": "1:34:02", "remaining_time": "0:05:57", "throughput": 8193.93, "total_tokens": 46235584} +{"current_steps": 14695, "total_steps": 15621, "loss": 0.2719, "lr": 2.138102039508538e-08, "epoch": 0.94072082453108, "percentage": 94.07, "elapsed_time": "1:34:03", "remaining_time": "0:05:55", "throughput": 8195.82, "total_tokens": 46251904} +{"current_steps": 14700, "total_steps": 15621, "loss": 0.3995, "lr": 2.1151809331681703e-08, "epoch": 0.9410409064720568, "percentage": 94.1, "elapsed_time": "1:34:04", "remaining_time": "0:05:53", "throughput": 8197.67, "total_tokens": 46268032} +{"current_steps": 14705, "total_steps": 15621, "loss": 0.3371, "lr": 2.092382037173701e-08, "epoch": 0.9413609884130337, "percentage": 94.14, "elapsed_time": "1:34:04", "remaining_time": "0:05:51", "throughput": 8199.43, "total_tokens": 46283392} +{"current_steps": 14710, "total_steps": 15621, "loss": 0.3089, "lr": 2.0697053799898277e-08, "epoch": 0.9416810703540106, "percentage": 94.17, "elapsed_time": "1:34:05", "remaining_time": "0:05:49", "throughput": 8201.18, "total_tokens": 46298752} +{"current_steps": 14715, "total_steps": 15621, "loss": 0.3394, "lr": 2.0471509899286144e-08, "epoch": 0.9420011522949875, "percentage": 94.2, "elapsed_time": "1:34:06", "remaining_time": "0:05:47", "throughput": 8203.01, "total_tokens": 46314624} +{"current_steps": 14720, "total_steps": 15621, "loss": 0.3517, "lr": 2.0247188951494797e-08, "epoch": 0.9423212342359644, "percentage": 94.23, "elapsed_time": "1:34:06", "remaining_time": "0:05:45", "throughput": 8205.0, "total_tokens": 46331712} +{"current_steps": 14725, "total_steps": 15621, "loss": 0.5446, "lr": 2.0024091236591655e-08, "epoch": 0.9426413161769412, "percentage": 94.26, "elapsed_time": "1:34:07", "remaining_time": "0:05:43", "throughput": 8206.71, "total_tokens": 46347200} +{"current_steps": 14730, "total_steps": 15621, "loss": 0.3148, "lr": 1.98022170331168e-08, "epoch": 0.9429613981179182, "percentage": 94.3, "elapsed_time": "1:34:08", "remaining_time": "0:05:41", "throughput": 8208.54, "total_tokens": 46363008} +{"current_steps": 14735, "total_steps": 15621, "loss": 0.3808, "lr": 1.9581566618082744e-08, "epoch": 0.9432814800588951, "percentage": 94.33, "elapsed_time": "1:34:08", "remaining_time": "0:05:39", "throughput": 8210.36, "total_tokens": 46378816} +{"current_steps": 14740, "total_steps": 15621, "loss": 0.4079, "lr": 1.9362140266974025e-08, "epoch": 0.943601561999872, "percentage": 94.36, "elapsed_time": "1:34:09", "remaining_time": "0:05:37", "throughput": 8212.24, "total_tokens": 46395200} +{"current_steps": 14745, "total_steps": 15621, "loss": 0.3223, "lr": 1.9143938253747383e-08, "epoch": 0.9439216439408489, "percentage": 94.39, "elapsed_time": "1:34:10", "remaining_time": "0:05:35", "throughput": 8214.17, "total_tokens": 46411840} +{"current_steps": 14750, "total_steps": 15621, "loss": 0.4503, "lr": 1.892696085083023e-08, "epoch": 0.9442417258818258, "percentage": 94.42, "elapsed_time": "1:34:10", "remaining_time": "0:05:33", "throughput": 8215.99, "total_tokens": 46427776} +{"current_steps": 14755, "total_steps": 15621, "loss": 0.3146, "lr": 1.8711208329121542e-08, "epoch": 0.9445618078228026, "percentage": 94.46, "elapsed_time": "1:34:11", "remaining_time": "0:05:31", "throughput": 8217.96, "total_tokens": 46444736} +{"current_steps": 14760, "total_steps": 15621, "loss": 0.3372, "lr": 1.849668095799084e-08, "epoch": 0.9448818897637795, "percentage": 94.49, "elapsed_time": "1:34:12", "remaining_time": "0:05:29", "throughput": 8219.79, "total_tokens": 46460672} +{"current_steps": 14765, "total_steps": 15621, "loss": 0.3458, "lr": 1.8283379005278098e-08, "epoch": 0.9452019717047564, "percentage": 94.52, "elapsed_time": "1:34:12", "remaining_time": "0:05:27", "throughput": 8221.63, "total_tokens": 46476736} +{"current_steps": 14770, "total_steps": 15621, "loss": 0.3238, "lr": 1.807130273729329e-08, "epoch": 0.9455220536457333, "percentage": 94.55, "elapsed_time": "1:34:13", "remaining_time": "0:05:25", "throughput": 8223.43, "total_tokens": 46492416} +{"current_steps": 14775, "total_steps": 15621, "loss": 0.3331, "lr": 1.7860452418816173e-08, "epoch": 0.9458421355867102, "percentage": 94.58, "elapsed_time": "1:34:14", "remaining_time": "0:05:23", "throughput": 8225.12, "total_tokens": 46507264} +{"current_steps": 14780, "total_steps": 15621, "loss": 0.3365, "lr": 1.7650828313095834e-08, "epoch": 0.946162217527687, "percentage": 94.62, "elapsed_time": "1:34:15", "remaining_time": "0:05:21", "throughput": 8227.09, "total_tokens": 46524224} +{"current_steps": 14785, "total_steps": 15621, "loss": 0.3172, "lr": 1.7442430681850362e-08, "epoch": 0.946482299468664, "percentage": 94.65, "elapsed_time": "1:34:15", "remaining_time": "0:05:19", "throughput": 8228.83, "total_tokens": 46539456} +{"current_steps": 14790, "total_steps": 15621, "loss": 0.402, "lr": 1.723525978526652e-08, "epoch": 0.9468023814096409, "percentage": 94.68, "elapsed_time": "1:34:16", "remaining_time": "0:05:17", "throughput": 8230.62, "total_tokens": 46555136} +{"current_steps": 14795, "total_steps": 15621, "loss": 0.3503, "lr": 1.702931588199996e-08, "epoch": 0.9471224633506178, "percentage": 94.71, "elapsed_time": "1:34:16", "remaining_time": "0:05:15", "throughput": 8232.37, "total_tokens": 46570432} +{"current_steps": 14800, "total_steps": 15621, "loss": 0.3141, "lr": 1.6824599229173897e-08, "epoch": 0.9474425452915947, "percentage": 94.74, "elapsed_time": "1:34:17", "remaining_time": "0:05:13", "throughput": 8234.19, "total_tokens": 46586304} +{"current_steps": 14805, "total_steps": 15621, "loss": 0.2946, "lr": 1.662111008237932e-08, "epoch": 0.9477626272325715, "percentage": 94.78, "elapsed_time": "1:34:18", "remaining_time": "0:05:11", "throughput": 8236.04, "total_tokens": 46602432} +{"current_steps": 14810, "total_steps": 15621, "loss": 0.3135, "lr": 1.6418848695675003e-08, "epoch": 0.9480827091735484, "percentage": 94.81, "elapsed_time": "1:34:19", "remaining_time": "0:05:09", "throughput": 8237.74, "total_tokens": 46617472} +{"current_steps": 14815, "total_steps": 15621, "loss": 0.3713, "lr": 1.6217815321586614e-08, "epoch": 0.9484027911145253, "percentage": 94.84, "elapsed_time": "1:34:19", "remaining_time": "0:05:07", "throughput": 8239.5, "total_tokens": 46632896} +{"current_steps": 14820, "total_steps": 15621, "loss": 0.3516, "lr": 1.6018010211106602e-08, "epoch": 0.9487228730555022, "percentage": 94.87, "elapsed_time": "1:34:20", "remaining_time": "0:05:05", "throughput": 8241.4, "total_tokens": 46649408} +{"current_steps": 14825, "total_steps": 15621, "loss": 0.2899, "lr": 1.58194336136942e-08, "epoch": 0.9490429549964791, "percentage": 94.9, "elapsed_time": "1:34:21", "remaining_time": "0:05:03", "throughput": 8243.22, "total_tokens": 46665344} +{"current_steps": 14830, "total_steps": 15621, "loss": 0.4377, "lr": 1.5622085777274417e-08, "epoch": 0.9493630369374559, "percentage": 94.94, "elapsed_time": "1:34:21", "remaining_time": "0:05:01", "throughput": 8244.96, "total_tokens": 46680704} +{"current_steps": 14835, "total_steps": 15621, "loss": 0.3267, "lr": 1.542596694823839e-08, "epoch": 0.9496831188784329, "percentage": 94.97, "elapsed_time": "1:34:22", "remaining_time": "0:05:00", "throughput": 8246.68, "total_tokens": 46695936} +{"current_steps": 14840, "total_steps": 15621, "loss": 0.4208, "lr": 1.5231077371442914e-08, "epoch": 0.9500032008194098, "percentage": 95.0, "elapsed_time": "1:34:23", "remaining_time": "0:04:58", "throughput": 8248.48, "total_tokens": 46711680} +{"current_steps": 14845, "total_steps": 15621, "loss": 0.2846, "lr": 1.5037417290209685e-08, "epoch": 0.9503232827603867, "percentage": 95.03, "elapsed_time": "1:34:23", "remaining_time": "0:04:56", "throughput": 8250.23, "total_tokens": 46727040} +{"current_steps": 14850, "total_steps": 15621, "loss": 0.3933, "lr": 1.4844986946325743e-08, "epoch": 0.9506433647013636, "percentage": 95.06, "elapsed_time": "1:34:24", "remaining_time": "0:04:54", "throughput": 8252.01, "total_tokens": 46742720} +{"current_steps": 14855, "total_steps": 15621, "loss": 0.2686, "lr": 1.4653786580042681e-08, "epoch": 0.9509634466423404, "percentage": 95.1, "elapsed_time": "1:34:25", "remaining_time": "0:04:52", "throughput": 8253.78, "total_tokens": 46758336} +{"current_steps": 14858, "total_steps": 15621, "eval_loss": 0.35565948486328125, "epoch": 0.9511554958069266, "percentage": 95.12, "elapsed_time": "1:35:16", "remaining_time": "0:04:53", "throughput": 8181.71, "total_tokens": 46767552} +{"current_steps": 14860, "total_steps": 15621, "loss": 0.2999, "lr": 1.4463816430076215e-08, "epoch": 0.9512835285833173, "percentage": 95.13, "elapsed_time": "1:38:02", "remaining_time": "0:05:01", "throughput": 7950.96, "total_tokens": 46773312} +{"current_steps": 14865, "total_steps": 15621, "loss": 0.3573, "lr": 1.4275076733606395e-08, "epoch": 0.9516036105242942, "percentage": 95.16, "elapsed_time": "1:38:03", "remaining_time": "0:04:59", "throughput": 7952.58, "total_tokens": 46787968} +{"current_steps": 14870, "total_steps": 15621, "loss": 0.2955, "lr": 1.4087567726277061e-08, "epoch": 0.9519236924652711, "percentage": 95.19, "elapsed_time": "1:38:04", "remaining_time": "0:04:57", "throughput": 7954.34, "total_tokens": 46803712} +{"current_steps": 14875, "total_steps": 15621, "loss": 0.2811, "lr": 1.390128964219528e-08, "epoch": 0.952243774406248, "percentage": 95.22, "elapsed_time": "1:38:04", "remaining_time": "0:04:55", "throughput": 7956.23, "total_tokens": 46820288} +{"current_steps": 14880, "total_steps": 15621, "loss": 0.3966, "lr": 1.3716242713931348e-08, "epoch": 0.9525638563472248, "percentage": 95.26, "elapsed_time": "1:38:05", "remaining_time": "0:04:53", "throughput": 7957.97, "total_tokens": 46835904} +{"current_steps": 14885, "total_steps": 15621, "loss": 0.3738, "lr": 1.3532427172518789e-08, "epoch": 0.9528839382882017, "percentage": 95.29, "elapsed_time": "1:38:06", "remaining_time": "0:04:51", "throughput": 7959.66, "total_tokens": 46851136} +{"current_steps": 14890, "total_steps": 15621, "loss": 0.3431, "lr": 1.3349843247453252e-08, "epoch": 0.9532040202291787, "percentage": 95.32, "elapsed_time": "1:38:06", "remaining_time": "0:04:49", "throughput": 7961.49, "total_tokens": 46867456} +{"current_steps": 14895, "total_steps": 15621, "loss": 0.2796, "lr": 1.3168491166692941e-08, "epoch": 0.9535241021701556, "percentage": 95.35, "elapsed_time": "1:38:07", "remaining_time": "0:04:46", "throughput": 7963.2, "total_tokens": 46882816} +{"current_steps": 14900, "total_steps": 15621, "loss": 0.4594, "lr": 1.2988371156658073e-08, "epoch": 0.9538441841111325, "percentage": 95.38, "elapsed_time": "1:38:08", "remaining_time": "0:04:44", "throughput": 7964.97, "total_tokens": 46898624} +{"current_steps": 14905, "total_steps": 15621, "loss": 0.2959, "lr": 1.2809483442230763e-08, "epoch": 0.9541642660521094, "percentage": 95.42, "elapsed_time": "1:38:08", "remaining_time": "0:04:42", "throughput": 7966.73, "total_tokens": 46914304} +{"current_steps": 14910, "total_steps": 15621, "loss": 0.373, "lr": 1.2631828246754128e-08, "epoch": 0.9544843479930862, "percentage": 95.45, "elapsed_time": "1:38:09", "remaining_time": "0:04:40", "throughput": 7968.53, "total_tokens": 46930368} +{"current_steps": 14915, "total_steps": 15621, "loss": 0.3678, "lr": 1.2455405792032969e-08, "epoch": 0.9548044299340631, "percentage": 95.48, "elapsed_time": "1:38:10", "remaining_time": "0:04:38", "throughput": 7970.25, "total_tokens": 46945792} +{"current_steps": 14920, "total_steps": 15621, "loss": 0.3474, "lr": 1.2280216298332646e-08, "epoch": 0.95512451187504, "percentage": 95.51, "elapsed_time": "1:38:10", "remaining_time": "0:04:36", "throughput": 7972.08, "total_tokens": 46962048} +{"current_steps": 14925, "total_steps": 15621, "loss": 0.4736, "lr": 1.2106259984379642e-08, "epoch": 0.9554445938160169, "percentage": 95.54, "elapsed_time": "1:38:11", "remaining_time": "0:04:34", "throughput": 7973.71, "total_tokens": 46976768} +{"current_steps": 14930, "total_steps": 15621, "loss": 0.4153, "lr": 1.1933537067359889e-08, "epoch": 0.9557646757569938, "percentage": 95.58, "elapsed_time": "1:38:12", "remaining_time": "0:04:32", "throughput": 7975.34, "total_tokens": 46991424} +{"current_steps": 14935, "total_steps": 15621, "loss": 0.3603, "lr": 1.1762047762920446e-08, "epoch": 0.9560847576979706, "percentage": 95.61, "elapsed_time": "1:38:12", "remaining_time": "0:04:30", "throughput": 7977.03, "total_tokens": 47006656} +{"current_steps": 14940, "total_steps": 15621, "loss": 0.3643, "lr": 1.1591792285167602e-08, "epoch": 0.9564048396389476, "percentage": 95.64, "elapsed_time": "1:38:13", "remaining_time": "0:04:28", "throughput": 7978.72, "total_tokens": 47021824} +{"current_steps": 14945, "total_steps": 15621, "loss": 0.3862, "lr": 1.1422770846667206e-08, "epoch": 0.9567249215799245, "percentage": 95.67, "elapsed_time": "1:38:14", "remaining_time": "0:04:26", "throughput": 7980.47, "total_tokens": 47037440} +{"current_steps": 14950, "total_steps": 15621, "loss": 0.303, "lr": 1.1254983658444572e-08, "epoch": 0.9570450035209014, "percentage": 95.7, "elapsed_time": "1:38:14", "remaining_time": "0:04:24", "throughput": 7982.29, "total_tokens": 47053760} +{"current_steps": 14955, "total_steps": 15621, "loss": 0.3218, "lr": 1.1088430929984017e-08, "epoch": 0.9573650854618783, "percentage": 95.74, "elapsed_time": "1:38:15", "remaining_time": "0:04:22", "throughput": 7983.96, "total_tokens": 47068928} +{"current_steps": 14960, "total_steps": 15621, "loss": 0.3807, "lr": 1.0923112869228645e-08, "epoch": 0.9576851674028551, "percentage": 95.77, "elapsed_time": "1:38:16", "remaining_time": "0:04:20", "throughput": 7985.72, "total_tokens": 47084672} +{"current_steps": 14965, "total_steps": 15621, "loss": 0.3554, "lr": 1.0759029682579801e-08, "epoch": 0.958005249343832, "percentage": 95.8, "elapsed_time": "1:38:16", "remaining_time": "0:04:18", "throughput": 7987.63, "total_tokens": 47101632} +{"current_steps": 14970, "total_steps": 15621, "loss": 0.3051, "lr": 1.0596181574897389e-08, "epoch": 0.9583253312848089, "percentage": 95.83, "elapsed_time": "1:38:17", "remaining_time": "0:04:16", "throughput": 7989.27, "total_tokens": 47116480} +{"current_steps": 14975, "total_steps": 15621, "loss": 0.3227, "lr": 1.0434568749499107e-08, "epoch": 0.9586454132257858, "percentage": 95.86, "elapsed_time": "1:38:18", "remaining_time": "0:04:14", "throughput": 7991.13, "total_tokens": 47132992} +{"current_steps": 14980, "total_steps": 15621, "loss": 0.3077, "lr": 1.027419140816066e-08, "epoch": 0.9589654951667627, "percentage": 95.9, "elapsed_time": "1:38:18", "remaining_time": "0:04:12", "throughput": 7992.93, "total_tokens": 47149056} +{"current_steps": 14985, "total_steps": 15621, "loss": 0.3029, "lr": 1.0115049751114768e-08, "epoch": 0.9592855771077395, "percentage": 95.93, "elapsed_time": "1:38:19", "remaining_time": "0:04:10", "throughput": 7994.7, "total_tokens": 47164864} +{"current_steps": 14990, "total_steps": 15621, "loss": 0.3514, "lr": 9.957143977051941e-09, "epoch": 0.9596056590487164, "percentage": 95.96, "elapsed_time": "1:38:20", "remaining_time": "0:04:08", "throughput": 7996.44, "total_tokens": 47180544} +{"current_steps": 14995, "total_steps": 15621, "loss": 0.3879, "lr": 9.800474283119142e-09, "epoch": 0.9599257409896934, "percentage": 95.99, "elapsed_time": "1:38:20", "remaining_time": "0:04:06", "throughput": 7998.23, "total_tokens": 47196608} +{"current_steps": 15000, "total_steps": 15621, "loss": 0.3755, "lr": 9.645040864920462e-09, "epoch": 0.9602458229306703, "percentage": 96.02, "elapsed_time": "1:38:21", "remaining_time": "0:04:04", "throughput": 8000.12, "total_tokens": 47213504} +{"current_steps": 15005, "total_steps": 15621, "loss": 0.4015, "lr": 9.490843916516334e-09, "epoch": 0.9605659048716472, "percentage": 96.06, "elapsed_time": "1:38:22", "remaining_time": "0:04:02", "throughput": 8001.76, "total_tokens": 47228288} +{"current_steps": 15010, "total_steps": 15621, "loss": 0.452, "lr": 9.337883630423316e-09, "epoch": 0.960885986812624, "percentage": 96.09, "elapsed_time": "1:38:22", "remaining_time": "0:04:00", "throughput": 8003.47, "total_tokens": 47243712} +{"current_steps": 15015, "total_steps": 15621, "loss": 0.5173, "lr": 9.186160197614423e-09, "epoch": 0.9612060687536009, "percentage": 96.12, "elapsed_time": "1:38:23", "remaining_time": "0:03:58", "throughput": 8005.29, "total_tokens": 47259904} +{"current_steps": 15020, "total_steps": 15621, "loss": 0.4795, "lr": 9.035673807517795e-09, "epoch": 0.9615261506945778, "percentage": 96.15, "elapsed_time": "1:38:24", "remaining_time": "0:03:56", "throughput": 8006.96, "total_tokens": 47275072} +{"current_steps": 15025, "total_steps": 15621, "loss": 0.2802, "lr": 8.886424648017698e-09, "epoch": 0.9618462326355547, "percentage": 96.18, "elapsed_time": "1:38:24", "remaining_time": "0:03:54", "throughput": 8008.69, "total_tokens": 47290688} +{"current_steps": 15030, "total_steps": 15621, "loss": 0.34, "lr": 8.738412905453408e-09, "epoch": 0.9621663145765316, "percentage": 96.22, "elapsed_time": "1:38:25", "remaining_time": "0:03:52", "throughput": 8010.45, "total_tokens": 47306496} +{"current_steps": 15035, "total_steps": 15621, "loss": 0.3524, "lr": 8.591638764619324e-09, "epoch": 0.9624863965175084, "percentage": 96.25, "elapsed_time": "1:38:26", "remaining_time": "0:03:50", "throughput": 8012.08, "total_tokens": 47321280} +{"current_steps": 15040, "total_steps": 15621, "loss": 0.3707, "lr": 8.446102408764643e-09, "epoch": 0.9628064784584853, "percentage": 96.28, "elapsed_time": "1:38:26", "remaining_time": "0:03:48", "throughput": 8013.9, "total_tokens": 47337536} +{"current_steps": 15045, "total_steps": 15621, "loss": 0.2796, "lr": 8.301804019593129e-09, "epoch": 0.9631265603994623, "percentage": 96.31, "elapsed_time": "1:38:27", "remaining_time": "0:03:46", "throughput": 8015.61, "total_tokens": 47353024} +{"current_steps": 15050, "total_steps": 15621, "loss": 0.3505, "lr": 8.158743777263333e-09, "epoch": 0.9634466423404392, "percentage": 96.34, "elapsed_time": "1:38:28", "remaining_time": "0:03:44", "throughput": 8017.39, "total_tokens": 47369088} +{"current_steps": 15055, "total_steps": 15621, "loss": 0.3566, "lr": 8.016921860387272e-09, "epoch": 0.9637667242814161, "percentage": 96.38, "elapsed_time": "1:38:28", "remaining_time": "0:03:42", "throughput": 8019.08, "total_tokens": 47384320} +{"current_steps": 15060, "total_steps": 15621, "loss": 0.3949, "lr": 7.876338446031416e-09, "epoch": 0.964086806222393, "percentage": 96.41, "elapsed_time": "1:38:29", "remaining_time": "0:03:40", "throughput": 8020.92, "total_tokens": 47400896} +{"current_steps": 15065, "total_steps": 15621, "loss": 0.3234, "lr": 7.736993709716033e-09, "epoch": 0.9644068881633698, "percentage": 96.44, "elapsed_time": "1:38:30", "remaining_time": "0:03:38", "throughput": 8022.7, "total_tokens": 47416896} +{"current_steps": 15070, "total_steps": 15621, "loss": 0.4736, "lr": 7.59888782541418e-09, "epoch": 0.9647269701043467, "percentage": 96.47, "elapsed_time": "1:38:31", "remaining_time": "0:03:36", "throughput": 8024.41, "total_tokens": 47432320} +{"current_steps": 15075, "total_steps": 15621, "loss": 0.2698, "lr": 7.462020965553151e-09, "epoch": 0.9650470520453236, "percentage": 96.5, "elapsed_time": "1:38:31", "remaining_time": "0:03:34", "throughput": 8026.2, "total_tokens": 47448320} +{"current_steps": 15080, "total_steps": 15621, "loss": 0.4844, "lr": 7.32639330101259e-09, "epoch": 0.9653671339863005, "percentage": 96.54, "elapsed_time": "1:38:32", "remaining_time": "0:03:32", "throughput": 8027.88, "total_tokens": 47463488} +{"current_steps": 15085, "total_steps": 15621, "loss": 0.3884, "lr": 7.1920050011252675e-09, "epoch": 0.9656872159272774, "percentage": 96.57, "elapsed_time": "1:38:33", "remaining_time": "0:03:30", "throughput": 8029.61, "total_tokens": 47479104} +{"current_steps": 15090, "total_steps": 15621, "loss": 0.3994, "lr": 7.058856233676525e-09, "epoch": 0.9660072978682542, "percentage": 96.6, "elapsed_time": "1:38:33", "remaining_time": "0:03:28", "throughput": 8031.56, "total_tokens": 47496448} +{"current_steps": 15095, "total_steps": 15621, "loss": 0.3758, "lr": 6.926947164904162e-09, "epoch": 0.9663273798092311, "percentage": 96.63, "elapsed_time": "1:38:34", "remaining_time": "0:03:26", "throughput": 8033.28, "total_tokens": 47511936} +{"current_steps": 15100, "total_steps": 15621, "loss": 0.4048, "lr": 6.796277959498331e-09, "epoch": 0.9666474617502081, "percentage": 96.66, "elapsed_time": "1:38:35", "remaining_time": "0:03:24", "throughput": 8035.1, "total_tokens": 47528320} +{"current_steps": 15105, "total_steps": 15621, "loss": 0.2726, "lr": 6.666848780600864e-09, "epoch": 0.966967543691185, "percentage": 96.7, "elapsed_time": "1:38:35", "remaining_time": "0:03:22", "throughput": 8036.76, "total_tokens": 47543296} +{"current_steps": 15110, "total_steps": 15621, "loss": 0.2706, "lr": 6.538659789805834e-09, "epoch": 0.9672876256321619, "percentage": 96.73, "elapsed_time": "1:38:36", "remaining_time": "0:03:20", "throughput": 8038.45, "total_tokens": 47558656} +{"current_steps": 15115, "total_steps": 15621, "loss": 0.3739, "lr": 6.411711147158438e-09, "epoch": 0.9676077075731387, "percentage": 96.76, "elapsed_time": "1:38:37", "remaining_time": "0:03:18", "throughput": 8040.24, "total_tokens": 47574720} +{"current_steps": 15120, "total_steps": 15621, "loss": 0.3126, "lr": 6.286003011155783e-09, "epoch": 0.9679277895141156, "percentage": 96.79, "elapsed_time": "1:38:37", "remaining_time": "0:03:16", "throughput": 8041.96, "total_tokens": 47590272} +{"current_steps": 15125, "total_steps": 15621, "loss": 0.4041, "lr": 6.161535538745877e-09, "epoch": 0.9682478714550925, "percentage": 96.82, "elapsed_time": "1:38:38", "remaining_time": "0:03:14", "throughput": 8043.66, "total_tokens": 47605696} +{"current_steps": 15130, "total_steps": 15621, "loss": 0.3798, "lr": 6.0383088853277475e-09, "epoch": 0.9685679533960694, "percentage": 96.86, "elapsed_time": "1:38:39", "remaining_time": "0:03:12", "throughput": 8045.46, "total_tokens": 47621760} +{"current_steps": 15135, "total_steps": 15621, "loss": 0.3175, "lr": 5.916323204751439e-09, "epoch": 0.9688880353370463, "percentage": 96.89, "elapsed_time": "1:38:39", "remaining_time": "0:03:10", "throughput": 8047.44, "total_tokens": 47639296} +{"current_steps": 15140, "total_steps": 15621, "loss": 0.2636, "lr": 5.795578649317345e-09, "epoch": 0.9692081172780231, "percentage": 96.92, "elapsed_time": "1:38:40", "remaining_time": "0:03:08", "throughput": 8049.13, "total_tokens": 47654656} +{"current_steps": 15145, "total_steps": 15621, "loss": 0.3059, "lr": 5.676075369776656e-09, "epoch": 0.969528199219, "percentage": 96.95, "elapsed_time": "1:38:41", "remaining_time": "0:03:06", "throughput": 8050.96, "total_tokens": 47671168} +{"current_steps": 15150, "total_steps": 15621, "loss": 0.3451, "lr": 5.557813515330468e-09, "epoch": 0.9698482811599769, "percentage": 96.98, "elapsed_time": "1:38:41", "remaining_time": "0:03:04", "throughput": 8052.63, "total_tokens": 47686400} +{"current_steps": 15155, "total_steps": 15621, "loss": 0.3484, "lr": 5.440793233630115e-09, "epoch": 0.9701683631009539, "percentage": 97.02, "elapsed_time": "1:38:42", "remaining_time": "0:03:02", "throughput": 8054.31, "total_tokens": 47701760} +{"current_steps": 15160, "total_steps": 15621, "loss": 0.3073, "lr": 5.325014670776951e-09, "epoch": 0.9704884450419308, "percentage": 97.05, "elapsed_time": "1:38:43", "remaining_time": "0:03:00", "throughput": 8056.01, "total_tokens": 47717248} +{"current_steps": 15165, "total_steps": 15621, "loss": 0.3607, "lr": 5.21047797132157e-09, "epoch": 0.9708085269829076, "percentage": 97.08, "elapsed_time": "1:38:43", "remaining_time": "0:02:58", "throughput": 8057.93, "total_tokens": 47734336} +{"current_steps": 15170, "total_steps": 15621, "loss": 0.3428, "lr": 5.097183278264694e-09, "epoch": 0.9711286089238845, "percentage": 97.11, "elapsed_time": "1:38:44", "remaining_time": "0:02:56", "throughput": 8059.71, "total_tokens": 47750464} +{"current_steps": 15175, "total_steps": 15621, "loss": 0.4272, "lr": 4.985130733055954e-09, "epoch": 0.9714486908648614, "percentage": 97.14, "elapsed_time": "1:38:45", "remaining_time": "0:02:54", "throughput": 8061.4, "total_tokens": 47765824} +{"current_steps": 15180, "total_steps": 15621, "loss": 0.381, "lr": 4.874320475594107e-09, "epoch": 0.9717687728058383, "percentage": 97.18, "elapsed_time": "1:38:45", "remaining_time": "0:02:52", "throughput": 8063.15, "total_tokens": 47781760} +{"current_steps": 15185, "total_steps": 15621, "loss": 0.292, "lr": 4.764752644227377e-09, "epoch": 0.9720888547468152, "percentage": 97.21, "elapsed_time": "1:38:46", "remaining_time": "0:02:50", "throughput": 8064.86, "total_tokens": 47797312} +{"current_steps": 15190, "total_steps": 15621, "loss": 0.335, "lr": 4.656427375752336e-09, "epoch": 0.972408936687792, "percentage": 97.24, "elapsed_time": "1:38:47", "remaining_time": "0:02:48", "throughput": 8066.67, "total_tokens": 47813440} +{"current_steps": 15195, "total_steps": 15621, "loss": 0.343, "lr": 4.549344805414246e-09, "epoch": 0.9727290186287689, "percentage": 97.27, "elapsed_time": "1:38:47", "remaining_time": "0:02:46", "throughput": 8068.44, "total_tokens": 47829440} +{"current_steps": 15200, "total_steps": 15621, "loss": 0.4009, "lr": 4.443505066907049e-09, "epoch": 0.9730491005697458, "percentage": 97.3, "elapsed_time": "1:38:48", "remaining_time": "0:02:44", "throughput": 8070.1, "total_tokens": 47844608} +{"current_steps": 15205, "total_steps": 15621, "loss": 0.2898, "lr": 4.338908292372934e-09, "epoch": 0.9733691825107228, "percentage": 97.34, "elapsed_time": "1:38:49", "remaining_time": "0:02:42", "throughput": 8071.81, "total_tokens": 47860160} +{"current_steps": 15210, "total_steps": 15621, "loss": 0.3906, "lr": 4.235554612402214e-09, "epoch": 0.9736892644516997, "percentage": 97.37, "elapsed_time": "1:38:49", "remaining_time": "0:02:40", "throughput": 8073.52, "total_tokens": 47875648} +{"current_steps": 15215, "total_steps": 15621, "loss": 0.3799, "lr": 4.133444156033006e-09, "epoch": 0.9740093463926766, "percentage": 97.4, "elapsed_time": "1:38:50", "remaining_time": "0:02:38", "throughput": 8075.43, "total_tokens": 47892736} +{"current_steps": 15220, "total_steps": 15621, "loss": 0.3319, "lr": 4.032577050751551e-09, "epoch": 0.9743294283336534, "percentage": 97.43, "elapsed_time": "1:38:51", "remaining_time": "0:02:36", "throughput": 8077.22, "total_tokens": 47908992} +{"current_steps": 15225, "total_steps": 15621, "loss": 0.3489, "lr": 3.932953422491669e-09, "epoch": 0.9746495102746303, "percentage": 97.46, "elapsed_time": "1:38:52", "remaining_time": "0:02:34", "throughput": 8078.96, "total_tokens": 47924736} +{"current_steps": 15230, "total_steps": 15621, "loss": 0.2816, "lr": 3.8345733956345326e-09, "epoch": 0.9749695922156072, "percentage": 97.5, "elapsed_time": "1:38:52", "remaining_time": "0:02:32", "throughput": 8080.76, "total_tokens": 47941056} +{"current_steps": 15235, "total_steps": 15621, "loss": 0.3635, "lr": 3.737437093008777e-09, "epoch": 0.9752896741565841, "percentage": 97.53, "elapsed_time": "1:38:53", "remaining_time": "0:02:30", "throughput": 8082.62, "total_tokens": 47957824} +{"current_steps": 15240, "total_steps": 15621, "loss": 0.4132, "lr": 3.641544635890281e-09, "epoch": 0.975609756097561, "percentage": 97.56, "elapsed_time": "1:38:54", "remaining_time": "0:02:28", "throughput": 8084.28, "total_tokens": 47973056} +{"current_steps": 15245, "total_steps": 15621, "loss": 0.3959, "lr": 3.546896144001832e-09, "epoch": 0.9759298380385378, "percentage": 97.59, "elapsed_time": "1:38:54", "remaining_time": "0:02:26", "throughput": 8086.03, "total_tokens": 47988928} +{"current_steps": 15250, "total_steps": 15621, "loss": 0.3935, "lr": 3.4534917355132364e-09, "epoch": 0.9762499199795147, "percentage": 97.62, "elapsed_time": "1:38:55", "remaining_time": "0:02:24", "throughput": 8087.67, "total_tokens": 48004032} +{"current_steps": 15255, "total_steps": 15621, "loss": 0.4168, "lr": 3.361331527040878e-09, "epoch": 0.9765700019204916, "percentage": 97.66, "elapsed_time": "1:38:56", "remaining_time": "0:02:22", "throughput": 8089.53, "total_tokens": 48020800} +{"current_steps": 15260, "total_steps": 15621, "loss": 0.3997, "lr": 3.270415633647938e-09, "epoch": 0.9768900838614686, "percentage": 97.69, "elapsed_time": "1:38:56", "remaining_time": "0:02:20", "throughput": 8091.3, "total_tokens": 48036800} +{"current_steps": 15265, "total_steps": 15621, "loss": 0.2911, "lr": 3.180744168843952e-09, "epoch": 0.9772101658024455, "percentage": 97.72, "elapsed_time": "1:38:57", "remaining_time": "0:02:18", "throughput": 8092.87, "total_tokens": 48051264} +{"current_steps": 15270, "total_steps": 15621, "loss": 0.226, "lr": 3.0923172445849187e-09, "epoch": 0.9775302477434223, "percentage": 97.75, "elapsed_time": "1:38:58", "remaining_time": "0:02:16", "throughput": 8094.5, "total_tokens": 48066176} +{"current_steps": 15275, "total_steps": 15621, "loss": 0.3135, "lr": 3.0051349712727493e-09, "epoch": 0.9778503296843992, "percentage": 97.79, "elapsed_time": "1:38:58", "remaining_time": "0:02:14", "throughput": 8096.23, "total_tokens": 48081984} +{"current_steps": 15280, "total_steps": 15621, "loss": 0.4143, "lr": 2.9191974577555954e-09, "epoch": 0.9781704116253761, "percentage": 97.82, "elapsed_time": "1:38:59", "remaining_time": "0:02:12", "throughput": 8097.85, "total_tokens": 48096896} +{"current_steps": 15285, "total_steps": 15621, "loss": 0.2341, "lr": 2.8345048113274096e-09, "epoch": 0.978490493566353, "percentage": 97.85, "elapsed_time": "1:39:00", "remaining_time": "0:02:10", "throughput": 8099.52, "total_tokens": 48112128} +{"current_steps": 15290, "total_steps": 15621, "loss": 0.3353, "lr": 2.751057137727941e-09, "epoch": 0.9788105755073299, "percentage": 97.88, "elapsed_time": "1:39:00", "remaining_time": "0:02:08", "throughput": 8101.22, "total_tokens": 48127616} +{"current_steps": 15295, "total_steps": 15621, "loss": 0.3961, "lr": 2.66885454114274e-09, "epoch": 0.9791306574483067, "percentage": 97.91, "elapsed_time": "1:39:01", "remaining_time": "0:02:06", "throughput": 8102.8, "total_tokens": 48142144} +{"current_steps": 15300, "total_steps": 15621, "loss": 0.3685, "lr": 2.5878971242025983e-09, "epoch": 0.9794507393892836, "percentage": 97.95, "elapsed_time": "1:39:02", "remaining_time": "0:02:04", "throughput": 8104.57, "total_tokens": 48158272} +{"current_steps": 15305, "total_steps": 15621, "loss": 0.3216, "lr": 2.5081849879837746e-09, "epoch": 0.9797708213302605, "percentage": 97.98, "elapsed_time": "1:39:02", "remaining_time": "0:02:02", "throughput": 8106.19, "total_tokens": 48173120} +{"current_steps": 15310, "total_steps": 15621, "loss": 0.3423, "lr": 2.429718232007771e-09, "epoch": 0.9800909032712375, "percentage": 98.01, "elapsed_time": "1:39:03", "remaining_time": "0:02:00", "throughput": 8107.88, "total_tokens": 48188672} +{"current_steps": 15315, "total_steps": 15621, "loss": 0.2693, "lr": 2.3524969542414453e-09, "epoch": 0.9804109852122144, "percentage": 98.04, "elapsed_time": "1:39:04", "remaining_time": "0:01:58", "throughput": 8109.62, "total_tokens": 48204480} +{"current_steps": 15320, "total_steps": 15621, "loss": 0.3537, "lr": 2.2765212510963418e-09, "epoch": 0.9807310671531912, "percentage": 98.07, "elapsed_time": "1:39:04", "remaining_time": "0:01:56", "throughput": 8111.27, "total_tokens": 48219584} +{"current_steps": 15325, "total_steps": 15621, "loss": 0.2813, "lr": 2.2017912174289164e-09, "epoch": 0.9810511490941681, "percentage": 98.11, "elapsed_time": "1:39:05", "remaining_time": "0:01:54", "throughput": 8113.07, "total_tokens": 48235904} +{"current_steps": 15330, "total_steps": 15621, "loss": 0.3963, "lr": 2.128306946540648e-09, "epoch": 0.981371231035145, "percentage": 98.14, "elapsed_time": "1:39:06", "remaining_time": "0:01:52", "throughput": 8114.98, "total_tokens": 48252992} +{"current_steps": 15335, "total_steps": 15621, "loss": 0.3328, "lr": 2.0560685301774792e-09, "epoch": 0.9816913129761219, "percentage": 98.17, "elapsed_time": "1:39:06", "remaining_time": "0:01:50", "throughput": 8116.59, "total_tokens": 48267840} +{"current_steps": 15340, "total_steps": 15621, "loss": 0.3753, "lr": 1.985076058529933e-09, "epoch": 0.9820113949170988, "percentage": 98.2, "elapsed_time": "1:39:07", "remaining_time": "0:01:48", "throughput": 8118.2, "total_tokens": 48282688} +{"current_steps": 15345, "total_steps": 15621, "loss": 0.478, "lr": 1.9153296202328863e-09, "epoch": 0.9823314768580756, "percentage": 98.23, "elapsed_time": "1:39:08", "remaining_time": "0:01:46", "throughput": 8120.15, "total_tokens": 48300096} +{"current_steps": 15350, "total_steps": 15621, "loss": 0.3943, "lr": 1.8468293023656823e-09, "epoch": 0.9826515587990525, "percentage": 98.27, "elapsed_time": "1:39:08", "remaining_time": "0:01:45", "throughput": 8121.78, "total_tokens": 48315136} +{"current_steps": 15355, "total_steps": 15621, "loss": 0.4025, "lr": 1.7795751904515766e-09, "epoch": 0.9829716407400294, "percentage": 98.3, "elapsed_time": "1:39:09", "remaining_time": "0:01:43", "throughput": 8123.43, "total_tokens": 48330240} +{"current_steps": 15360, "total_steps": 15621, "loss": 0.3109, "lr": 1.7135673684584019e-09, "epoch": 0.9832917226810063, "percentage": 98.33, "elapsed_time": "1:39:10", "remaining_time": "0:01:41", "throughput": 8125.06, "total_tokens": 48345280} +{"current_steps": 15365, "total_steps": 15621, "loss": 0.403, "lr": 1.6488059187974579e-09, "epoch": 0.9836118046219833, "percentage": 98.36, "elapsed_time": "1:39:10", "remaining_time": "0:01:39", "throughput": 8126.89, "total_tokens": 48361792} +{"current_steps": 15370, "total_steps": 15621, "loss": 0.4034, "lr": 1.5852909223242894e-09, "epoch": 0.9839318865629602, "percentage": 98.39, "elapsed_time": "1:39:11", "remaining_time": "0:01:37", "throughput": 8128.59, "total_tokens": 48377408} +{"current_steps": 15375, "total_steps": 15621, "loss": 0.3679, "lr": 1.5230224583380192e-09, "epoch": 0.984251968503937, "percentage": 98.43, "elapsed_time": "1:39:12", "remaining_time": "0:01:35", "throughput": 8130.28, "total_tokens": 48392896} +{"current_steps": 15380, "total_steps": 15621, "loss": 0.4625, "lr": 1.4620006045816813e-09, "epoch": 0.9845720504449139, "percentage": 98.46, "elapsed_time": "1:39:12", "remaining_time": "0:01:33", "throughput": 8131.87, "total_tokens": 48407552} +{"current_steps": 15385, "total_steps": 15621, "loss": 0.2809, "lr": 1.4022254372417774e-09, "epoch": 0.9848921323858908, "percentage": 98.49, "elapsed_time": "1:39:13", "remaining_time": "0:01:31", "throughput": 8133.72, "total_tokens": 48424320} +{"current_steps": 15390, "total_steps": 15621, "loss": 0.5055, "lr": 1.3436970309481655e-09, "epoch": 0.9852122143268677, "percentage": 98.52, "elapsed_time": "1:39:14", "remaining_time": "0:01:29", "throughput": 8135.68, "total_tokens": 48441984} +{"current_steps": 15395, "total_steps": 15621, "loss": 0.333, "lr": 1.2864154587742815e-09, "epoch": 0.9855322962678446, "percentage": 98.55, "elapsed_time": "1:39:14", "remaining_time": "0:01:27", "throughput": 8137.29, "total_tokens": 48456832} +{"current_steps": 15400, "total_steps": 15621, "loss": 0.3719, "lr": 1.2303807922370292e-09, "epoch": 0.9858523782088214, "percentage": 98.59, "elapsed_time": "1:39:15", "remaining_time": "0:01:25", "throughput": 8138.99, "total_tokens": 48472512} +{"current_steps": 15405, "total_steps": 15621, "loss": 0.3169, "lr": 1.1755931012961128e-09, "epoch": 0.9861724601497983, "percentage": 98.62, "elapsed_time": "1:39:16", "remaining_time": "0:01:23", "throughput": 8140.77, "total_tokens": 48488832} +{"current_steps": 15410, "total_steps": 15621, "loss": 0.3615, "lr": 1.122052454354705e-09, "epoch": 0.9864925420907752, "percentage": 98.65, "elapsed_time": "1:39:16", "remaining_time": "0:01:21", "throughput": 8142.4, "total_tokens": 48503936} +{"current_steps": 15415, "total_steps": 15621, "loss": 0.4383, "lr": 1.0697589182590005e-09, "epoch": 0.9868126240317522, "percentage": 98.68, "elapsed_time": "1:39:17", "remaining_time": "0:01:19", "throughput": 8144.05, "total_tokens": 48519040} +{"current_steps": 15420, "total_steps": 15621, "loss": 0.6, "lr": 1.018712558297996e-09, "epoch": 0.9871327059727291, "percentage": 98.71, "elapsed_time": "1:39:18", "remaining_time": "0:01:17", "throughput": 8145.8, "total_tokens": 48535040} +{"current_steps": 15425, "total_steps": 15621, "loss": 0.4438, "lr": 9.689134382037113e-10, "epoch": 0.9874527879137059, "percentage": 98.75, "elapsed_time": "1:39:18", "remaining_time": "0:01:15", "throughput": 8147.65, "total_tokens": 48551808} +{"current_steps": 15430, "total_steps": 15621, "loss": 0.3976, "lr": 9.203616201508557e-10, "epoch": 0.9877728698546828, "percentage": 98.78, "elapsed_time": "1:39:19", "remaining_time": "0:01:13", "throughput": 8149.25, "total_tokens": 48566592} +{"current_steps": 15435, "total_steps": 15621, "loss": 0.3103, "lr": 8.730571647570517e-10, "epoch": 0.9880929517956597, "percentage": 98.81, "elapsed_time": "1:39:20", "remaining_time": "0:01:11", "throughput": 8151.01, "total_tokens": 48582720} +{"current_steps": 15440, "total_steps": 15621, "loss": 0.4765, "lr": 8.270001310825003e-10, "epoch": 0.9884130337366366, "percentage": 98.84, "elapsed_time": "1:39:21", "remaining_time": "0:01:09", "throughput": 8152.81, "total_tokens": 48599104} +{"current_steps": 15445, "total_steps": 15621, "loss": 0.3114, "lr": 7.821905766297599e-10, "epoch": 0.9887331156776135, "percentage": 98.87, "elapsed_time": "1:39:21", "remaining_time": "0:01:07", "throughput": 8154.55, "total_tokens": 48615040} +{"current_steps": 15450, "total_steps": 15621, "loss": 0.3971, "lr": 7.386285573441897e-10, "epoch": 0.9890531976185903, "percentage": 98.91, "elapsed_time": "1:39:22", "remaining_time": "0:01:05", "throughput": 8156.29, "total_tokens": 48630976} +{"current_steps": 15455, "total_steps": 15621, "loss": 0.283, "lr": 6.963141276136175e-10, "epoch": 0.9893732795595672, "percentage": 98.94, "elapsed_time": "1:39:23", "remaining_time": "0:01:04", "throughput": 8157.94, "total_tokens": 48646080} +{"current_steps": 15460, "total_steps": 15621, "loss": 0.2476, "lr": 6.552473402678949e-10, "epoch": 0.9896933615005441, "percentage": 98.97, "elapsed_time": "1:39:23", "remaining_time": "0:01:02", "throughput": 8159.75, "total_tokens": 48662528} +{"current_steps": 15465, "total_steps": 15621, "loss": 0.3244, "lr": 6.154282465794524e-10, "epoch": 0.990013443441521, "percentage": 99.0, "elapsed_time": "1:39:24", "remaining_time": "0:01:00", "throughput": 8161.69, "total_tokens": 48680000} +{"current_steps": 15470, "total_steps": 15621, "loss": 0.4256, "lr": 5.768568962629672e-10, "epoch": 0.990333525382498, "percentage": 99.03, "elapsed_time": "1:39:25", "remaining_time": "0:00:58", "throughput": 8163.48, "total_tokens": 48696256} +{"current_steps": 15475, "total_steps": 15621, "loss": 0.3062, "lr": 5.395333374751398e-10, "epoch": 0.9906536073234748, "percentage": 99.07, "elapsed_time": "1:39:25", "remaining_time": "0:00:56", "throughput": 8165.1, "total_tokens": 48711168} +{"current_steps": 15480, "total_steps": 15621, "loss": 0.5477, "lr": 5.034576168149174e-10, "epoch": 0.9909736892644517, "percentage": 99.1, "elapsed_time": "1:39:26", "remaining_time": "0:00:54", "throughput": 8166.81, "total_tokens": 48726848} +{"current_steps": 15485, "total_steps": 15621, "loss": 0.4838, "lr": 4.686297793231597e-10, "epoch": 0.9912937712054286, "percentage": 99.13, "elapsed_time": "1:39:27", "remaining_time": "0:00:52", "throughput": 8168.61, "total_tokens": 48743232} +{"current_steps": 15490, "total_steps": 15621, "loss": 0.4541, "lr": 4.350498684829729e-10, "epoch": 0.9916138531464055, "percentage": 99.16, "elapsed_time": "1:39:27", "remaining_time": "0:00:50", "throughput": 8170.22, "total_tokens": 48758080} +{"current_steps": 15495, "total_steps": 15621, "loss": 0.3123, "lr": 4.0271792621926483e-10, "epoch": 0.9919339350873824, "percentage": 99.19, "elapsed_time": "1:39:28", "remaining_time": "0:00:48", "throughput": 8171.85, "total_tokens": 48773120} +{"current_steps": 15500, "total_steps": 15621, "loss": 0.3749, "lr": 3.716339928987455e-10, "epoch": 0.9922540170283592, "percentage": 99.23, "elapsed_time": "1:39:29", "remaining_time": "0:00:46", "throughput": 8173.58, "total_tokens": 48789056} +{"current_steps": 15505, "total_steps": 15621, "loss": 0.4189, "lr": 3.41798107330149e-10, "epoch": 0.9925740989693361, "percentage": 99.26, "elapsed_time": "1:39:29", "remaining_time": "0:00:44", "throughput": 8175.23, "total_tokens": 48804288} +{"current_steps": 15510, "total_steps": 15621, "loss": 0.3683, "lr": 3.1321030676390027e-10, "epoch": 0.992894180910313, "percentage": 99.29, "elapsed_time": "1:39:30", "remaining_time": "0:00:42", "throughput": 8176.8, "total_tokens": 48818816} +{"current_steps": 15515, "total_steps": 15621, "loss": 0.291, "lr": 2.8587062689222617e-10, "epoch": 0.9932142628512899, "percentage": 99.32, "elapsed_time": "1:39:31", "remaining_time": "0:00:40", "throughput": 8178.64, "total_tokens": 48835520} +{"current_steps": 15520, "total_steps": 15621, "loss": 0.3139, "lr": 2.5977910184904473e-10, "epoch": 0.9935343447922668, "percentage": 99.35, "elapsed_time": "1:39:31", "remaining_time": "0:00:38", "throughput": 8180.36, "total_tokens": 48851328} +{"current_steps": 15525, "total_steps": 15621, "loss": 0.3466, "lr": 2.3493576420985373e-10, "epoch": 0.9938544267332438, "percentage": 99.39, "elapsed_time": "1:39:32", "remaining_time": "0:00:36", "throughput": 8181.97, "total_tokens": 48866304} +{"current_steps": 15530, "total_steps": 15621, "loss": 0.3311, "lr": 2.11340644991842e-10, "epoch": 0.9941745086742206, "percentage": 99.42, "elapsed_time": "1:39:33", "remaining_time": "0:00:35", "throughput": 8183.78, "total_tokens": 48882752} +{"current_steps": 15535, "total_steps": 15621, "loss": 0.3046, "lr": 1.8899377365388936e-10, "epoch": 0.9944945906151975, "percentage": 99.45, "elapsed_time": "1:39:33", "remaining_time": "0:00:33", "throughput": 8185.46, "total_tokens": 48898304} +{"current_steps": 15540, "total_steps": 15621, "loss": 0.4284, "lr": 1.6789517809634447e-10, "epoch": 0.9948146725561744, "percentage": 99.48, "elapsed_time": "1:39:34", "remaining_time": "0:00:31", "throughput": 8187.17, "total_tokens": 48914048} +{"current_steps": 15545, "total_steps": 15621, "loss": 0.3291, "lr": 1.480448846609139e-10, "epoch": 0.9951347544971513, "percentage": 99.51, "elapsed_time": "1:39:35", "remaining_time": "0:00:29", "throughput": 8188.93, "total_tokens": 48930176} +{"current_steps": 15550, "total_steps": 15621, "loss": 0.3522, "lr": 1.294429181311063e-10, "epoch": 0.9954548364381282, "percentage": 99.55, "elapsed_time": "1:39:35", "remaining_time": "0:00:27", "throughput": 8190.63, "total_tokens": 48945920} +{"current_steps": 15555, "total_steps": 15621, "loss": 0.4063, "lr": 1.1208930173145503e-10, "epoch": 0.995774918379105, "percentage": 99.58, "elapsed_time": "1:39:36", "remaining_time": "0:00:25", "throughput": 8192.24, "total_tokens": 48960832} +{"current_steps": 15560, "total_steps": 15621, "loss": 0.3278, "lr": 9.598405712840651e-11, "epoch": 0.9960950003200819, "percentage": 99.61, "elapsed_time": "1:39:37", "remaining_time": "0:00:23", "throughput": 8194.03, "total_tokens": 48977280} +{"current_steps": 15565, "total_steps": 15621, "loss": 0.3526, "lr": 8.1127204429432e-11, "epoch": 0.9964150822610588, "percentage": 99.64, "elapsed_time": "1:39:37", "remaining_time": "0:00:21", "throughput": 8195.66, "total_tokens": 48992512} +{"current_steps": 15570, "total_steps": 15621, "loss": 0.351, "lr": 6.751876218336061e-11, "epoch": 0.9967351642020357, "percentage": 99.67, "elapsed_time": "1:39:38", "remaining_time": "0:00:19", "throughput": 8197.36, "total_tokens": 49008128} +{"current_steps": 15575, "total_steps": 15621, "loss": 0.3451, "lr": 5.515874738071247e-11, "epoch": 0.9970552461430127, "percentage": 99.71, "elapsed_time": "1:39:39", "remaining_time": "0:00:17", "throughput": 8199.14, "total_tokens": 49024512} +{"current_steps": 15580, "total_steps": 15621, "loss": 0.3131, "lr": 4.404717545303249e-11, "epoch": 0.9973753280839895, "percentage": 99.74, "elapsed_time": "1:39:39", "remaining_time": "0:00:15", "throughput": 8200.83, "total_tokens": 49040128} +{"current_steps": 15585, "total_steps": 15621, "loss": 0.3111, "lr": 3.418406027322352e-11, "epoch": 0.9976954100249664, "percentage": 99.77, "elapsed_time": "1:39:40", "remaining_time": "0:00:13", "throughput": 8202.48, "total_tokens": 49055360} +{"current_steps": 15590, "total_steps": 15621, "loss": 0.3576, "lr": 2.5569414155546254e-11, "epoch": 0.9980154919659433, "percentage": 99.8, "elapsed_time": "1:39:41", "remaining_time": "0:00:11", "throughput": 8204.21, "total_tokens": 49071360} +{"current_steps": 15595, "total_steps": 15621, "loss": 0.2698, "lr": 1.8203247855397287e-11, "epoch": 0.9983355739069202, "percentage": 99.83, "elapsed_time": "1:39:41", "remaining_time": "0:00:09", "throughput": 8205.8, "total_tokens": 49086144} +{"current_steps": 15600, "total_steps": 15621, "loss": 0.3915, "lr": 1.2085570569642101e-11, "epoch": 0.9986556558478971, "percentage": 99.87, "elapsed_time": "1:39:42", "remaining_time": "0:00:08", "throughput": 8207.43, "total_tokens": 49101312} +{"current_steps": 15605, "total_steps": 15621, "loss": 0.3151, "lr": 7.216389936171019e-12, "epoch": 0.9989757377888739, "percentage": 99.9, "elapsed_time": "1:39:43", "remaining_time": "0:00:06", "throughput": 8209.08, "total_tokens": 49116672} +{"current_steps": 15610, "total_steps": 15621, "loss": 0.1751, "lr": 3.5957120342322567e-12, "epoch": 0.9992958197298508, "percentage": 99.93, "elapsed_time": "1:39:43", "remaining_time": "0:00:04", "throughput": 8210.78, "total_tokens": 49132288} +{"current_steps": 15615, "total_steps": 15621, "loss": 0.3884, "lr": 1.2235413842098807e-12, "epoch": 0.9996159016708277, "percentage": 99.96, "elapsed_time": "1:39:44", "remaining_time": "0:00:02", "throughput": 8212.48, "total_tokens": 49148096} +{"current_steps": 15620, "total_steps": 15621, "loss": 0.2485, "lr": 9.98809480678986e-14, "epoch": 0.9999359836118046, "percentage": 99.99, "elapsed_time": "1:39:45", "remaining_time": "0:00:00", "throughput": 8214.19, "total_tokens": 49163840} +{"current_steps": 15621, "total_steps": 15621, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "1:40:29", "remaining_time": "0:00:00", "throughput": 8154.18, "total_tokens": 49166912} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..cea2cd1 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,25207 @@ +{ + "best_global_step": 14858, + "best_metric": 0.35565948486328125, + "best_model_checkpoint": "saves_bts_preliminary/base/llama-3.2-1b-instruct/train_record_42_1779354541/checkpoint-14858", + "epoch": 1.0, + "eval_steps": 782, + "global_step": 15621, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0003200819409768901, + "grad_norm": 664.248046875, + "learning_rate": 5.118362124120281e-09, + "loss": 2.1538, + "num_input_tokens_seen": 15360, + "step": 5 + }, + { + "epoch": 0.0006401638819537802, + "grad_norm": 454.5251770019531, + "learning_rate": 1.1516314779270634e-08, + "loss": 2.3529, + "num_input_tokens_seen": 31104, + "step": 10 + }, + { + "epoch": 0.0009602458229306702, + "grad_norm": 552.8055419921875, + "learning_rate": 1.7914267434420987e-08, + "loss": 2.1426, + "num_input_tokens_seen": 46208, + "step": 15 + }, + { + "epoch": 0.0012803277639075604, + "grad_norm": 382.2819519042969, + "learning_rate": 2.431222008957134e-08, + "loss": 2.7631, + "num_input_tokens_seen": 62464, + "step": 20 + }, + { + "epoch": 0.0016004097048844504, + "grad_norm": 433.5123291015625, + "learning_rate": 3.071017274472169e-08, + "loss": 2.1363, + "num_input_tokens_seen": 79104, + "step": 25 + }, + { + "epoch": 0.0019204916458613404, + "grad_norm": 372.2735900878906, + "learning_rate": 3.710812539987204e-08, + "loss": 2.2293, + "num_input_tokens_seen": 94912, + "step": 30 + }, + { + "epoch": 0.0022405735868382304, + "grad_norm": 534.3236083984375, + "learning_rate": 4.350607805502239e-08, + "loss": 2.3549, + "num_input_tokens_seen": 110784, + "step": 35 + }, + { + "epoch": 0.002560655527815121, + "grad_norm": 314.8287353515625, + "learning_rate": 4.990403071017274e-08, + "loss": 2.1602, + "num_input_tokens_seen": 125696, + "step": 40 + }, + { + "epoch": 0.002880737468792011, + "grad_norm": 401.165283203125, + "learning_rate": 5.6301983365323095e-08, + "loss": 2.1132, + "num_input_tokens_seen": 140672, + "step": 45 + }, + { + "epoch": 0.003200819409768901, + "grad_norm": 396.3443298339844, + "learning_rate": 6.269993602047345e-08, + "loss": 2.0446, + "num_input_tokens_seen": 155456, + "step": 50 + }, + { + "epoch": 0.003520901350745791, + "grad_norm": 373.389892578125, + "learning_rate": 6.90978886756238e-08, + "loss": 1.9213, + "num_input_tokens_seen": 170816, + "step": 55 + }, + { + "epoch": 0.003840983291722681, + "grad_norm": 337.19696044921875, + "learning_rate": 7.549584133077414e-08, + "loss": 2.0365, + "num_input_tokens_seen": 185088, + "step": 60 + }, + { + "epoch": 0.004161065232699571, + "grad_norm": 370.04510498046875, + "learning_rate": 8.18937939859245e-08, + "loss": 1.6514, + "num_input_tokens_seen": 200384, + "step": 65 + }, + { + "epoch": 0.004481147173676461, + "grad_norm": 271.975830078125, + "learning_rate": 8.829174664107485e-08, + "loss": 1.6765, + "num_input_tokens_seen": 215744, + "step": 70 + }, + { + "epoch": 0.004801229114653352, + "grad_norm": 173.92369079589844, + "learning_rate": 9.468969929622521e-08, + "loss": 1.6609, + "num_input_tokens_seen": 230400, + "step": 75 + }, + { + "epoch": 0.005121311055630242, + "grad_norm": 279.897705078125, + "learning_rate": 1.0108765195137556e-07, + "loss": 1.326, + "num_input_tokens_seen": 246592, + "step": 80 + }, + { + "epoch": 0.005441392996607132, + "grad_norm": 108.02691650390625, + "learning_rate": 1.074856046065259e-07, + "loss": 1.1489, + "num_input_tokens_seen": 262272, + "step": 85 + }, + { + "epoch": 0.005761474937584022, + "grad_norm": 122.35417175292969, + "learning_rate": 1.1388355726167625e-07, + "loss": 1.0445, + "num_input_tokens_seen": 277760, + "step": 90 + }, + { + "epoch": 0.006081556878560912, + "grad_norm": 163.84530639648438, + "learning_rate": 1.202815099168266e-07, + "loss": 1.2459, + "num_input_tokens_seen": 292992, + "step": 95 + }, + { + "epoch": 0.006401638819537802, + "grad_norm": 157.150146484375, + "learning_rate": 1.2667946257197694e-07, + "loss": 1.1182, + "num_input_tokens_seen": 307840, + "step": 100 + }, + { + "epoch": 0.006721720760514692, + "grad_norm": 111.58399200439453, + "learning_rate": 1.3307741522712732e-07, + "loss": 1.0297, + "num_input_tokens_seen": 323008, + "step": 105 + }, + { + "epoch": 0.007041802701491582, + "grad_norm": 91.58705139160156, + "learning_rate": 1.3947536788227767e-07, + "loss": 1.051, + "num_input_tokens_seen": 339456, + "step": 110 + }, + { + "epoch": 0.007361884642468472, + "grad_norm": 85.68628692626953, + "learning_rate": 1.45873320537428e-07, + "loss": 1.1295, + "num_input_tokens_seen": 354816, + "step": 115 + }, + { + "epoch": 0.007681966583445362, + "grad_norm": 78.04808044433594, + "learning_rate": 1.5227127319257838e-07, + "loss": 0.8247, + "num_input_tokens_seen": 369472, + "step": 120 + }, + { + "epoch": 0.008002048524422252, + "grad_norm": 71.98005676269531, + "learning_rate": 1.586692258477287e-07, + "loss": 0.9577, + "num_input_tokens_seen": 384768, + "step": 125 + }, + { + "epoch": 0.008322130465399142, + "grad_norm": 98.72233581542969, + "learning_rate": 1.6506717850287908e-07, + "loss": 1.0328, + "num_input_tokens_seen": 400192, + "step": 130 + }, + { + "epoch": 0.008642212406376032, + "grad_norm": 121.78581237792969, + "learning_rate": 1.7146513115802943e-07, + "loss": 0.8953, + "num_input_tokens_seen": 416640, + "step": 135 + }, + { + "epoch": 0.008962294347352922, + "grad_norm": 66.73460388183594, + "learning_rate": 1.7786308381317976e-07, + "loss": 0.8265, + "num_input_tokens_seen": 432640, + "step": 140 + }, + { + "epoch": 0.009282376288329812, + "grad_norm": 78.51518249511719, + "learning_rate": 1.8426103646833014e-07, + "loss": 0.8983, + "num_input_tokens_seen": 448640, + "step": 145 + }, + { + "epoch": 0.009602458229306703, + "grad_norm": 89.75164794921875, + "learning_rate": 1.9065898912348046e-07, + "loss": 0.9503, + "num_input_tokens_seen": 464448, + "step": 150 + }, + { + "epoch": 0.009922540170283593, + "grad_norm": 85.8103256225586, + "learning_rate": 1.9705694177863084e-07, + "loss": 0.858, + "num_input_tokens_seen": 479488, + "step": 155 + }, + { + "epoch": 0.010242622111260483, + "grad_norm": 56.45402526855469, + "learning_rate": 2.034548944337812e-07, + "loss": 0.7304, + "num_input_tokens_seen": 495296, + "step": 160 + }, + { + "epoch": 0.010562704052237373, + "grad_norm": 89.73834991455078, + "learning_rate": 2.0985284708893152e-07, + "loss": 0.7848, + "num_input_tokens_seen": 510144, + "step": 165 + }, + { + "epoch": 0.010882785993214263, + "grad_norm": 81.91443634033203, + "learning_rate": 2.162507997440819e-07, + "loss": 0.8469, + "num_input_tokens_seen": 524928, + "step": 170 + }, + { + "epoch": 0.011202867934191153, + "grad_norm": 47.02535629272461, + "learning_rate": 2.2264875239923222e-07, + "loss": 0.7184, + "num_input_tokens_seen": 541504, + "step": 175 + }, + { + "epoch": 0.011522949875168043, + "grad_norm": 83.94172668457031, + "learning_rate": 2.290467050543826e-07, + "loss": 0.7354, + "num_input_tokens_seen": 556096, + "step": 180 + }, + { + "epoch": 0.011843031816144933, + "grad_norm": 78.48973846435547, + "learning_rate": 2.3544465770953295e-07, + "loss": 0.7351, + "num_input_tokens_seen": 572736, + "step": 185 + }, + { + "epoch": 0.012163113757121823, + "grad_norm": 74.28348541259766, + "learning_rate": 2.418426103646833e-07, + "loss": 0.853, + "num_input_tokens_seen": 588352, + "step": 190 + }, + { + "epoch": 0.012483195698098713, + "grad_norm": 56.58919906616211, + "learning_rate": 2.4824056301983363e-07, + "loss": 0.9742, + "num_input_tokens_seen": 603520, + "step": 195 + }, + { + "epoch": 0.012803277639075603, + "grad_norm": 68.8056640625, + "learning_rate": 2.54638515674984e-07, + "loss": 0.8167, + "num_input_tokens_seen": 619392, + "step": 200 + }, + { + "epoch": 0.013123359580052493, + "grad_norm": 60.73455047607422, + "learning_rate": 2.6103646833013433e-07, + "loss": 0.7996, + "num_input_tokens_seen": 635456, + "step": 205 + }, + { + "epoch": 0.013443441521029383, + "grad_norm": 123.12853240966797, + "learning_rate": 2.6743442098528466e-07, + "loss": 0.8732, + "num_input_tokens_seen": 650880, + "step": 210 + }, + { + "epoch": 0.013763523462006273, + "grad_norm": 52.10979461669922, + "learning_rate": 2.7383237364043504e-07, + "loss": 0.8244, + "num_input_tokens_seen": 666688, + "step": 215 + }, + { + "epoch": 0.014083605402983163, + "grad_norm": 70.8306884765625, + "learning_rate": 2.802303262955854e-07, + "loss": 0.7909, + "num_input_tokens_seen": 682112, + "step": 220 + }, + { + "epoch": 0.014403687343960053, + "grad_norm": 66.97418975830078, + "learning_rate": 2.866282789507358e-07, + "loss": 0.8318, + "num_input_tokens_seen": 697728, + "step": 225 + }, + { + "epoch": 0.014723769284936943, + "grad_norm": 54.565391540527344, + "learning_rate": 2.9302623160588607e-07, + "loss": 0.6828, + "num_input_tokens_seen": 712704, + "step": 230 + }, + { + "epoch": 0.015043851225913833, + "grad_norm": 93.17967987060547, + "learning_rate": 2.9942418426103644e-07, + "loss": 0.9689, + "num_input_tokens_seen": 729408, + "step": 235 + }, + { + "epoch": 0.015363933166890723, + "grad_norm": 90.8511962890625, + "learning_rate": 3.058221369161868e-07, + "loss": 0.7854, + "num_input_tokens_seen": 745344, + "step": 240 + }, + { + "epoch": 0.015684015107867613, + "grad_norm": 57.57070541381836, + "learning_rate": 3.1222008957133715e-07, + "loss": 0.7013, + "num_input_tokens_seen": 762688, + "step": 245 + }, + { + "epoch": 0.016004097048844503, + "grad_norm": 64.6364974975586, + "learning_rate": 3.186180422264875e-07, + "loss": 0.7128, + "num_input_tokens_seen": 779392, + "step": 250 + }, + { + "epoch": 0.016324178989821393, + "grad_norm": 82.40945434570312, + "learning_rate": 3.2501599488163785e-07, + "loss": 0.7911, + "num_input_tokens_seen": 794112, + "step": 255 + }, + { + "epoch": 0.016644260930798283, + "grad_norm": 51.280452728271484, + "learning_rate": 3.314139475367882e-07, + "loss": 0.8429, + "num_input_tokens_seen": 810112, + "step": 260 + }, + { + "epoch": 0.016964342871775173, + "grad_norm": 80.96479797363281, + "learning_rate": 3.3781190019193855e-07, + "loss": 0.8602, + "num_input_tokens_seen": 825472, + "step": 265 + }, + { + "epoch": 0.017284424812752063, + "grad_norm": 70.75425720214844, + "learning_rate": 3.4420985284708893e-07, + "loss": 0.9818, + "num_input_tokens_seen": 840128, + "step": 270 + }, + { + "epoch": 0.017604506753728953, + "grad_norm": 72.48260498046875, + "learning_rate": 3.5060780550223926e-07, + "loss": 0.7846, + "num_input_tokens_seen": 855104, + "step": 275 + }, + { + "epoch": 0.017924588694705843, + "grad_norm": 69.50082397460938, + "learning_rate": 3.570057581573896e-07, + "loss": 0.8039, + "num_input_tokens_seen": 870848, + "step": 280 + }, + { + "epoch": 0.018244670635682733, + "grad_norm": 47.575138092041016, + "learning_rate": 3.6340371081253996e-07, + "loss": 0.7489, + "num_input_tokens_seen": 885760, + "step": 285 + }, + { + "epoch": 0.018564752576659623, + "grad_norm": 45.56133270263672, + "learning_rate": 3.6980166346769034e-07, + "loss": 0.705, + "num_input_tokens_seen": 900928, + "step": 290 + }, + { + "epoch": 0.018884834517636517, + "grad_norm": 55.36705780029297, + "learning_rate": 3.7619961612284067e-07, + "loss": 0.7869, + "num_input_tokens_seen": 915968, + "step": 295 + }, + { + "epoch": 0.019204916458613407, + "grad_norm": 110.47761535644531, + "learning_rate": 3.8259756877799104e-07, + "loss": 0.9906, + "num_input_tokens_seen": 933056, + "step": 300 + }, + { + "epoch": 0.019524998399590297, + "grad_norm": 85.01045227050781, + "learning_rate": 3.889955214331414e-07, + "loss": 0.737, + "num_input_tokens_seen": 948416, + "step": 305 + }, + { + "epoch": 0.019845080340567187, + "grad_norm": 68.13928985595703, + "learning_rate": 3.953934740882917e-07, + "loss": 0.7708, + "num_input_tokens_seen": 962880, + "step": 310 + }, + { + "epoch": 0.020165162281544077, + "grad_norm": 60.105281829833984, + "learning_rate": 4.0179142674344207e-07, + "loss": 0.8126, + "num_input_tokens_seen": 979904, + "step": 315 + }, + { + "epoch": 0.020485244222520967, + "grad_norm": 68.0919189453125, + "learning_rate": 4.0818937939859245e-07, + "loss": 0.8299, + "num_input_tokens_seen": 995136, + "step": 320 + }, + { + "epoch": 0.020805326163497857, + "grad_norm": 61.429813385009766, + "learning_rate": 4.145873320537428e-07, + "loss": 0.771, + "num_input_tokens_seen": 1011008, + "step": 325 + }, + { + "epoch": 0.021125408104474747, + "grad_norm": 60.88750076293945, + "learning_rate": 4.2098528470889315e-07, + "loss": 0.831, + "num_input_tokens_seen": 1025792, + "step": 330 + }, + { + "epoch": 0.021445490045451637, + "grad_norm": 56.13808059692383, + "learning_rate": 4.273832373640435e-07, + "loss": 0.6929, + "num_input_tokens_seen": 1042944, + "step": 335 + }, + { + "epoch": 0.021765571986428527, + "grad_norm": 71.17967987060547, + "learning_rate": 4.3378119001919386e-07, + "loss": 0.8271, + "num_input_tokens_seen": 1058688, + "step": 340 + }, + { + "epoch": 0.022085653927405417, + "grad_norm": 53.61168670654297, + "learning_rate": 4.401791426743442e-07, + "loss": 0.7202, + "num_input_tokens_seen": 1074560, + "step": 345 + }, + { + "epoch": 0.022405735868382307, + "grad_norm": 76.7827377319336, + "learning_rate": 4.4657709532949456e-07, + "loss": 0.6947, + "num_input_tokens_seen": 1089728, + "step": 350 + }, + { + "epoch": 0.022725817809359197, + "grad_norm": 94.65788269042969, + "learning_rate": 4.5297504798464494e-07, + "loss": 0.9107, + "num_input_tokens_seen": 1105024, + "step": 355 + }, + { + "epoch": 0.023045899750336087, + "grad_norm": 52.23056411743164, + "learning_rate": 4.593730006397952e-07, + "loss": 0.7831, + "num_input_tokens_seen": 1121088, + "step": 360 + }, + { + "epoch": 0.023365981691312977, + "grad_norm": 52.77978515625, + "learning_rate": 4.657709532949456e-07, + "loss": 0.7062, + "num_input_tokens_seen": 1136896, + "step": 365 + }, + { + "epoch": 0.023686063632289867, + "grad_norm": 61.802242279052734, + "learning_rate": 4.7216890595009597e-07, + "loss": 0.7133, + "num_input_tokens_seen": 1153280, + "step": 370 + }, + { + "epoch": 0.024006145573266757, + "grad_norm": 56.2958869934082, + "learning_rate": 4.785668586052463e-07, + "loss": 0.8307, + "num_input_tokens_seen": 1169536, + "step": 375 + }, + { + "epoch": 0.024326227514243647, + "grad_norm": 89.10625457763672, + "learning_rate": 4.849648112603967e-07, + "loss": 0.7573, + "num_input_tokens_seen": 1185088, + "step": 380 + }, + { + "epoch": 0.024646309455220537, + "grad_norm": 36.04088592529297, + "learning_rate": 4.91362763915547e-07, + "loss": 0.6599, + "num_input_tokens_seen": 1200832, + "step": 385 + }, + { + "epoch": 0.024966391396197427, + "grad_norm": 51.54562759399414, + "learning_rate": 4.977607165706974e-07, + "loss": 0.6614, + "num_input_tokens_seen": 1216320, + "step": 390 + }, + { + "epoch": 0.025286473337174317, + "grad_norm": 63.85747528076172, + "learning_rate": 5.041586692258478e-07, + "loss": 0.7446, + "num_input_tokens_seen": 1232832, + "step": 395 + }, + { + "epoch": 0.025606555278151207, + "grad_norm": 51.690006256103516, + "learning_rate": 5.10556621880998e-07, + "loss": 0.7023, + "num_input_tokens_seen": 1248384, + "step": 400 + }, + { + "epoch": 0.025926637219128097, + "grad_norm": 37.42890167236328, + "learning_rate": 5.169545745361484e-07, + "loss": 0.672, + "num_input_tokens_seen": 1263936, + "step": 405 + }, + { + "epoch": 0.026246719160104987, + "grad_norm": 67.20600891113281, + "learning_rate": 5.233525271912988e-07, + "loss": 1.1224, + "num_input_tokens_seen": 1294208, + "step": 410 + }, + { + "epoch": 0.026566801101081877, + "grad_norm": 64.76204681396484, + "learning_rate": 5.297504798464492e-07, + "loss": 0.7874, + "num_input_tokens_seen": 1309120, + "step": 415 + }, + { + "epoch": 0.026886883042058767, + "grad_norm": 58.28200912475586, + "learning_rate": 5.361484325015994e-07, + "loss": 0.8574, + "num_input_tokens_seen": 1324224, + "step": 420 + }, + { + "epoch": 0.027206964983035656, + "grad_norm": 76.2400131225586, + "learning_rate": 5.425463851567498e-07, + "loss": 0.683, + "num_input_tokens_seen": 1341056, + "step": 425 + }, + { + "epoch": 0.027527046924012546, + "grad_norm": 53.95072555541992, + "learning_rate": 5.489443378119002e-07, + "loss": 0.7444, + "num_input_tokens_seen": 1356544, + "step": 430 + }, + { + "epoch": 0.027847128864989436, + "grad_norm": 65.18901824951172, + "learning_rate": 5.553422904670505e-07, + "loss": 0.6717, + "num_input_tokens_seen": 1371840, + "step": 435 + }, + { + "epoch": 0.028167210805966326, + "grad_norm": 56.888824462890625, + "learning_rate": 5.61740243122201e-07, + "loss": 0.6805, + "num_input_tokens_seen": 1386816, + "step": 440 + }, + { + "epoch": 0.028487292746943216, + "grad_norm": 65.56224822998047, + "learning_rate": 5.681381957773512e-07, + "loss": 0.744, + "num_input_tokens_seen": 1401792, + "step": 445 + }, + { + "epoch": 0.028807374687920106, + "grad_norm": 67.67861938476562, + "learning_rate": 5.745361484325015e-07, + "loss": 0.6219, + "num_input_tokens_seen": 1416896, + "step": 450 + }, + { + "epoch": 0.029127456628896996, + "grad_norm": 84.42303466796875, + "learning_rate": 5.80934101087652e-07, + "loss": 0.7479, + "num_input_tokens_seen": 1432704, + "step": 455 + }, + { + "epoch": 0.029447538569873886, + "grad_norm": 87.79871368408203, + "learning_rate": 5.873320537428022e-07, + "loss": 0.7009, + "num_input_tokens_seen": 1448384, + "step": 460 + }, + { + "epoch": 0.029767620510850776, + "grad_norm": 60.47886276245117, + "learning_rate": 5.937300063979526e-07, + "loss": 0.7189, + "num_input_tokens_seen": 1464832, + "step": 465 + }, + { + "epoch": 0.030087702451827666, + "grad_norm": 71.14859008789062, + "learning_rate": 6.00127959053103e-07, + "loss": 0.678, + "num_input_tokens_seen": 1479424, + "step": 470 + }, + { + "epoch": 0.030407784392804556, + "grad_norm": 41.08133316040039, + "learning_rate": 6.065259117082533e-07, + "loss": 0.7233, + "num_input_tokens_seen": 1494336, + "step": 475 + }, + { + "epoch": 0.030727866333781446, + "grad_norm": 54.67021179199219, + "learning_rate": 6.129238643634037e-07, + "loss": 0.6771, + "num_input_tokens_seen": 1509184, + "step": 480 + }, + { + "epoch": 0.031047948274758336, + "grad_norm": 37.22821807861328, + "learning_rate": 6.19321817018554e-07, + "loss": 0.8088, + "num_input_tokens_seen": 1525504, + "step": 485 + }, + { + "epoch": 0.031368030215735226, + "grad_norm": 41.66913604736328, + "learning_rate": 6.257197696737044e-07, + "loss": 0.5954, + "num_input_tokens_seen": 1541504, + "step": 490 + }, + { + "epoch": 0.03168811215671212, + "grad_norm": 39.45866012573242, + "learning_rate": 6.321177223288548e-07, + "loss": 0.6166, + "num_input_tokens_seen": 1557184, + "step": 495 + }, + { + "epoch": 0.032008194097689006, + "grad_norm": 60.6429443359375, + "learning_rate": 6.385156749840051e-07, + "loss": 0.7699, + "num_input_tokens_seen": 1573440, + "step": 500 + }, + { + "epoch": 0.0323282760386659, + "grad_norm": 35.02703857421875, + "learning_rate": 6.449136276391554e-07, + "loss": 0.8718, + "num_input_tokens_seen": 1588736, + "step": 505 + }, + { + "epoch": 0.032648357979642786, + "grad_norm": 43.51701354980469, + "learning_rate": 6.513115802943058e-07, + "loss": 0.6977, + "num_input_tokens_seen": 1604352, + "step": 510 + }, + { + "epoch": 0.03296843992061968, + "grad_norm": 41.14889907836914, + "learning_rate": 6.577095329494562e-07, + "loss": 0.6582, + "num_input_tokens_seen": 1618816, + "step": 515 + }, + { + "epoch": 0.033288521861596566, + "grad_norm": 70.44395446777344, + "learning_rate": 6.641074856046065e-07, + "loss": 0.7409, + "num_input_tokens_seen": 1635648, + "step": 520 + }, + { + "epoch": 0.03360860380257346, + "grad_norm": 53.1386833190918, + "learning_rate": 6.705054382597568e-07, + "loss": 0.7248, + "num_input_tokens_seen": 1651328, + "step": 525 + }, + { + "epoch": 0.033928685743550346, + "grad_norm": 42.05000305175781, + "learning_rate": 6.769033909149072e-07, + "loss": 0.7271, + "num_input_tokens_seen": 1668928, + "step": 530 + }, + { + "epoch": 0.03424876768452724, + "grad_norm": 51.52647018432617, + "learning_rate": 6.833013435700575e-07, + "loss": 0.6188, + "num_input_tokens_seen": 1685504, + "step": 535 + }, + { + "epoch": 0.034568849625504126, + "grad_norm": 57.56531524658203, + "learning_rate": 6.89699296225208e-07, + "loss": 0.7016, + "num_input_tokens_seen": 1701952, + "step": 540 + }, + { + "epoch": 0.03488893156648102, + "grad_norm": 58.09773635864258, + "learning_rate": 6.960972488803583e-07, + "loss": 0.7293, + "num_input_tokens_seen": 1716992, + "step": 545 + }, + { + "epoch": 0.035209013507457906, + "grad_norm": 27.947566986083984, + "learning_rate": 7.024952015355085e-07, + "loss": 0.583, + "num_input_tokens_seen": 1732160, + "step": 550 + }, + { + "epoch": 0.0355290954484348, + "grad_norm": 66.85079193115234, + "learning_rate": 7.08893154190659e-07, + "loss": 0.656, + "num_input_tokens_seen": 1748416, + "step": 555 + }, + { + "epoch": 0.035849177389411686, + "grad_norm": 48.763916015625, + "learning_rate": 7.152911068458093e-07, + "loss": 0.7074, + "num_input_tokens_seen": 1763776, + "step": 560 + }, + { + "epoch": 0.03616925933038858, + "grad_norm": 55.289859771728516, + "learning_rate": 7.216890595009597e-07, + "loss": 0.7046, + "num_input_tokens_seen": 1780160, + "step": 565 + }, + { + "epoch": 0.036489341271365466, + "grad_norm": 51.82642364501953, + "learning_rate": 7.2808701215611e-07, + "loss": 0.6024, + "num_input_tokens_seen": 1795968, + "step": 570 + }, + { + "epoch": 0.03680942321234236, + "grad_norm": 46.479549407958984, + "learning_rate": 7.344849648112603e-07, + "loss": 0.6319, + "num_input_tokens_seen": 1815424, + "step": 575 + }, + { + "epoch": 0.037129505153319246, + "grad_norm": 86.72647857666016, + "learning_rate": 7.408829174664107e-07, + "loss": 0.8256, + "num_input_tokens_seen": 1831936, + "step": 580 + }, + { + "epoch": 0.03744958709429614, + "grad_norm": 34.57395935058594, + "learning_rate": 7.472808701215611e-07, + "loss": 0.6147, + "num_input_tokens_seen": 1847424, + "step": 585 + }, + { + "epoch": 0.03776966903527303, + "grad_norm": 47.81095886230469, + "learning_rate": 7.536788227767114e-07, + "loss": 0.7338, + "num_input_tokens_seen": 1862400, + "step": 590 + }, + { + "epoch": 0.03808975097624992, + "grad_norm": 85.52812194824219, + "learning_rate": 7.600767754318617e-07, + "loss": 0.7698, + "num_input_tokens_seen": 1876928, + "step": 595 + }, + { + "epoch": 0.03840983291722681, + "grad_norm": 54.25386047363281, + "learning_rate": 7.664747280870121e-07, + "loss": 0.6403, + "num_input_tokens_seen": 1892608, + "step": 600 + }, + { + "epoch": 0.0387299148582037, + "grad_norm": 37.492774963378906, + "learning_rate": 7.728726807421625e-07, + "loss": 0.7287, + "num_input_tokens_seen": 1909696, + "step": 605 + }, + { + "epoch": 0.03904999679918059, + "grad_norm": 40.18218231201172, + "learning_rate": 7.792706333973129e-07, + "loss": 0.7582, + "num_input_tokens_seen": 1924864, + "step": 610 + }, + { + "epoch": 0.03937007874015748, + "grad_norm": 36.508460998535156, + "learning_rate": 7.856685860524632e-07, + "loss": 0.5567, + "num_input_tokens_seen": 1939968, + "step": 615 + }, + { + "epoch": 0.03969016068113437, + "grad_norm": 70.67202758789062, + "learning_rate": 7.920665387076135e-07, + "loss": 0.711, + "num_input_tokens_seen": 1955136, + "step": 620 + }, + { + "epoch": 0.04001024262211126, + "grad_norm": 44.07026290893555, + "learning_rate": 7.984644913627639e-07, + "loss": 0.7024, + "num_input_tokens_seen": 1970880, + "step": 625 + }, + { + "epoch": 0.04033032456308815, + "grad_norm": 62.611148834228516, + "learning_rate": 8.048624440179143e-07, + "loss": 0.583, + "num_input_tokens_seen": 1986752, + "step": 630 + }, + { + "epoch": 0.04065040650406504, + "grad_norm": 41.27976608276367, + "learning_rate": 8.112603966730645e-07, + "loss": 0.5988, + "num_input_tokens_seen": 2001856, + "step": 635 + }, + { + "epoch": 0.04097048844504193, + "grad_norm": 51.214908599853516, + "learning_rate": 8.17658349328215e-07, + "loss": 0.6581, + "num_input_tokens_seen": 2019968, + "step": 640 + }, + { + "epoch": 0.04129057038601882, + "grad_norm": 60.009483337402344, + "learning_rate": 8.240563019833653e-07, + "loss": 0.7118, + "num_input_tokens_seen": 2035328, + "step": 645 + }, + { + "epoch": 0.04161065232699571, + "grad_norm": 44.75967788696289, + "learning_rate": 8.304542546385156e-07, + "loss": 0.6236, + "num_input_tokens_seen": 2055168, + "step": 650 + }, + { + "epoch": 0.0419307342679726, + "grad_norm": 44.7182731628418, + "learning_rate": 8.36852207293666e-07, + "loss": 0.7478, + "num_input_tokens_seen": 2071808, + "step": 655 + }, + { + "epoch": 0.04225081620894949, + "grad_norm": 45.788204193115234, + "learning_rate": 8.432501599488163e-07, + "loss": 0.6519, + "num_input_tokens_seen": 2087424, + "step": 660 + }, + { + "epoch": 0.04257089814992638, + "grad_norm": 36.59204864501953, + "learning_rate": 8.496481126039667e-07, + "loss": 0.7696, + "num_input_tokens_seen": 2102592, + "step": 665 + }, + { + "epoch": 0.04289098009090327, + "grad_norm": 45.53342056274414, + "learning_rate": 8.560460652591171e-07, + "loss": 0.6487, + "num_input_tokens_seen": 2119488, + "step": 670 + }, + { + "epoch": 0.04321106203188016, + "grad_norm": 38.34718704223633, + "learning_rate": 8.624440179142674e-07, + "loss": 0.6395, + "num_input_tokens_seen": 2136000, + "step": 675 + }, + { + "epoch": 0.04353114397285705, + "grad_norm": 49.36905288696289, + "learning_rate": 8.688419705694177e-07, + "loss": 0.7432, + "num_input_tokens_seen": 2152448, + "step": 680 + }, + { + "epoch": 0.04385122591383394, + "grad_norm": 45.985836029052734, + "learning_rate": 8.752399232245681e-07, + "loss": 0.6236, + "num_input_tokens_seen": 2168000, + "step": 685 + }, + { + "epoch": 0.04417130785481083, + "grad_norm": 44.200565338134766, + "learning_rate": 8.816378758797185e-07, + "loss": 0.5919, + "num_input_tokens_seen": 2183552, + "step": 690 + }, + { + "epoch": 0.04449138979578772, + "grad_norm": 55.06573486328125, + "learning_rate": 8.880358285348688e-07, + "loss": 0.7068, + "num_input_tokens_seen": 2199488, + "step": 695 + }, + { + "epoch": 0.04481147173676461, + "grad_norm": 37.759578704833984, + "learning_rate": 8.944337811900191e-07, + "loss": 0.6095, + "num_input_tokens_seen": 2215296, + "step": 700 + }, + { + "epoch": 0.0451315536777415, + "grad_norm": 52.18317794799805, + "learning_rate": 9.008317338451695e-07, + "loss": 0.7106, + "num_input_tokens_seen": 2230016, + "step": 705 + }, + { + "epoch": 0.04545163561871839, + "grad_norm": 50.49892044067383, + "learning_rate": 9.072296865003198e-07, + "loss": 0.666, + "num_input_tokens_seen": 2245056, + "step": 710 + }, + { + "epoch": 0.04577171755969528, + "grad_norm": 36.86115646362305, + "learning_rate": 9.136276391554703e-07, + "loss": 0.6173, + "num_input_tokens_seen": 2261248, + "step": 715 + }, + { + "epoch": 0.04609179950067217, + "grad_norm": 57.64673614501953, + "learning_rate": 9.200255918106205e-07, + "loss": 0.65, + "num_input_tokens_seen": 2278016, + "step": 720 + }, + { + "epoch": 0.04641188144164906, + "grad_norm": 47.956661224365234, + "learning_rate": 9.264235444657708e-07, + "loss": 0.5715, + "num_input_tokens_seen": 2292800, + "step": 725 + }, + { + "epoch": 0.04673196338262595, + "grad_norm": 36.23506546020508, + "learning_rate": 9.328214971209213e-07, + "loss": 0.5988, + "num_input_tokens_seen": 2308224, + "step": 730 + }, + { + "epoch": 0.04705204532360284, + "grad_norm": 41.42891311645508, + "learning_rate": 9.392194497760716e-07, + "loss": 0.7054, + "num_input_tokens_seen": 2325760, + "step": 735 + }, + { + "epoch": 0.04737212726457973, + "grad_norm": 58.167598724365234, + "learning_rate": 9.456174024312221e-07, + "loss": 0.6911, + "num_input_tokens_seen": 2341632, + "step": 740 + }, + { + "epoch": 0.04769220920555662, + "grad_norm": 40.3576774597168, + "learning_rate": 9.520153550863723e-07, + "loss": 0.7079, + "num_input_tokens_seen": 2357504, + "step": 745 + }, + { + "epoch": 0.04801229114653351, + "grad_norm": 43.75523376464844, + "learning_rate": 9.584133077415226e-07, + "loss": 0.7284, + "num_input_tokens_seen": 2372608, + "step": 750 + }, + { + "epoch": 0.0483323730875104, + "grad_norm": 44.16348648071289, + "learning_rate": 9.64811260396673e-07, + "loss": 0.5945, + "num_input_tokens_seen": 2388352, + "step": 755 + }, + { + "epoch": 0.04865245502848729, + "grad_norm": 48.235191345214844, + "learning_rate": 9.712092130518234e-07, + "loss": 0.7012, + "num_input_tokens_seen": 2404480, + "step": 760 + }, + { + "epoch": 0.04897253696946418, + "grad_norm": 34.269805908203125, + "learning_rate": 9.776071657069737e-07, + "loss": 0.5138, + "num_input_tokens_seen": 2419648, + "step": 765 + }, + { + "epoch": 0.04929261891044107, + "grad_norm": 46.598114013671875, + "learning_rate": 9.840051183621241e-07, + "loss": 0.71, + "num_input_tokens_seen": 2435584, + "step": 770 + }, + { + "epoch": 0.04961270085141796, + "grad_norm": 59.983123779296875, + "learning_rate": 9.904030710172743e-07, + "loss": 0.6251, + "num_input_tokens_seen": 2451072, + "step": 775 + }, + { + "epoch": 0.04993278279239485, + "grad_norm": 61.95142364501953, + "learning_rate": 9.968010236724249e-07, + "loss": 0.7605, + "num_input_tokens_seen": 2467968, + "step": 780 + }, + { + "epoch": 0.05006081556878561, + "eval_loss": 0.6365677118301392, + "eval_runtime": 50.6079, + "eval_samples_per_second": 274.384, + "eval_steps_per_second": 34.303, + "num_input_tokens_seen": 2474432, + "step": 782 + }, + { + "epoch": 0.05025286473337175, + "grad_norm": 54.79026412963867, + "learning_rate": 1.0031989763275752e-06, + "loss": 0.6742, + "num_input_tokens_seen": 2484928, + "step": 785 + }, + { + "epoch": 0.05057294667434863, + "grad_norm": 43.971065521240234, + "learning_rate": 1.0095969289827256e-06, + "loss": 0.6802, + "num_input_tokens_seen": 2501504, + "step": 790 + }, + { + "epoch": 0.050893028615325527, + "grad_norm": 34.02169418334961, + "learning_rate": 1.0159948816378758e-06, + "loss": 0.5171, + "num_input_tokens_seen": 2518848, + "step": 795 + }, + { + "epoch": 0.05121311055630241, + "grad_norm": 34.22026443481445, + "learning_rate": 1.0223928342930262e-06, + "loss": 0.5474, + "num_input_tokens_seen": 2535680, + "step": 800 + }, + { + "epoch": 0.051533192497279307, + "grad_norm": 52.957420349121094, + "learning_rate": 1.0287907869481766e-06, + "loss": 0.6751, + "num_input_tokens_seen": 2550976, + "step": 805 + }, + { + "epoch": 0.05185327443825619, + "grad_norm": 40.167659759521484, + "learning_rate": 1.035188739603327e-06, + "loss": 0.5552, + "num_input_tokens_seen": 2566656, + "step": 810 + }, + { + "epoch": 0.052173356379233086, + "grad_norm": 51.743473052978516, + "learning_rate": 1.0415866922584773e-06, + "loss": 0.6319, + "num_input_tokens_seen": 2581568, + "step": 815 + }, + { + "epoch": 0.05249343832020997, + "grad_norm": 53.62697219848633, + "learning_rate": 1.0479846449136277e-06, + "loss": 0.64, + "num_input_tokens_seen": 2596608, + "step": 820 + }, + { + "epoch": 0.052813520261186866, + "grad_norm": 43.395721435546875, + "learning_rate": 1.0543825975687779e-06, + "loss": 0.672, + "num_input_tokens_seen": 2612032, + "step": 825 + }, + { + "epoch": 0.05313360220216375, + "grad_norm": 55.59555435180664, + "learning_rate": 1.0607805502239282e-06, + "loss": 0.7367, + "num_input_tokens_seen": 2627264, + "step": 830 + }, + { + "epoch": 0.053453684143140646, + "grad_norm": 45.545921325683594, + "learning_rate": 1.0671785028790788e-06, + "loss": 0.549, + "num_input_tokens_seen": 2643264, + "step": 835 + }, + { + "epoch": 0.05377376608411753, + "grad_norm": 44.034141540527344, + "learning_rate": 1.073576455534229e-06, + "loss": 0.542, + "num_input_tokens_seen": 2659264, + "step": 840 + }, + { + "epoch": 0.054093848025094426, + "grad_norm": 39.247581481933594, + "learning_rate": 1.0799744081893794e-06, + "loss": 0.5724, + "num_input_tokens_seen": 2673856, + "step": 845 + }, + { + "epoch": 0.05441392996607131, + "grad_norm": 36.05900192260742, + "learning_rate": 1.0863723608445297e-06, + "loss": 0.4715, + "num_input_tokens_seen": 2688448, + "step": 850 + }, + { + "epoch": 0.054734011907048206, + "grad_norm": 70.99398803710938, + "learning_rate": 1.09277031349968e-06, + "loss": 0.6158, + "num_input_tokens_seen": 2703872, + "step": 855 + }, + { + "epoch": 0.05505409384802509, + "grad_norm": 51.03853988647461, + "learning_rate": 1.0991682661548305e-06, + "loss": 0.6116, + "num_input_tokens_seen": 2719040, + "step": 860 + }, + { + "epoch": 0.055374175789001986, + "grad_norm": 37.42866897583008, + "learning_rate": 1.1055662188099809e-06, + "loss": 0.6182, + "num_input_tokens_seen": 2735168, + "step": 865 + }, + { + "epoch": 0.05569425772997887, + "grad_norm": 39.55155944824219, + "learning_rate": 1.111964171465131e-06, + "loss": 0.5281, + "num_input_tokens_seen": 2750592, + "step": 870 + }, + { + "epoch": 0.056014339670955766, + "grad_norm": 22.90837860107422, + "learning_rate": 1.1183621241202814e-06, + "loss": 0.6583, + "num_input_tokens_seen": 2767232, + "step": 875 + }, + { + "epoch": 0.05633442161193265, + "grad_norm": 55.55328369140625, + "learning_rate": 1.1247600767754318e-06, + "loss": 0.6623, + "num_input_tokens_seen": 2784768, + "step": 880 + }, + { + "epoch": 0.056654503552909546, + "grad_norm": 40.864261627197266, + "learning_rate": 1.1311580294305822e-06, + "loss": 0.5782, + "num_input_tokens_seen": 2799872, + "step": 885 + }, + { + "epoch": 0.05697458549388643, + "grad_norm": 53.441192626953125, + "learning_rate": 1.1375559820857326e-06, + "loss": 0.6444, + "num_input_tokens_seen": 2816000, + "step": 890 + }, + { + "epoch": 0.057294667434863326, + "grad_norm": 48.67129898071289, + "learning_rate": 1.143953934740883e-06, + "loss": 0.5844, + "num_input_tokens_seen": 2831744, + "step": 895 + }, + { + "epoch": 0.05761474937584021, + "grad_norm": 57.840087890625, + "learning_rate": 1.150351887396033e-06, + "loss": 0.6139, + "num_input_tokens_seen": 2847424, + "step": 900 + }, + { + "epoch": 0.057934831316817106, + "grad_norm": 54.10224151611328, + "learning_rate": 1.1567498400511835e-06, + "loss": 0.6182, + "num_input_tokens_seen": 2862272, + "step": 905 + }, + { + "epoch": 0.05825491325779399, + "grad_norm": 39.821617126464844, + "learning_rate": 1.163147792706334e-06, + "loss": 0.4962, + "num_input_tokens_seen": 2877120, + "step": 910 + }, + { + "epoch": 0.058574995198770886, + "grad_norm": 41.681732177734375, + "learning_rate": 1.1695457453614842e-06, + "loss": 0.5176, + "num_input_tokens_seen": 2894592, + "step": 915 + }, + { + "epoch": 0.05889507713974777, + "grad_norm": 43.29148864746094, + "learning_rate": 1.1759436980166346e-06, + "loss": 0.6171, + "num_input_tokens_seen": 2909888, + "step": 920 + }, + { + "epoch": 0.059215159080724666, + "grad_norm": 49.849117279052734, + "learning_rate": 1.182341650671785e-06, + "loss": 0.6226, + "num_input_tokens_seen": 2925632, + "step": 925 + }, + { + "epoch": 0.05953524102170155, + "grad_norm": 50.00711441040039, + "learning_rate": 1.1887396033269352e-06, + "loss": 0.6205, + "num_input_tokens_seen": 2941760, + "step": 930 + }, + { + "epoch": 0.059855322962678446, + "grad_norm": 48.1435546875, + "learning_rate": 1.1951375559820858e-06, + "loss": 0.7098, + "num_input_tokens_seen": 2957376, + "step": 935 + }, + { + "epoch": 0.06017540490365533, + "grad_norm": 68.08272552490234, + "learning_rate": 1.2015355086372361e-06, + "loss": 0.5669, + "num_input_tokens_seen": 2972800, + "step": 940 + }, + { + "epoch": 0.060495486844632226, + "grad_norm": 50.63016891479492, + "learning_rate": 1.2079334612923863e-06, + "loss": 0.7169, + "num_input_tokens_seen": 2988480, + "step": 945 + }, + { + "epoch": 0.06081556878560911, + "grad_norm": 44.2595100402832, + "learning_rate": 1.2143314139475367e-06, + "loss": 0.6581, + "num_input_tokens_seen": 3004480, + "step": 950 + }, + { + "epoch": 0.061135650726586006, + "grad_norm": 40.70684051513672, + "learning_rate": 1.220729366602687e-06, + "loss": 0.4512, + "num_input_tokens_seen": 3020288, + "step": 955 + }, + { + "epoch": 0.06145573266756289, + "grad_norm": 61.91799545288086, + "learning_rate": 1.2271273192578374e-06, + "loss": 0.5964, + "num_input_tokens_seen": 3035968, + "step": 960 + }, + { + "epoch": 0.061775814608539786, + "grad_norm": 62.57038116455078, + "learning_rate": 1.2335252719129878e-06, + "loss": 0.7436, + "num_input_tokens_seen": 3051776, + "step": 965 + }, + { + "epoch": 0.06209589654951667, + "grad_norm": 54.44983673095703, + "learning_rate": 1.2399232245681382e-06, + "loss": 0.5567, + "num_input_tokens_seen": 3066560, + "step": 970 + }, + { + "epoch": 0.062415978490493566, + "grad_norm": 55.24098587036133, + "learning_rate": 1.2463211772232884e-06, + "loss": 0.6348, + "num_input_tokens_seen": 3082496, + "step": 975 + }, + { + "epoch": 0.06273606043147045, + "grad_norm": 45.73814010620117, + "learning_rate": 1.2527191298784387e-06, + "loss": 0.6289, + "num_input_tokens_seen": 3097856, + "step": 980 + }, + { + "epoch": 0.06305614237244735, + "grad_norm": 26.932607650756836, + "learning_rate": 1.2591170825335893e-06, + "loss": 0.5081, + "num_input_tokens_seen": 3113664, + "step": 985 + }, + { + "epoch": 0.06337622431342424, + "grad_norm": 39.2259521484375, + "learning_rate": 1.2655150351887395e-06, + "loss": 0.6276, + "num_input_tokens_seen": 3129792, + "step": 990 + }, + { + "epoch": 0.06369630625440113, + "grad_norm": 31.826623916625977, + "learning_rate": 1.2719129878438899e-06, + "loss": 0.5917, + "num_input_tokens_seen": 3145024, + "step": 995 + }, + { + "epoch": 0.06401638819537801, + "grad_norm": 42.885284423828125, + "learning_rate": 1.2783109404990402e-06, + "loss": 0.7761, + "num_input_tokens_seen": 3161216, + "step": 1000 + }, + { + "epoch": 0.0643364701363549, + "grad_norm": 28.05913543701172, + "learning_rate": 1.2847088931541904e-06, + "loss": 0.4107, + "num_input_tokens_seen": 3176960, + "step": 1005 + }, + { + "epoch": 0.0646565520773318, + "grad_norm": 41.80731201171875, + "learning_rate": 1.291106845809341e-06, + "loss": 0.6098, + "num_input_tokens_seen": 3193088, + "step": 1010 + }, + { + "epoch": 0.0649766340183087, + "grad_norm": 64.06974029541016, + "learning_rate": 1.2975047984644914e-06, + "loss": 0.6776, + "num_input_tokens_seen": 3210112, + "step": 1015 + }, + { + "epoch": 0.06529671595928557, + "grad_norm": 42.754390716552734, + "learning_rate": 1.3039027511196418e-06, + "loss": 0.508, + "num_input_tokens_seen": 3224768, + "step": 1020 + }, + { + "epoch": 0.06561679790026247, + "grad_norm": 45.79197692871094, + "learning_rate": 1.310300703774792e-06, + "loss": 0.6266, + "num_input_tokens_seen": 3240128, + "step": 1025 + }, + { + "epoch": 0.06593687984123936, + "grad_norm": 43.44371032714844, + "learning_rate": 1.3166986564299423e-06, + "loss": 0.5607, + "num_input_tokens_seen": 3256576, + "step": 1030 + }, + { + "epoch": 0.06625696178221625, + "grad_norm": 35.83990478515625, + "learning_rate": 1.3230966090850929e-06, + "loss": 0.5973, + "num_input_tokens_seen": 3272384, + "step": 1035 + }, + { + "epoch": 0.06657704372319313, + "grad_norm": 39.57344436645508, + "learning_rate": 1.329494561740243e-06, + "loss": 0.4099, + "num_input_tokens_seen": 3288512, + "step": 1040 + }, + { + "epoch": 0.06689712566417003, + "grad_norm": 42.25546646118164, + "learning_rate": 1.3358925143953934e-06, + "loss": 0.4889, + "num_input_tokens_seen": 3306304, + "step": 1045 + }, + { + "epoch": 0.06721720760514692, + "grad_norm": 44.445255279541016, + "learning_rate": 1.3422904670505438e-06, + "loss": 0.4795, + "num_input_tokens_seen": 3321344, + "step": 1050 + }, + { + "epoch": 0.06753728954612381, + "grad_norm": 40.285926818847656, + "learning_rate": 1.348688419705694e-06, + "loss": 0.6207, + "num_input_tokens_seen": 3338560, + "step": 1055 + }, + { + "epoch": 0.06785737148710069, + "grad_norm": 27.29134750366211, + "learning_rate": 1.3550863723608446e-06, + "loss": 0.5093, + "num_input_tokens_seen": 3353152, + "step": 1060 + }, + { + "epoch": 0.06817745342807759, + "grad_norm": 47.73579788208008, + "learning_rate": 1.361484325015995e-06, + "loss": 0.5401, + "num_input_tokens_seen": 3369536, + "step": 1065 + }, + { + "epoch": 0.06849753536905448, + "grad_norm": 46.23472213745117, + "learning_rate": 1.3678822776711451e-06, + "loss": 0.6053, + "num_input_tokens_seen": 3384832, + "step": 1070 + }, + { + "epoch": 0.06881761731003137, + "grad_norm": 40.9404411315918, + "learning_rate": 1.3742802303262955e-06, + "loss": 0.494, + "num_input_tokens_seen": 3399424, + "step": 1075 + }, + { + "epoch": 0.06913769925100825, + "grad_norm": 32.21672058105469, + "learning_rate": 1.3806781829814459e-06, + "loss": 0.6625, + "num_input_tokens_seen": 3416704, + "step": 1080 + }, + { + "epoch": 0.06945778119198515, + "grad_norm": 61.273109436035156, + "learning_rate": 1.3870761356365963e-06, + "loss": 0.5524, + "num_input_tokens_seen": 3431552, + "step": 1085 + }, + { + "epoch": 0.06977786313296204, + "grad_norm": 38.4173469543457, + "learning_rate": 1.3934740882917466e-06, + "loss": 0.6509, + "num_input_tokens_seen": 3447488, + "step": 1090 + }, + { + "epoch": 0.07009794507393893, + "grad_norm": 41.265380859375, + "learning_rate": 1.399872040946897e-06, + "loss": 0.5205, + "num_input_tokens_seen": 3463424, + "step": 1095 + }, + { + "epoch": 0.07041802701491581, + "grad_norm": 52.113468170166016, + "learning_rate": 1.4062699936020472e-06, + "loss": 0.6568, + "num_input_tokens_seen": 3479680, + "step": 1100 + }, + { + "epoch": 0.0707381089558927, + "grad_norm": 51.376312255859375, + "learning_rate": 1.4126679462571976e-06, + "loss": 0.5165, + "num_input_tokens_seen": 3495552, + "step": 1105 + }, + { + "epoch": 0.0710581908968696, + "grad_norm": 57.10530471801758, + "learning_rate": 1.4190658989123481e-06, + "loss": 0.4769, + "num_input_tokens_seen": 3510976, + "step": 1110 + }, + { + "epoch": 0.0713782728378465, + "grad_norm": 50.24375534057617, + "learning_rate": 1.4254638515674983e-06, + "loss": 0.6453, + "num_input_tokens_seen": 3526016, + "step": 1115 + }, + { + "epoch": 0.07169835477882337, + "grad_norm": 22.63068962097168, + "learning_rate": 1.4318618042226487e-06, + "loss": 0.5647, + "num_input_tokens_seen": 3540544, + "step": 1120 + }, + { + "epoch": 0.07201843671980027, + "grad_norm": 44.166378021240234, + "learning_rate": 1.438259756877799e-06, + "loss": 0.6074, + "num_input_tokens_seen": 3556416, + "step": 1125 + }, + { + "epoch": 0.07233851866077716, + "grad_norm": 45.217864990234375, + "learning_rate": 1.4446577095329492e-06, + "loss": 0.5063, + "num_input_tokens_seen": 3572096, + "step": 1130 + }, + { + "epoch": 0.07265860060175405, + "grad_norm": 43.43882751464844, + "learning_rate": 1.4510556621880998e-06, + "loss": 0.4966, + "num_input_tokens_seen": 3587712, + "step": 1135 + }, + { + "epoch": 0.07297868254273093, + "grad_norm": 31.869800567626953, + "learning_rate": 1.4574536148432502e-06, + "loss": 0.5503, + "num_input_tokens_seen": 3605056, + "step": 1140 + }, + { + "epoch": 0.07329876448370783, + "grad_norm": 43.5185661315918, + "learning_rate": 1.4638515674984004e-06, + "loss": 0.6826, + "num_input_tokens_seen": 3621184, + "step": 1145 + }, + { + "epoch": 0.07361884642468472, + "grad_norm": 27.480981826782227, + "learning_rate": 1.4702495201535507e-06, + "loss": 0.5917, + "num_input_tokens_seen": 3635392, + "step": 1150 + }, + { + "epoch": 0.07393892836566161, + "grad_norm": 34.03066635131836, + "learning_rate": 1.4766474728087011e-06, + "loss": 0.5064, + "num_input_tokens_seen": 3649984, + "step": 1155 + }, + { + "epoch": 0.07425901030663849, + "grad_norm": 35.03831100463867, + "learning_rate": 1.4830454254638515e-06, + "loss": 0.5239, + "num_input_tokens_seen": 3665920, + "step": 1160 + }, + { + "epoch": 0.07457909224761539, + "grad_norm": 33.77798080444336, + "learning_rate": 1.4894433781190019e-06, + "loss": 0.4573, + "num_input_tokens_seen": 3680256, + "step": 1165 + }, + { + "epoch": 0.07489917418859228, + "grad_norm": 51.53587341308594, + "learning_rate": 1.4958413307741523e-06, + "loss": 0.5284, + "num_input_tokens_seen": 3697536, + "step": 1170 + }, + { + "epoch": 0.07521925612956917, + "grad_norm": 36.20228576660156, + "learning_rate": 1.5022392834293024e-06, + "loss": 0.6147, + "num_input_tokens_seen": 3713088, + "step": 1175 + }, + { + "epoch": 0.07553933807054607, + "grad_norm": 58.152000427246094, + "learning_rate": 1.5086372360844528e-06, + "loss": 0.6674, + "num_input_tokens_seen": 3729920, + "step": 1180 + }, + { + "epoch": 0.07585942001152295, + "grad_norm": 47.89228057861328, + "learning_rate": 1.5150351887396034e-06, + "loss": 0.5478, + "num_input_tokens_seen": 3745664, + "step": 1185 + }, + { + "epoch": 0.07617950195249984, + "grad_norm": 36.1474609375, + "learning_rate": 1.5214331413947536e-06, + "loss": 0.5246, + "num_input_tokens_seen": 3760576, + "step": 1190 + }, + { + "epoch": 0.07649958389347673, + "grad_norm": 30.582496643066406, + "learning_rate": 1.527831094049904e-06, + "loss": 0.5073, + "num_input_tokens_seen": 3776576, + "step": 1195 + }, + { + "epoch": 0.07681966583445363, + "grad_norm": 44.59170150756836, + "learning_rate": 1.5342290467050543e-06, + "loss": 0.5868, + "num_input_tokens_seen": 3792384, + "step": 1200 + }, + { + "epoch": 0.0771397477754305, + "grad_norm": 46.972347259521484, + "learning_rate": 1.5406269993602045e-06, + "loss": 0.6464, + "num_input_tokens_seen": 3806592, + "step": 1205 + }, + { + "epoch": 0.0774598297164074, + "grad_norm": 53.56911849975586, + "learning_rate": 1.547024952015355e-06, + "loss": 0.5778, + "num_input_tokens_seen": 3822080, + "step": 1210 + }, + { + "epoch": 0.07777991165738429, + "grad_norm": 39.08710479736328, + "learning_rate": 1.5534229046705055e-06, + "loss": 0.5346, + "num_input_tokens_seen": 3837120, + "step": 1215 + }, + { + "epoch": 0.07809999359836119, + "grad_norm": 39.212432861328125, + "learning_rate": 1.5598208573256556e-06, + "loss": 0.6952, + "num_input_tokens_seen": 3852864, + "step": 1220 + }, + { + "epoch": 0.07842007553933807, + "grad_norm": 34.752010345458984, + "learning_rate": 1.566218809980806e-06, + "loss": 0.5148, + "num_input_tokens_seen": 3869184, + "step": 1225 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 33.27532958984375, + "learning_rate": 1.5726167626359564e-06, + "loss": 0.5376, + "num_input_tokens_seen": 3885248, + "step": 1230 + }, + { + "epoch": 0.07906023942129185, + "grad_norm": 33.97770309448242, + "learning_rate": 1.5790147152911068e-06, + "loss": 0.4261, + "num_input_tokens_seen": 3900416, + "step": 1235 + }, + { + "epoch": 0.07938032136226875, + "grad_norm": 39.494102478027344, + "learning_rate": 1.5854126679462571e-06, + "loss": 0.553, + "num_input_tokens_seen": 3916096, + "step": 1240 + }, + { + "epoch": 0.07970040330324563, + "grad_norm": 50.25145721435547, + "learning_rate": 1.5918106206014075e-06, + "loss": 0.6992, + "num_input_tokens_seen": 3933312, + "step": 1245 + }, + { + "epoch": 0.08002048524422252, + "grad_norm": 77.80860900878906, + "learning_rate": 1.5982085732565577e-06, + "loss": 0.615, + "num_input_tokens_seen": 3949440, + "step": 1250 + }, + { + "epoch": 0.08034056718519941, + "grad_norm": 38.29566955566406, + "learning_rate": 1.604606525911708e-06, + "loss": 0.681, + "num_input_tokens_seen": 3964992, + "step": 1255 + }, + { + "epoch": 0.0806606491261763, + "grad_norm": 57.336204528808594, + "learning_rate": 1.6110044785668586e-06, + "loss": 0.733, + "num_input_tokens_seen": 3981696, + "step": 1260 + }, + { + "epoch": 0.08098073106715319, + "grad_norm": 39.39405059814453, + "learning_rate": 1.617402431222009e-06, + "loss": 0.6123, + "num_input_tokens_seen": 3997248, + "step": 1265 + }, + { + "epoch": 0.08130081300813008, + "grad_norm": 29.0351505279541, + "learning_rate": 1.6238003838771592e-06, + "loss": 0.6693, + "num_input_tokens_seen": 4011648, + "step": 1270 + }, + { + "epoch": 0.08162089494910697, + "grad_norm": 44.553062438964844, + "learning_rate": 1.6301983365323096e-06, + "loss": 0.5273, + "num_input_tokens_seen": 4028160, + "step": 1275 + }, + { + "epoch": 0.08194097689008387, + "grad_norm": 49.18455505371094, + "learning_rate": 1.63659628918746e-06, + "loss": 0.5358, + "num_input_tokens_seen": 4043584, + "step": 1280 + }, + { + "epoch": 0.08226105883106075, + "grad_norm": 49.502559661865234, + "learning_rate": 1.6429942418426103e-06, + "loss": 0.7184, + "num_input_tokens_seen": 4059456, + "step": 1285 + }, + { + "epoch": 0.08258114077203764, + "grad_norm": 26.83738899230957, + "learning_rate": 1.6493921944977607e-06, + "loss": 0.6079, + "num_input_tokens_seen": 4076096, + "step": 1290 + }, + { + "epoch": 0.08290122271301453, + "grad_norm": 43.34971618652344, + "learning_rate": 1.655790147152911e-06, + "loss": 0.6143, + "num_input_tokens_seen": 4093568, + "step": 1295 + }, + { + "epoch": 0.08322130465399143, + "grad_norm": 37.30617904663086, + "learning_rate": 1.6621880998080612e-06, + "loss": 0.66, + "num_input_tokens_seen": 4108864, + "step": 1300 + }, + { + "epoch": 0.0835413865949683, + "grad_norm": 38.67463684082031, + "learning_rate": 1.6685860524632116e-06, + "loss": 0.6375, + "num_input_tokens_seen": 4124224, + "step": 1305 + }, + { + "epoch": 0.0838614685359452, + "grad_norm": 42.833805084228516, + "learning_rate": 1.6749840051183622e-06, + "loss": 0.5497, + "num_input_tokens_seen": 4139008, + "step": 1310 + }, + { + "epoch": 0.08418155047692209, + "grad_norm": 25.77740478515625, + "learning_rate": 1.6813819577735124e-06, + "loss": 0.5553, + "num_input_tokens_seen": 4155008, + "step": 1315 + }, + { + "epoch": 0.08450163241789899, + "grad_norm": 28.039791107177734, + "learning_rate": 1.6877799104286628e-06, + "loss": 0.4854, + "num_input_tokens_seen": 4172544, + "step": 1320 + }, + { + "epoch": 0.08482171435887587, + "grad_norm": 49.88291549682617, + "learning_rate": 1.6941778630838131e-06, + "loss": 0.6347, + "num_input_tokens_seen": 4188416, + "step": 1325 + }, + { + "epoch": 0.08514179629985276, + "grad_norm": 20.655033111572266, + "learning_rate": 1.7005758157389633e-06, + "loss": 0.4947, + "num_input_tokens_seen": 4202560, + "step": 1330 + }, + { + "epoch": 0.08546187824082965, + "grad_norm": 36.83515548706055, + "learning_rate": 1.706973768394114e-06, + "loss": 0.5618, + "num_input_tokens_seen": 4219392, + "step": 1335 + }, + { + "epoch": 0.08578196018180655, + "grad_norm": 52.520286560058594, + "learning_rate": 1.7133717210492643e-06, + "loss": 0.4941, + "num_input_tokens_seen": 4235328, + "step": 1340 + }, + { + "epoch": 0.08610204212278343, + "grad_norm": 54.2568359375, + "learning_rate": 1.7197696737044144e-06, + "loss": 0.5828, + "num_input_tokens_seen": 4250368, + "step": 1345 + }, + { + "epoch": 0.08642212406376032, + "grad_norm": 29.071889877319336, + "learning_rate": 1.7261676263595648e-06, + "loss": 0.4594, + "num_input_tokens_seen": 4265856, + "step": 1350 + }, + { + "epoch": 0.08674220600473721, + "grad_norm": 41.43865203857422, + "learning_rate": 1.7325655790147152e-06, + "loss": 0.6623, + "num_input_tokens_seen": 4281792, + "step": 1355 + }, + { + "epoch": 0.0870622879457141, + "grad_norm": 39.55424118041992, + "learning_rate": 1.7389635316698656e-06, + "loss": 0.5861, + "num_input_tokens_seen": 4297088, + "step": 1360 + }, + { + "epoch": 0.087382369886691, + "grad_norm": 29.17723846435547, + "learning_rate": 1.745361484325016e-06, + "loss": 0.5083, + "num_input_tokens_seen": 4312192, + "step": 1365 + }, + { + "epoch": 0.08770245182766788, + "grad_norm": 29.204944610595703, + "learning_rate": 1.7517594369801663e-06, + "loss": 0.4848, + "num_input_tokens_seen": 4326720, + "step": 1370 + }, + { + "epoch": 0.08802253376864477, + "grad_norm": 62.19997787475586, + "learning_rate": 1.7581573896353165e-06, + "loss": 0.7944, + "num_input_tokens_seen": 4341760, + "step": 1375 + }, + { + "epoch": 0.08834261570962167, + "grad_norm": 38.76377868652344, + "learning_rate": 1.7645553422904669e-06, + "loss": 0.6211, + "num_input_tokens_seen": 4357760, + "step": 1380 + }, + { + "epoch": 0.08866269765059856, + "grad_norm": 37.737586975097656, + "learning_rate": 1.7709532949456175e-06, + "loss": 0.6269, + "num_input_tokens_seen": 4373824, + "step": 1385 + }, + { + "epoch": 0.08898277959157544, + "grad_norm": 33.710941314697266, + "learning_rate": 1.7773512476007676e-06, + "loss": 0.4764, + "num_input_tokens_seen": 4388992, + "step": 1390 + }, + { + "epoch": 0.08930286153255233, + "grad_norm": 31.27020835876465, + "learning_rate": 1.783749200255918e-06, + "loss": 0.5266, + "num_input_tokens_seen": 4404288, + "step": 1395 + }, + { + "epoch": 0.08962294347352923, + "grad_norm": 52.275421142578125, + "learning_rate": 1.7901471529110684e-06, + "loss": 0.5359, + "num_input_tokens_seen": 4419840, + "step": 1400 + }, + { + "epoch": 0.08994302541450612, + "grad_norm": 57.549407958984375, + "learning_rate": 1.7965451055662186e-06, + "loss": 0.5118, + "num_input_tokens_seen": 4435200, + "step": 1405 + }, + { + "epoch": 0.090263107355483, + "grad_norm": 55.89923858642578, + "learning_rate": 1.8029430582213691e-06, + "loss": 0.61, + "num_input_tokens_seen": 4450368, + "step": 1410 + }, + { + "epoch": 0.09058318929645989, + "grad_norm": 44.54636764526367, + "learning_rate": 1.8093410108765195e-06, + "loss": 0.4899, + "num_input_tokens_seen": 4466048, + "step": 1415 + }, + { + "epoch": 0.09090327123743679, + "grad_norm": 57.33565139770508, + "learning_rate": 1.8157389635316697e-06, + "loss": 0.6001, + "num_input_tokens_seen": 4481920, + "step": 1420 + }, + { + "epoch": 0.09122335317841368, + "grad_norm": 35.38874053955078, + "learning_rate": 1.82213691618682e-06, + "loss": 0.5666, + "num_input_tokens_seen": 4498112, + "step": 1425 + }, + { + "epoch": 0.09154343511939056, + "grad_norm": 33.644596099853516, + "learning_rate": 1.8285348688419704e-06, + "loss": 0.4314, + "num_input_tokens_seen": 4515648, + "step": 1430 + }, + { + "epoch": 0.09186351706036745, + "grad_norm": 37.955787658691406, + "learning_rate": 1.8349328214971208e-06, + "loss": 0.5809, + "num_input_tokens_seen": 4531840, + "step": 1435 + }, + { + "epoch": 0.09218359900134435, + "grad_norm": 49.74119186401367, + "learning_rate": 1.8413307741522712e-06, + "loss": 0.5463, + "num_input_tokens_seen": 4547456, + "step": 1440 + }, + { + "epoch": 0.09250368094232124, + "grad_norm": 41.80632019042969, + "learning_rate": 1.8477287268074216e-06, + "loss": 0.5548, + "num_input_tokens_seen": 4563328, + "step": 1445 + }, + { + "epoch": 0.09282376288329812, + "grad_norm": 29.985979080200195, + "learning_rate": 1.8541266794625718e-06, + "loss": 0.5856, + "num_input_tokens_seen": 4579392, + "step": 1450 + }, + { + "epoch": 0.09314384482427501, + "grad_norm": 82.66356658935547, + "learning_rate": 1.8605246321177221e-06, + "loss": 0.6159, + "num_input_tokens_seen": 4595584, + "step": 1455 + }, + { + "epoch": 0.0934639267652519, + "grad_norm": 35.51491165161133, + "learning_rate": 1.8669225847728727e-06, + "loss": 0.5707, + "num_input_tokens_seen": 4610112, + "step": 1460 + }, + { + "epoch": 0.0937840087062288, + "grad_norm": 56.392459869384766, + "learning_rate": 1.8733205374280229e-06, + "loss": 0.7073, + "num_input_tokens_seen": 4626432, + "step": 1465 + }, + { + "epoch": 0.09410409064720568, + "grad_norm": 40.24674606323242, + "learning_rate": 1.8797184900831733e-06, + "loss": 0.5799, + "num_input_tokens_seen": 4641792, + "step": 1470 + }, + { + "epoch": 0.09442417258818257, + "grad_norm": 19.75339698791504, + "learning_rate": 1.8861164427383236e-06, + "loss": 0.4144, + "num_input_tokens_seen": 4656896, + "step": 1475 + }, + { + "epoch": 0.09474425452915947, + "grad_norm": 44.9459342956543, + "learning_rate": 1.8925143953934738e-06, + "loss": 0.6021, + "num_input_tokens_seen": 4673472, + "step": 1480 + }, + { + "epoch": 0.09506433647013636, + "grad_norm": 26.943050384521484, + "learning_rate": 1.8989123480486244e-06, + "loss": 0.4252, + "num_input_tokens_seen": 4688896, + "step": 1485 + }, + { + "epoch": 0.09538441841111324, + "grad_norm": 44.868587493896484, + "learning_rate": 1.9053103007037748e-06, + "loss": 0.6809, + "num_input_tokens_seen": 4704576, + "step": 1490 + }, + { + "epoch": 0.09570450035209013, + "grad_norm": 39.3725471496582, + "learning_rate": 1.911708253358925e-06, + "loss": 0.6032, + "num_input_tokens_seen": 4719040, + "step": 1495 + }, + { + "epoch": 0.09602458229306703, + "grad_norm": 30.540884017944336, + "learning_rate": 1.9181062060140753e-06, + "loss": 0.4781, + "num_input_tokens_seen": 4733696, + "step": 1500 + }, + { + "epoch": 0.09634466423404392, + "grad_norm": 41.38250732421875, + "learning_rate": 1.9245041586692255e-06, + "loss": 0.6128, + "num_input_tokens_seen": 4748992, + "step": 1505 + }, + { + "epoch": 0.0966647461750208, + "grad_norm": 54.247108459472656, + "learning_rate": 1.930902111324376e-06, + "loss": 0.6961, + "num_input_tokens_seen": 4764992, + "step": 1510 + }, + { + "epoch": 0.09698482811599769, + "grad_norm": 48.3005256652832, + "learning_rate": 1.9373000639795267e-06, + "loss": 0.6847, + "num_input_tokens_seen": 4780352, + "step": 1515 + }, + { + "epoch": 0.09730491005697459, + "grad_norm": 32.71445846557617, + "learning_rate": 1.943698016634677e-06, + "loss": 0.5486, + "num_input_tokens_seen": 4796224, + "step": 1520 + }, + { + "epoch": 0.09762499199795148, + "grad_norm": 31.726274490356445, + "learning_rate": 1.950095969289827e-06, + "loss": 0.5423, + "num_input_tokens_seen": 4811840, + "step": 1525 + }, + { + "epoch": 0.09794507393892836, + "grad_norm": 27.152061462402344, + "learning_rate": 1.9564939219449776e-06, + "loss": 0.5833, + "num_input_tokens_seen": 4826432, + "step": 1530 + }, + { + "epoch": 0.09826515587990525, + "grad_norm": 30.344701766967773, + "learning_rate": 1.9628918746001278e-06, + "loss": 0.3899, + "num_input_tokens_seen": 4841920, + "step": 1535 + }, + { + "epoch": 0.09858523782088215, + "grad_norm": 31.89874267578125, + "learning_rate": 1.9692898272552783e-06, + "loss": 0.5976, + "num_input_tokens_seen": 4857536, + "step": 1540 + }, + { + "epoch": 0.09890531976185904, + "grad_norm": 59.150508880615234, + "learning_rate": 1.9756877799104285e-06, + "loss": 0.6862, + "num_input_tokens_seen": 4873408, + "step": 1545 + }, + { + "epoch": 0.09922540170283592, + "grad_norm": 44.73534393310547, + "learning_rate": 1.9820857325655787e-06, + "loss": 0.5992, + "num_input_tokens_seen": 4889536, + "step": 1550 + }, + { + "epoch": 0.09954548364381281, + "grad_norm": 59.36629104614258, + "learning_rate": 1.9884836852207293e-06, + "loss": 0.6222, + "num_input_tokens_seen": 4904448, + "step": 1555 + }, + { + "epoch": 0.0998655655847897, + "grad_norm": 27.847911834716797, + "learning_rate": 1.99488163787588e-06, + "loss": 0.6538, + "num_input_tokens_seen": 4919616, + "step": 1560 + }, + { + "epoch": 0.10012163113757122, + "eval_loss": 0.5419119000434875, + "eval_runtime": 50.629, + "eval_samples_per_second": 274.269, + "eval_steps_per_second": 34.289, + "num_input_tokens_seen": 4931328, + "step": 1564 + }, + { + "epoch": 0.1001856475257666, + "grad_norm": 31.496103286743164, + "learning_rate": 1.9999999750297625e-06, + "loss": 0.5142, + "num_input_tokens_seen": 4934144, + "step": 1565 + }, + { + "epoch": 0.1005057294667435, + "grad_norm": 33.73641586303711, + "learning_rate": 1.9999991010715873e-06, + "loss": 0.5243, + "num_input_tokens_seen": 4950272, + "step": 1570 + }, + { + "epoch": 0.10082581140772037, + "grad_norm": 45.70293045043945, + "learning_rate": 1.999996978602793e-06, + "loss": 0.5354, + "num_input_tokens_seen": 4965056, + "step": 1575 + }, + { + "epoch": 0.10114589334869727, + "grad_norm": 35.1424560546875, + "learning_rate": 1.99999360762603e-06, + "loss": 0.5617, + "num_input_tokens_seen": 4980160, + "step": 1580 + }, + { + "epoch": 0.10146597528967416, + "grad_norm": 37.27573776245117, + "learning_rate": 1.9999889881455065e-06, + "loss": 0.4574, + "num_input_tokens_seen": 4996992, + "step": 1585 + }, + { + "epoch": 0.10178605723065105, + "grad_norm": 36.42082977294922, + "learning_rate": 1.9999831201669897e-06, + "loss": 0.5212, + "num_input_tokens_seen": 5012608, + "step": 1590 + }, + { + "epoch": 0.10210613917162793, + "grad_norm": 40.335140228271484, + "learning_rate": 1.9999760036978067e-06, + "loss": 0.4917, + "num_input_tokens_seen": 5027840, + "step": 1595 + }, + { + "epoch": 0.10242622111260483, + "grad_norm": 35.37378692626953, + "learning_rate": 1.9999676387468417e-06, + "loss": 0.5698, + "num_input_tokens_seen": 5042752, + "step": 1600 + }, + { + "epoch": 0.10274630305358172, + "grad_norm": 28.116477966308594, + "learning_rate": 1.999958025324539e-06, + "loss": 0.5443, + "num_input_tokens_seen": 5058624, + "step": 1605 + }, + { + "epoch": 0.10306638499455861, + "grad_norm": 37.361881256103516, + "learning_rate": 1.999947163442901e-06, + "loss": 0.6261, + "num_input_tokens_seen": 5075008, + "step": 1610 + }, + { + "epoch": 0.10338646693553549, + "grad_norm": 51.469268798828125, + "learning_rate": 1.9999350531154884e-06, + "loss": 0.5363, + "num_input_tokens_seen": 5090880, + "step": 1615 + }, + { + "epoch": 0.10370654887651239, + "grad_norm": 51.33501052856445, + "learning_rate": 1.9999216943574223e-06, + "loss": 0.5701, + "num_input_tokens_seen": 5106816, + "step": 1620 + }, + { + "epoch": 0.10402663081748928, + "grad_norm": 25.08717155456543, + "learning_rate": 1.9999070871853796e-06, + "loss": 0.463, + "num_input_tokens_seen": 5123904, + "step": 1625 + }, + { + "epoch": 0.10434671275846617, + "grad_norm": 39.659610748291016, + "learning_rate": 1.9998912316175986e-06, + "loss": 0.4856, + "num_input_tokens_seen": 5140160, + "step": 1630 + }, + { + "epoch": 0.10466679469944305, + "grad_norm": 45.963687896728516, + "learning_rate": 1.9998741276738752e-06, + "loss": 0.5123, + "num_input_tokens_seen": 5156288, + "step": 1635 + }, + { + "epoch": 0.10498687664041995, + "grad_norm": 39.845699310302734, + "learning_rate": 1.999855775375563e-06, + "loss": 0.5907, + "num_input_tokens_seen": 5171776, + "step": 1640 + }, + { + "epoch": 0.10530695858139684, + "grad_norm": 41.036468505859375, + "learning_rate": 1.999836174745576e-06, + "loss": 0.6812, + "num_input_tokens_seen": 5189504, + "step": 1645 + }, + { + "epoch": 0.10562704052237373, + "grad_norm": 46.018531799316406, + "learning_rate": 1.9998153258083853e-06, + "loss": 0.5825, + "num_input_tokens_seen": 5205056, + "step": 1650 + }, + { + "epoch": 0.10594712246335061, + "grad_norm": 39.028587341308594, + "learning_rate": 1.9997932285900214e-06, + "loss": 0.5911, + "num_input_tokens_seen": 5222656, + "step": 1655 + }, + { + "epoch": 0.1062672044043275, + "grad_norm": 54.98348617553711, + "learning_rate": 1.9997698831180726e-06, + "loss": 0.6352, + "num_input_tokens_seen": 5238848, + "step": 1660 + }, + { + "epoch": 0.1065872863453044, + "grad_norm": 36.60569381713867, + "learning_rate": 1.999745289421686e-06, + "loss": 0.5226, + "num_input_tokens_seen": 5255296, + "step": 1665 + }, + { + "epoch": 0.10690736828628129, + "grad_norm": 59.835819244384766, + "learning_rate": 1.9997194475315674e-06, + "loss": 0.7595, + "num_input_tokens_seen": 5270336, + "step": 1670 + }, + { + "epoch": 0.10722745022725817, + "grad_norm": 37.14190673828125, + "learning_rate": 1.9996923574799808e-06, + "loss": 0.4864, + "num_input_tokens_seen": 5286720, + "step": 1675 + }, + { + "epoch": 0.10754753216823507, + "grad_norm": 34.92512893676758, + "learning_rate": 1.9996640193007476e-06, + "loss": 0.6553, + "num_input_tokens_seen": 5301632, + "step": 1680 + }, + { + "epoch": 0.10786761410921196, + "grad_norm": 50.379520416259766, + "learning_rate": 1.9996344330292495e-06, + "loss": 0.402, + "num_input_tokens_seen": 5316544, + "step": 1685 + }, + { + "epoch": 0.10818769605018885, + "grad_norm": 35.4121208190918, + "learning_rate": 1.9996035987024245e-06, + "loss": 0.5449, + "num_input_tokens_seen": 5332544, + "step": 1690 + }, + { + "epoch": 0.10850777799116573, + "grad_norm": 36.77679443359375, + "learning_rate": 1.99957151635877e-06, + "loss": 0.5498, + "num_input_tokens_seen": 5348096, + "step": 1695 + }, + { + "epoch": 0.10882785993214263, + "grad_norm": 51.19884490966797, + "learning_rate": 1.999538186038341e-06, + "loss": 0.6298, + "num_input_tokens_seen": 5362368, + "step": 1700 + }, + { + "epoch": 0.10914794187311952, + "grad_norm": 34.20772171020508, + "learning_rate": 1.999503607782751e-06, + "loss": 0.5357, + "num_input_tokens_seen": 5378176, + "step": 1705 + }, + { + "epoch": 0.10946802381409641, + "grad_norm": 45.92792510986328, + "learning_rate": 1.999467781635171e-06, + "loss": 0.5219, + "num_input_tokens_seen": 5394752, + "step": 1710 + }, + { + "epoch": 0.10978810575507329, + "grad_norm": 33.12445068359375, + "learning_rate": 1.9994307076403306e-06, + "loss": 0.7002, + "num_input_tokens_seen": 5412160, + "step": 1715 + }, + { + "epoch": 0.11010818769605019, + "grad_norm": 37.13945388793945, + "learning_rate": 1.999392385844517e-06, + "loss": 0.5297, + "num_input_tokens_seen": 5427840, + "step": 1720 + }, + { + "epoch": 0.11042826963702708, + "grad_norm": 37.877384185791016, + "learning_rate": 1.9993528162955753e-06, + "loss": 0.4006, + "num_input_tokens_seen": 5444224, + "step": 1725 + }, + { + "epoch": 0.11074835157800397, + "grad_norm": 63.48334503173828, + "learning_rate": 1.9993119990429095e-06, + "loss": 0.5775, + "num_input_tokens_seen": 5459648, + "step": 1730 + }, + { + "epoch": 0.11106843351898085, + "grad_norm": 49.40863800048828, + "learning_rate": 1.9992699341374794e-06, + "loss": 0.7845, + "num_input_tokens_seen": 5475008, + "step": 1735 + }, + { + "epoch": 0.11138851545995775, + "grad_norm": 33.796592712402344, + "learning_rate": 1.9992266216318033e-06, + "loss": 0.533, + "num_input_tokens_seen": 5491456, + "step": 1740 + }, + { + "epoch": 0.11170859740093464, + "grad_norm": 37.70038986206055, + "learning_rate": 1.9991820615799583e-06, + "loss": 0.5745, + "num_input_tokens_seen": 5507520, + "step": 1745 + }, + { + "epoch": 0.11202867934191153, + "grad_norm": 51.507301330566406, + "learning_rate": 1.999136254037578e-06, + "loss": 0.6964, + "num_input_tokens_seen": 5523072, + "step": 1750 + }, + { + "epoch": 0.11234876128288843, + "grad_norm": 38.705711364746094, + "learning_rate": 1.999089199061853e-06, + "loss": 0.5134, + "num_input_tokens_seen": 5538304, + "step": 1755 + }, + { + "epoch": 0.1126688432238653, + "grad_norm": 38.11091995239258, + "learning_rate": 1.9990408967115326e-06, + "loss": 0.4639, + "num_input_tokens_seen": 5553920, + "step": 1760 + }, + { + "epoch": 0.1129889251648422, + "grad_norm": 26.92587661743164, + "learning_rate": 1.998991347046922e-06, + "loss": 0.4624, + "num_input_tokens_seen": 5569344, + "step": 1765 + }, + { + "epoch": 0.11330900710581909, + "grad_norm": 34.729129791259766, + "learning_rate": 1.9989405501298857e-06, + "loss": 0.5057, + "num_input_tokens_seen": 5585856, + "step": 1770 + }, + { + "epoch": 0.11362908904679599, + "grad_norm": 51.49436950683594, + "learning_rate": 1.9988885060238436e-06, + "loss": 0.5777, + "num_input_tokens_seen": 5603840, + "step": 1775 + }, + { + "epoch": 0.11394917098777287, + "grad_norm": 26.645742416381836, + "learning_rate": 1.9988352147937735e-06, + "loss": 0.5185, + "num_input_tokens_seen": 5620352, + "step": 1780 + }, + { + "epoch": 0.11426925292874976, + "grad_norm": 47.49540328979492, + "learning_rate": 1.99878067650621e-06, + "loss": 0.5548, + "num_input_tokens_seen": 5636544, + "step": 1785 + }, + { + "epoch": 0.11458933486972665, + "grad_norm": 40.62596130371094, + "learning_rate": 1.998724891229245e-06, + "loss": 0.5416, + "num_input_tokens_seen": 5652672, + "step": 1790 + }, + { + "epoch": 0.11490941681070355, + "grad_norm": 43.69281005859375, + "learning_rate": 1.998667859032527e-06, + "loss": 0.5025, + "num_input_tokens_seen": 5668224, + "step": 1795 + }, + { + "epoch": 0.11522949875168043, + "grad_norm": 23.043895721435547, + "learning_rate": 1.9986095799872613e-06, + "loss": 0.4544, + "num_input_tokens_seen": 5684480, + "step": 1800 + }, + { + "epoch": 0.11554958069265732, + "grad_norm": 58.4548454284668, + "learning_rate": 1.99855005416621e-06, + "loss": 0.4475, + "num_input_tokens_seen": 5700864, + "step": 1805 + }, + { + "epoch": 0.11586966263363421, + "grad_norm": 43.189369201660156, + "learning_rate": 1.998489281643692e-06, + "loss": 0.6003, + "num_input_tokens_seen": 5716224, + "step": 1810 + }, + { + "epoch": 0.1161897445746111, + "grad_norm": 29.6669864654541, + "learning_rate": 1.998427262495582e-06, + "loss": 0.4876, + "num_input_tokens_seen": 5733056, + "step": 1815 + }, + { + "epoch": 0.11650982651558799, + "grad_norm": 37.5609016418457, + "learning_rate": 1.9983639967993124e-06, + "loss": 0.6507, + "num_input_tokens_seen": 5749120, + "step": 1820 + }, + { + "epoch": 0.11682990845656488, + "grad_norm": 34.187103271484375, + "learning_rate": 1.99829948463387e-06, + "loss": 0.7451, + "num_input_tokens_seen": 5763968, + "step": 1825 + }, + { + "epoch": 0.11714999039754177, + "grad_norm": 33.58884048461914, + "learning_rate": 1.9982337260798e-06, + "loss": 0.5556, + "num_input_tokens_seen": 5779520, + "step": 1830 + }, + { + "epoch": 0.11747007233851867, + "grad_norm": 37.880897521972656, + "learning_rate": 1.998166721219203e-06, + "loss": 0.5874, + "num_input_tokens_seen": 5798848, + "step": 1835 + }, + { + "epoch": 0.11779015427949555, + "grad_norm": 30.32021141052246, + "learning_rate": 1.9980984701357338e-06, + "loss": 0.5069, + "num_input_tokens_seen": 5813952, + "step": 1840 + }, + { + "epoch": 0.11811023622047244, + "grad_norm": 37.01994705200195, + "learning_rate": 1.998028972914606e-06, + "loss": 0.4306, + "num_input_tokens_seen": 5830016, + "step": 1845 + }, + { + "epoch": 0.11843031816144933, + "grad_norm": 40.025062561035156, + "learning_rate": 1.9979582296425877e-06, + "loss": 0.5965, + "num_input_tokens_seen": 5845312, + "step": 1850 + }, + { + "epoch": 0.11875040010242623, + "grad_norm": 22.668283462524414, + "learning_rate": 1.9978862404080022e-06, + "loss": 0.5894, + "num_input_tokens_seen": 5860672, + "step": 1855 + }, + { + "epoch": 0.1190704820434031, + "grad_norm": 37.38002014160156, + "learning_rate": 1.9978130053007295e-06, + "loss": 0.5369, + "num_input_tokens_seen": 5875776, + "step": 1860 + }, + { + "epoch": 0.11939056398438, + "grad_norm": 39.782684326171875, + "learning_rate": 1.9977385244122034e-06, + "loss": 0.4361, + "num_input_tokens_seen": 5891200, + "step": 1865 + }, + { + "epoch": 0.11971064592535689, + "grad_norm": 40.374305725097656, + "learning_rate": 1.997662797835415e-06, + "loss": 0.4922, + "num_input_tokens_seen": 5907008, + "step": 1870 + }, + { + "epoch": 0.12003072786633379, + "grad_norm": 35.58638000488281, + "learning_rate": 1.9975858256649097e-06, + "loss": 0.4561, + "num_input_tokens_seen": 5923264, + "step": 1875 + }, + { + "epoch": 0.12035080980731067, + "grad_norm": 52.00196075439453, + "learning_rate": 1.997507607996788e-06, + "loss": 0.4952, + "num_input_tokens_seen": 5939648, + "step": 1880 + }, + { + "epoch": 0.12067089174828756, + "grad_norm": 27.237184524536133, + "learning_rate": 1.997428144928706e-06, + "loss": 0.4576, + "num_input_tokens_seen": 5955520, + "step": 1885 + }, + { + "epoch": 0.12099097368926445, + "grad_norm": 43.06745147705078, + "learning_rate": 1.9973474365598736e-06, + "loss": 0.5277, + "num_input_tokens_seen": 5971072, + "step": 1890 + }, + { + "epoch": 0.12131105563024135, + "grad_norm": 40.0740966796875, + "learning_rate": 1.9972654829910568e-06, + "loss": 0.5794, + "num_input_tokens_seen": 5987264, + "step": 1895 + }, + { + "epoch": 0.12163113757121823, + "grad_norm": 55.42530059814453, + "learning_rate": 1.9971822843245748e-06, + "loss": 0.6246, + "num_input_tokens_seen": 6002880, + "step": 1900 + }, + { + "epoch": 0.12195121951219512, + "grad_norm": 41.884239196777344, + "learning_rate": 1.997097840664303e-06, + "loss": 0.5281, + "num_input_tokens_seen": 6019520, + "step": 1905 + }, + { + "epoch": 0.12227130145317201, + "grad_norm": 63.81690216064453, + "learning_rate": 1.99701215211567e-06, + "loss": 0.5722, + "num_input_tokens_seen": 6035904, + "step": 1910 + }, + { + "epoch": 0.1225913833941489, + "grad_norm": 33.150779724121094, + "learning_rate": 1.9969252187856587e-06, + "loss": 0.6162, + "num_input_tokens_seen": 6050816, + "step": 1915 + }, + { + "epoch": 0.12291146533512579, + "grad_norm": 27.57270622253418, + "learning_rate": 1.9968370407828065e-06, + "loss": 0.414, + "num_input_tokens_seen": 6065920, + "step": 1920 + }, + { + "epoch": 0.12323154727610268, + "grad_norm": 26.693384170532227, + "learning_rate": 1.996747618217205e-06, + "loss": 0.5995, + "num_input_tokens_seen": 6081728, + "step": 1925 + }, + { + "epoch": 0.12355162921707957, + "grad_norm": 29.05069351196289, + "learning_rate": 1.9966569512004987e-06, + "loss": 0.492, + "num_input_tokens_seen": 6097472, + "step": 1930 + }, + { + "epoch": 0.12387171115805647, + "grad_norm": 33.252803802490234, + "learning_rate": 1.996565039845887e-06, + "loss": 0.5079, + "num_input_tokens_seen": 6113152, + "step": 1935 + }, + { + "epoch": 0.12419179309903335, + "grad_norm": 49.879119873046875, + "learning_rate": 1.996471884268122e-06, + "loss": 0.6364, + "num_input_tokens_seen": 6129408, + "step": 1940 + }, + { + "epoch": 0.12451187504001024, + "grad_norm": 21.359004974365234, + "learning_rate": 1.9963774845835097e-06, + "loss": 0.5506, + "num_input_tokens_seen": 6144896, + "step": 1945 + }, + { + "epoch": 0.12483195698098713, + "grad_norm": 49.929439544677734, + "learning_rate": 1.996281840909909e-06, + "loss": 0.5895, + "num_input_tokens_seen": 6160256, + "step": 1950 + }, + { + "epoch": 0.12515203892196403, + "grad_norm": 38.942405700683594, + "learning_rate": 1.9961849533667322e-06, + "loss": 0.6389, + "num_input_tokens_seen": 6175104, + "step": 1955 + }, + { + "epoch": 0.1254721208629409, + "grad_norm": 31.281375885009766, + "learning_rate": 1.9960868220749447e-06, + "loss": 0.5267, + "num_input_tokens_seen": 6190272, + "step": 1960 + }, + { + "epoch": 0.1257922028039178, + "grad_norm": 40.56554412841797, + "learning_rate": 1.9959874471570644e-06, + "loss": 0.5836, + "num_input_tokens_seen": 6205952, + "step": 1965 + }, + { + "epoch": 0.1261122847448947, + "grad_norm": 46.20263671875, + "learning_rate": 1.9958868287371625e-06, + "loss": 0.5619, + "num_input_tokens_seen": 6222592, + "step": 1970 + }, + { + "epoch": 0.12643236668587157, + "grad_norm": 38.54600524902344, + "learning_rate": 1.9957849669408617e-06, + "loss": 0.4804, + "num_input_tokens_seen": 6237696, + "step": 1975 + }, + { + "epoch": 0.12675244862684848, + "grad_norm": 54.95522689819336, + "learning_rate": 1.995681861895338e-06, + "loss": 0.4947, + "num_input_tokens_seen": 6254080, + "step": 1980 + }, + { + "epoch": 0.12707253056782536, + "grad_norm": 28.571189880371094, + "learning_rate": 1.9955775137293187e-06, + "loss": 0.5828, + "num_input_tokens_seen": 6270016, + "step": 1985 + }, + { + "epoch": 0.12739261250880227, + "grad_norm": 52.56492233276367, + "learning_rate": 1.9954719225730845e-06, + "loss": 0.6161, + "num_input_tokens_seen": 6285184, + "step": 1990 + }, + { + "epoch": 0.12771269444977915, + "grad_norm": 50.46998596191406, + "learning_rate": 1.9953650885584666e-06, + "loss": 0.4833, + "num_input_tokens_seen": 6300992, + "step": 1995 + }, + { + "epoch": 0.12803277639075603, + "grad_norm": 17.092538833618164, + "learning_rate": 1.995257011818849e-06, + "loss": 0.5462, + "num_input_tokens_seen": 6315392, + "step": 2000 + }, + { + "epoch": 0.12835285833173293, + "grad_norm": 37.80610656738281, + "learning_rate": 1.9951476924891666e-06, + "loss": 0.4676, + "num_input_tokens_seen": 6331136, + "step": 2005 + }, + { + "epoch": 0.1286729402727098, + "grad_norm": 34.72353744506836, + "learning_rate": 1.9950371307059056e-06, + "loss": 0.5551, + "num_input_tokens_seen": 6347584, + "step": 2010 + }, + { + "epoch": 0.1289930222136867, + "grad_norm": 56.483497619628906, + "learning_rate": 1.9949253266071036e-06, + "loss": 0.5584, + "num_input_tokens_seen": 6362560, + "step": 2015 + }, + { + "epoch": 0.1293131041546636, + "grad_norm": 30.35417938232422, + "learning_rate": 1.9948122803323503e-06, + "loss": 0.5131, + "num_input_tokens_seen": 6378304, + "step": 2020 + }, + { + "epoch": 0.12963318609564048, + "grad_norm": 50.93225860595703, + "learning_rate": 1.9946979920227844e-06, + "loss": 0.5125, + "num_input_tokens_seen": 6393280, + "step": 2025 + }, + { + "epoch": 0.1299532680366174, + "grad_norm": 61.17381286621094, + "learning_rate": 1.994582461821096e-06, + "loss": 0.5188, + "num_input_tokens_seen": 6409472, + "step": 2030 + }, + { + "epoch": 0.13027334997759427, + "grad_norm": 70.97465515136719, + "learning_rate": 1.9944656898715267e-06, + "loss": 0.7149, + "num_input_tokens_seen": 6424960, + "step": 2035 + }, + { + "epoch": 0.13059343191857115, + "grad_norm": 33.06205368041992, + "learning_rate": 1.994347676319867e-06, + "loss": 0.6082, + "num_input_tokens_seen": 6440000, + "step": 2040 + }, + { + "epoch": 0.13091351385954805, + "grad_norm": 26.475330352783203, + "learning_rate": 1.994228421313459e-06, + "loss": 0.4607, + "num_input_tokens_seen": 6457600, + "step": 2045 + }, + { + "epoch": 0.13123359580052493, + "grad_norm": 41.18611526489258, + "learning_rate": 1.994107925001193e-06, + "loss": 0.5187, + "num_input_tokens_seen": 6473088, + "step": 2050 + }, + { + "epoch": 0.1315536777415018, + "grad_norm": 50.487796783447266, + "learning_rate": 1.9939861875335108e-06, + "loss": 0.595, + "num_input_tokens_seen": 6487680, + "step": 2055 + }, + { + "epoch": 0.13187375968247872, + "grad_norm": 37.29991912841797, + "learning_rate": 1.9938632090624025e-06, + "loss": 0.4909, + "num_input_tokens_seen": 6503296, + "step": 2060 + }, + { + "epoch": 0.1321938416234556, + "grad_norm": 15.12756061553955, + "learning_rate": 1.9937389897414087e-06, + "loss": 0.5368, + "num_input_tokens_seen": 6518912, + "step": 2065 + }, + { + "epoch": 0.1325139235644325, + "grad_norm": 43.301517486572266, + "learning_rate": 1.993613529725618e-06, + "loss": 0.5642, + "num_input_tokens_seen": 6534784, + "step": 2070 + }, + { + "epoch": 0.13283400550540939, + "grad_norm": 52.800323486328125, + "learning_rate": 1.99348682917167e-06, + "loss": 0.5303, + "num_input_tokens_seen": 6550528, + "step": 2075 + }, + { + "epoch": 0.13315408744638627, + "grad_norm": 36.16381072998047, + "learning_rate": 1.99335888823775e-06, + "loss": 0.5475, + "num_input_tokens_seen": 6566144, + "step": 2080 + }, + { + "epoch": 0.13347416938736317, + "grad_norm": 52.74684143066406, + "learning_rate": 1.993229707083595e-06, + "loss": 0.5654, + "num_input_tokens_seen": 6583872, + "step": 2085 + }, + { + "epoch": 0.13379425132834005, + "grad_norm": 21.61884307861328, + "learning_rate": 1.993099285870489e-06, + "loss": 0.4165, + "num_input_tokens_seen": 6602304, + "step": 2090 + }, + { + "epoch": 0.13411433326931693, + "grad_norm": 38.61161804199219, + "learning_rate": 1.992967624761264e-06, + "loss": 0.462, + "num_input_tokens_seen": 6618112, + "step": 2095 + }, + { + "epoch": 0.13443441521029384, + "grad_norm": 48.21979522705078, + "learning_rate": 1.9928347239203014e-06, + "loss": 0.6239, + "num_input_tokens_seen": 6635584, + "step": 2100 + }, + { + "epoch": 0.13475449715127072, + "grad_norm": 34.77821731567383, + "learning_rate": 1.9927005835135282e-06, + "loss": 0.5283, + "num_input_tokens_seen": 6653568, + "step": 2105 + }, + { + "epoch": 0.13507457909224763, + "grad_norm": 30.292238235473633, + "learning_rate": 1.9925652037084214e-06, + "loss": 0.4596, + "num_input_tokens_seen": 6668864, + "step": 2110 + }, + { + "epoch": 0.1353946610332245, + "grad_norm": 27.13306427001953, + "learning_rate": 1.9924285846740037e-06, + "loss": 0.4838, + "num_input_tokens_seen": 6684416, + "step": 2115 + }, + { + "epoch": 0.13571474297420139, + "grad_norm": 50.00841522216797, + "learning_rate": 1.9922907265808452e-06, + "loss": 0.5948, + "num_input_tokens_seen": 6699392, + "step": 2120 + }, + { + "epoch": 0.1360348249151783, + "grad_norm": 45.320167541503906, + "learning_rate": 1.9921516296010643e-06, + "loss": 0.544, + "num_input_tokens_seen": 6714560, + "step": 2125 + }, + { + "epoch": 0.13635490685615517, + "grad_norm": 50.58386993408203, + "learning_rate": 1.9920112939083246e-06, + "loss": 0.5678, + "num_input_tokens_seen": 6729920, + "step": 2130 + }, + { + "epoch": 0.13667498879713205, + "grad_norm": 27.673641204833984, + "learning_rate": 1.9918697196778367e-06, + "loss": 0.5607, + "num_input_tokens_seen": 6744768, + "step": 2135 + }, + { + "epoch": 0.13699507073810896, + "grad_norm": 27.846073150634766, + "learning_rate": 1.9917269070863578e-06, + "loss": 0.4531, + "num_input_tokens_seen": 6759680, + "step": 2140 + }, + { + "epoch": 0.13731515267908584, + "grad_norm": 36.35385513305664, + "learning_rate": 1.9915828563121915e-06, + "loss": 0.5091, + "num_input_tokens_seen": 6775168, + "step": 2145 + }, + { + "epoch": 0.13763523462006275, + "grad_norm": 43.63134765625, + "learning_rate": 1.9914375675351865e-06, + "loss": 0.5144, + "num_input_tokens_seen": 6791296, + "step": 2150 + }, + { + "epoch": 0.13795531656103963, + "grad_norm": 19.44449806213379, + "learning_rate": 1.991291040936738e-06, + "loss": 0.4326, + "num_input_tokens_seen": 6808640, + "step": 2155 + }, + { + "epoch": 0.1382753985020165, + "grad_norm": 43.09555435180664, + "learning_rate": 1.9911432766997857e-06, + "loss": 0.6764, + "num_input_tokens_seen": 6824064, + "step": 2160 + }, + { + "epoch": 0.1385954804429934, + "grad_norm": 59.859764099121094, + "learning_rate": 1.990994275008815e-06, + "loss": 0.455, + "num_input_tokens_seen": 6839872, + "step": 2165 + }, + { + "epoch": 0.1389155623839703, + "grad_norm": 62.95064926147461, + "learning_rate": 1.9908440360498565e-06, + "loss": 0.515, + "num_input_tokens_seen": 6855744, + "step": 2170 + }, + { + "epoch": 0.1392356443249472, + "grad_norm": 46.814388275146484, + "learning_rate": 1.990692560010485e-06, + "loss": 0.5589, + "num_input_tokens_seen": 6869632, + "step": 2175 + }, + { + "epoch": 0.13955572626592408, + "grad_norm": 30.18223762512207, + "learning_rate": 1.9905398470798206e-06, + "loss": 0.4574, + "num_input_tokens_seen": 6885696, + "step": 2180 + }, + { + "epoch": 0.13987580820690096, + "grad_norm": 29.679075241088867, + "learning_rate": 1.990385897448527e-06, + "loss": 0.37, + "num_input_tokens_seen": 6901504, + "step": 2185 + }, + { + "epoch": 0.14019589014787787, + "grad_norm": 37.57693862915039, + "learning_rate": 1.9902307113088114e-06, + "loss": 0.5817, + "num_input_tokens_seen": 6916480, + "step": 2190 + }, + { + "epoch": 0.14051597208885475, + "grad_norm": 50.049583435058594, + "learning_rate": 1.9900742888544264e-06, + "loss": 0.4882, + "num_input_tokens_seen": 6932416, + "step": 2195 + }, + { + "epoch": 0.14083605402983163, + "grad_norm": 46.006839752197266, + "learning_rate": 1.989916630280667e-06, + "loss": 0.5338, + "num_input_tokens_seen": 6948992, + "step": 2200 + }, + { + "epoch": 0.14115613597080853, + "grad_norm": 55.06525802612305, + "learning_rate": 1.989757735784372e-06, + "loss": 0.464, + "num_input_tokens_seen": 6964416, + "step": 2205 + }, + { + "epoch": 0.1414762179117854, + "grad_norm": 43.949302673339844, + "learning_rate": 1.989597605563923e-06, + "loss": 0.4246, + "num_input_tokens_seen": 6980544, + "step": 2210 + }, + { + "epoch": 0.14179629985276232, + "grad_norm": 28.58378791809082, + "learning_rate": 1.9894362398192437e-06, + "loss": 0.5755, + "num_input_tokens_seen": 6997440, + "step": 2215 + }, + { + "epoch": 0.1421163817937392, + "grad_norm": 24.560964584350586, + "learning_rate": 1.9892736387518023e-06, + "loss": 0.4218, + "num_input_tokens_seen": 7012672, + "step": 2220 + }, + { + "epoch": 0.14243646373471608, + "grad_norm": 55.798553466796875, + "learning_rate": 1.9891098025646075e-06, + "loss": 0.4798, + "num_input_tokens_seen": 7027648, + "step": 2225 + }, + { + "epoch": 0.142756545675693, + "grad_norm": 29.567869186401367, + "learning_rate": 1.9889447314622105e-06, + "loss": 0.5266, + "num_input_tokens_seen": 7043200, + "step": 2230 + }, + { + "epoch": 0.14307662761666987, + "grad_norm": 43.9607048034668, + "learning_rate": 1.9887784256507046e-06, + "loss": 0.7416, + "num_input_tokens_seen": 7058688, + "step": 2235 + }, + { + "epoch": 0.14339670955764675, + "grad_norm": 33.17695999145508, + "learning_rate": 1.988610885337725e-06, + "loss": 0.6734, + "num_input_tokens_seen": 7074048, + "step": 2240 + }, + { + "epoch": 0.14371679149862365, + "grad_norm": 32.72926330566406, + "learning_rate": 1.9884421107324476e-06, + "loss": 0.5319, + "num_input_tokens_seen": 7089792, + "step": 2245 + }, + { + "epoch": 0.14403687343960053, + "grad_norm": 41.187984466552734, + "learning_rate": 1.9882721020455893e-06, + "loss": 0.4753, + "num_input_tokens_seen": 7104640, + "step": 2250 + }, + { + "epoch": 0.14435695538057744, + "grad_norm": 33.69738006591797, + "learning_rate": 1.988100859489408e-06, + "loss": 0.5137, + "num_input_tokens_seen": 7120064, + "step": 2255 + }, + { + "epoch": 0.14467703732155432, + "grad_norm": 34.818851470947266, + "learning_rate": 1.9879283832777017e-06, + "loss": 0.4839, + "num_input_tokens_seen": 7135232, + "step": 2260 + }, + { + "epoch": 0.1449971192625312, + "grad_norm": 52.21475601196289, + "learning_rate": 1.9877546736258096e-06, + "loss": 0.5247, + "num_input_tokens_seen": 7149632, + "step": 2265 + }, + { + "epoch": 0.1453172012035081, + "grad_norm": 39.324825286865234, + "learning_rate": 1.98757973075061e-06, + "loss": 0.4134, + "num_input_tokens_seen": 7164352, + "step": 2270 + }, + { + "epoch": 0.14563728314448499, + "grad_norm": 36.88801193237305, + "learning_rate": 1.987403554870521e-06, + "loss": 0.52, + "num_input_tokens_seen": 7179776, + "step": 2275 + }, + { + "epoch": 0.14595736508546187, + "grad_norm": 38.47246170043945, + "learning_rate": 1.9872261462055003e-06, + "loss": 0.423, + "num_input_tokens_seen": 7194240, + "step": 2280 + }, + { + "epoch": 0.14627744702643877, + "grad_norm": 23.939250946044922, + "learning_rate": 1.987047504977045e-06, + "loss": 0.4393, + "num_input_tokens_seen": 7209472, + "step": 2285 + }, + { + "epoch": 0.14659752896741565, + "grad_norm": 50.50169372558594, + "learning_rate": 1.9868676314081902e-06, + "loss": 0.4174, + "num_input_tokens_seen": 7225088, + "step": 2290 + }, + { + "epoch": 0.14691761090839256, + "grad_norm": 75.09852600097656, + "learning_rate": 1.9866865257235107e-06, + "loss": 0.6811, + "num_input_tokens_seen": 7240704, + "step": 2295 + }, + { + "epoch": 0.14723769284936944, + "grad_norm": 33.65947723388672, + "learning_rate": 1.9865041881491188e-06, + "loss": 0.4241, + "num_input_tokens_seen": 7256000, + "step": 2300 + }, + { + "epoch": 0.14755777479034632, + "grad_norm": 50.8767204284668, + "learning_rate": 1.9863206189126653e-06, + "loss": 0.6191, + "num_input_tokens_seen": 7270336, + "step": 2305 + }, + { + "epoch": 0.14787785673132323, + "grad_norm": 47.13086700439453, + "learning_rate": 1.9861358182433382e-06, + "loss": 0.5735, + "num_input_tokens_seen": 7285440, + "step": 2310 + }, + { + "epoch": 0.1481979386723001, + "grad_norm": 38.765995025634766, + "learning_rate": 1.9859497863718634e-06, + "loss": 0.4719, + "num_input_tokens_seen": 7301120, + "step": 2315 + }, + { + "epoch": 0.14851802061327699, + "grad_norm": 23.727924346923828, + "learning_rate": 1.985762523530504e-06, + "loss": 0.5315, + "num_input_tokens_seen": 7316416, + "step": 2320 + }, + { + "epoch": 0.1488381025542539, + "grad_norm": 28.33704948425293, + "learning_rate": 1.98557402995306e-06, + "loss": 0.4997, + "num_input_tokens_seen": 7332160, + "step": 2325 + }, + { + "epoch": 0.14915818449523077, + "grad_norm": 41.80880355834961, + "learning_rate": 1.985384305874868e-06, + "loss": 0.7101, + "num_input_tokens_seen": 7347776, + "step": 2330 + }, + { + "epoch": 0.14947826643620768, + "grad_norm": 37.426422119140625, + "learning_rate": 1.9851933515328e-06, + "loss": 0.5478, + "num_input_tokens_seen": 7363200, + "step": 2335 + }, + { + "epoch": 0.14979834837718456, + "grad_norm": 44.769901275634766, + "learning_rate": 1.985001167165265e-06, + "loss": 0.475, + "num_input_tokens_seen": 7378752, + "step": 2340 + }, + { + "epoch": 0.15011843031816144, + "grad_norm": 38.463008880615234, + "learning_rate": 1.984807753012208e-06, + "loss": 0.5239, + "num_input_tokens_seen": 7393984, + "step": 2345 + }, + { + "epoch": 0.15018244670635683, + "eval_loss": 0.5113906264305115, + "eval_runtime": 50.6224, + "eval_samples_per_second": 274.306, + "eval_steps_per_second": 34.293, + "num_input_tokens_seen": 7397056, + "step": 2346 + }, + { + "epoch": 0.15043851225913835, + "grad_norm": 27.22393798828125, + "learning_rate": 1.9846131093151086e-06, + "loss": 0.5882, + "num_input_tokens_seen": 7408832, + "step": 2350 + }, + { + "epoch": 0.15075859420011523, + "grad_norm": 17.012371063232422, + "learning_rate": 1.9844172363169808e-06, + "loss": 0.4612, + "num_input_tokens_seen": 7423040, + "step": 2355 + }, + { + "epoch": 0.15107867614109213, + "grad_norm": 54.97491455078125, + "learning_rate": 1.9842201342623756e-06, + "loss": 0.5148, + "num_input_tokens_seen": 7438464, + "step": 2360 + }, + { + "epoch": 0.151398758082069, + "grad_norm": 32.32542037963867, + "learning_rate": 1.9840218033973766e-06, + "loss": 0.5219, + "num_input_tokens_seen": 7453824, + "step": 2365 + }, + { + "epoch": 0.1517188400230459, + "grad_norm": 39.23529052734375, + "learning_rate": 1.9838222439696027e-06, + "loss": 0.5858, + "num_input_tokens_seen": 7469312, + "step": 2370 + }, + { + "epoch": 0.1520389219640228, + "grad_norm": 51.202392578125, + "learning_rate": 1.9836214562282058e-06, + "loss": 0.7034, + "num_input_tokens_seen": 7485120, + "step": 2375 + }, + { + "epoch": 0.15235900390499968, + "grad_norm": 38.26160430908203, + "learning_rate": 1.9834194404238715e-06, + "loss": 0.5189, + "num_input_tokens_seen": 7500416, + "step": 2380 + }, + { + "epoch": 0.15267908584597656, + "grad_norm": 40.620052337646484, + "learning_rate": 1.9832161968088193e-06, + "loss": 0.4149, + "num_input_tokens_seen": 7516672, + "step": 2385 + }, + { + "epoch": 0.15299916778695347, + "grad_norm": 54.49562454223633, + "learning_rate": 1.9830117256368015e-06, + "loss": 0.4703, + "num_input_tokens_seen": 7532800, + "step": 2390 + }, + { + "epoch": 0.15331924972793035, + "grad_norm": 39.00943374633789, + "learning_rate": 1.982806027163102e-06, + "loss": 0.4994, + "num_input_tokens_seen": 7547776, + "step": 2395 + }, + { + "epoch": 0.15363933166890725, + "grad_norm": 30.828948974609375, + "learning_rate": 1.9825991016445386e-06, + "loss": 0.5718, + "num_input_tokens_seen": 7562496, + "step": 2400 + }, + { + "epoch": 0.15395941360988413, + "grad_norm": 39.274105072021484, + "learning_rate": 1.9823909493394594e-06, + "loss": 0.5263, + "num_input_tokens_seen": 7577920, + "step": 2405 + }, + { + "epoch": 0.154279495550861, + "grad_norm": 43.676815032958984, + "learning_rate": 1.9821815705077455e-06, + "loss": 0.5373, + "num_input_tokens_seen": 7593216, + "step": 2410 + }, + { + "epoch": 0.15459957749183792, + "grad_norm": 65.43962860107422, + "learning_rate": 1.9819709654108087e-06, + "loss": 0.5752, + "num_input_tokens_seen": 7608192, + "step": 2415 + }, + { + "epoch": 0.1549196594328148, + "grad_norm": 44.96727752685547, + "learning_rate": 1.981759134311592e-06, + "loss": 0.4606, + "num_input_tokens_seen": 7624448, + "step": 2420 + }, + { + "epoch": 0.15523974137379168, + "grad_norm": 48.34320831298828, + "learning_rate": 1.981546077474569e-06, + "loss": 0.4839, + "num_input_tokens_seen": 7640192, + "step": 2425 + }, + { + "epoch": 0.15555982331476859, + "grad_norm": 35.434444427490234, + "learning_rate": 1.981331795165744e-06, + "loss": 0.534, + "num_input_tokens_seen": 7654848, + "step": 2430 + }, + { + "epoch": 0.15587990525574547, + "grad_norm": 64.55530548095703, + "learning_rate": 1.9811162876526498e-06, + "loss": 0.6053, + "num_input_tokens_seen": 7670848, + "step": 2435 + }, + { + "epoch": 0.15619998719672237, + "grad_norm": 30.858980178833008, + "learning_rate": 1.9808995552043515e-06, + "loss": 0.6575, + "num_input_tokens_seen": 7686016, + "step": 2440 + }, + { + "epoch": 0.15652006913769925, + "grad_norm": 36.265830993652344, + "learning_rate": 1.9806815980914413e-06, + "loss": 0.5662, + "num_input_tokens_seen": 7701760, + "step": 2445 + }, + { + "epoch": 0.15684015107867613, + "grad_norm": 35.759735107421875, + "learning_rate": 1.9804624165860417e-06, + "loss": 0.5736, + "num_input_tokens_seen": 7717760, + "step": 2450 + }, + { + "epoch": 0.15716023301965304, + "grad_norm": 18.90166473388672, + "learning_rate": 1.9802420109618028e-06, + "loss": 0.3894, + "num_input_tokens_seen": 7733376, + "step": 2455 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 16.299848556518555, + "learning_rate": 1.980020381493904e-06, + "loss": 0.503, + "num_input_tokens_seen": 7750464, + "step": 2460 + }, + { + "epoch": 0.1578003969016068, + "grad_norm": 39.69455337524414, + "learning_rate": 1.979797528459052e-06, + "loss": 0.5024, + "num_input_tokens_seen": 7768576, + "step": 2465 + }, + { + "epoch": 0.1581204788425837, + "grad_norm": 45.433387756347656, + "learning_rate": 1.979573452135482e-06, + "loss": 0.5285, + "num_input_tokens_seen": 7784256, + "step": 2470 + }, + { + "epoch": 0.15844056078356059, + "grad_norm": 33.314964294433594, + "learning_rate": 1.979348152802955e-06, + "loss": 0.3218, + "num_input_tokens_seen": 7799232, + "step": 2475 + }, + { + "epoch": 0.1587606427245375, + "grad_norm": 48.998722076416016, + "learning_rate": 1.979121630742761e-06, + "loss": 0.5854, + "num_input_tokens_seen": 7815040, + "step": 2480 + }, + { + "epoch": 0.15908072466551437, + "grad_norm": 19.081119537353516, + "learning_rate": 1.9788938862377146e-06, + "loss": 0.4547, + "num_input_tokens_seen": 7830400, + "step": 2485 + }, + { + "epoch": 0.15940080660649125, + "grad_norm": 32.0287971496582, + "learning_rate": 1.9786649195721577e-06, + "loss": 0.4803, + "num_input_tokens_seen": 7846336, + "step": 2490 + }, + { + "epoch": 0.15972088854746816, + "grad_norm": 41.866336822509766, + "learning_rate": 1.978434731031958e-06, + "loss": 0.6471, + "num_input_tokens_seen": 7862528, + "step": 2495 + }, + { + "epoch": 0.16004097048844504, + "grad_norm": 39.477691650390625, + "learning_rate": 1.9782033209045085e-06, + "loss": 0.4554, + "num_input_tokens_seen": 7880000, + "step": 2500 + }, + { + "epoch": 0.16036105242942192, + "grad_norm": 20.426006317138672, + "learning_rate": 1.977970689478727e-06, + "loss": 0.4114, + "num_input_tokens_seen": 7895296, + "step": 2505 + }, + { + "epoch": 0.16068113437039883, + "grad_norm": 58.49917984008789, + "learning_rate": 1.9777368370450577e-06, + "loss": 0.5963, + "num_input_tokens_seen": 7911104, + "step": 2510 + }, + { + "epoch": 0.1610012163113757, + "grad_norm": 33.156394958496094, + "learning_rate": 1.9775017638954674e-06, + "loss": 0.5129, + "num_input_tokens_seen": 7925952, + "step": 2515 + }, + { + "epoch": 0.1613212982523526, + "grad_norm": 33.88132858276367, + "learning_rate": 1.9772654703234476e-06, + "loss": 0.6004, + "num_input_tokens_seen": 7940928, + "step": 2520 + }, + { + "epoch": 0.1616413801933295, + "grad_norm": 44.398887634277344, + "learning_rate": 1.977027956624014e-06, + "loss": 0.5638, + "num_input_tokens_seen": 7955200, + "step": 2525 + }, + { + "epoch": 0.16196146213430637, + "grad_norm": 43.607967376708984, + "learning_rate": 1.9767892230937046e-06, + "loss": 0.5759, + "num_input_tokens_seen": 7970944, + "step": 2530 + }, + { + "epoch": 0.16228154407528328, + "grad_norm": 49.36827087402344, + "learning_rate": 1.976549270030581e-06, + "loss": 0.4305, + "num_input_tokens_seen": 7985856, + "step": 2535 + }, + { + "epoch": 0.16260162601626016, + "grad_norm": 41.47651290893555, + "learning_rate": 1.9763080977342286e-06, + "loss": 0.4789, + "num_input_tokens_seen": 8001088, + "step": 2540 + }, + { + "epoch": 0.16292170795723707, + "grad_norm": 47.50954818725586, + "learning_rate": 1.9760657065057527e-06, + "loss": 0.4995, + "num_input_tokens_seen": 8017856, + "step": 2545 + }, + { + "epoch": 0.16324178989821395, + "grad_norm": 42.19331359863281, + "learning_rate": 1.975822096647782e-06, + "loss": 0.4597, + "num_input_tokens_seen": 8033792, + "step": 2550 + }, + { + "epoch": 0.16356187183919083, + "grad_norm": 40.176029205322266, + "learning_rate": 1.975577268464466e-06, + "loss": 0.4952, + "num_input_tokens_seen": 8048256, + "step": 2555 + }, + { + "epoch": 0.16388195378016773, + "grad_norm": 31.933691024780273, + "learning_rate": 1.9753312222614765e-06, + "loss": 0.5653, + "num_input_tokens_seen": 8063680, + "step": 2560 + }, + { + "epoch": 0.1642020357211446, + "grad_norm": 55.901126861572266, + "learning_rate": 1.9750839583460036e-06, + "loss": 0.4827, + "num_input_tokens_seen": 8079744, + "step": 2565 + }, + { + "epoch": 0.1645221176621215, + "grad_norm": 34.241172790527344, + "learning_rate": 1.9748354770267603e-06, + "loss": 0.5034, + "num_input_tokens_seen": 8094656, + "step": 2570 + }, + { + "epoch": 0.1648421996030984, + "grad_norm": 26.009151458740234, + "learning_rate": 1.9745857786139777e-06, + "loss": 0.5117, + "num_input_tokens_seen": 8110528, + "step": 2575 + }, + { + "epoch": 0.16516228154407528, + "grad_norm": 49.35831832885742, + "learning_rate": 1.974334863419408e-06, + "loss": 0.6109, + "num_input_tokens_seen": 8126720, + "step": 2580 + }, + { + "epoch": 0.1654823634850522, + "grad_norm": 36.18735885620117, + "learning_rate": 1.9740827317563212e-06, + "loss": 0.5038, + "num_input_tokens_seen": 8141312, + "step": 2585 + }, + { + "epoch": 0.16580244542602907, + "grad_norm": 36.603824615478516, + "learning_rate": 1.973829383939507e-06, + "loss": 0.485, + "num_input_tokens_seen": 8156736, + "step": 2590 + }, + { + "epoch": 0.16612252736700595, + "grad_norm": 52.14276885986328, + "learning_rate": 1.973574820285273e-06, + "loss": 0.4978, + "num_input_tokens_seen": 8172480, + "step": 2595 + }, + { + "epoch": 0.16644260930798285, + "grad_norm": 41.253135681152344, + "learning_rate": 1.9733190411114443e-06, + "loss": 0.581, + "num_input_tokens_seen": 8188224, + "step": 2600 + }, + { + "epoch": 0.16676269124895973, + "grad_norm": 37.012882232666016, + "learning_rate": 1.9730620467373654e-06, + "loss": 0.4388, + "num_input_tokens_seen": 8204352, + "step": 2605 + }, + { + "epoch": 0.1670827731899366, + "grad_norm": 45.37546157836914, + "learning_rate": 1.9728038374838958e-06, + "loss": 0.5835, + "num_input_tokens_seen": 8219328, + "step": 2610 + }, + { + "epoch": 0.16740285513091352, + "grad_norm": 22.488475799560547, + "learning_rate": 1.972544413673413e-06, + "loss": 0.392, + "num_input_tokens_seen": 8234560, + "step": 2615 + }, + { + "epoch": 0.1677229370718904, + "grad_norm": 31.539594650268555, + "learning_rate": 1.9722837756298108e-06, + "loss": 0.5766, + "num_input_tokens_seen": 8249344, + "step": 2620 + }, + { + "epoch": 0.1680430190128673, + "grad_norm": 54.007774353027344, + "learning_rate": 1.972021923678499e-06, + "loss": 0.551, + "num_input_tokens_seen": 8265600, + "step": 2625 + }, + { + "epoch": 0.16836310095384419, + "grad_norm": 28.686309814453125, + "learning_rate": 1.971758858146403e-06, + "loss": 0.4822, + "num_input_tokens_seen": 8280384, + "step": 2630 + }, + { + "epoch": 0.16868318289482107, + "grad_norm": 44.47602081298828, + "learning_rate": 1.9714945793619626e-06, + "loss": 0.4916, + "num_input_tokens_seen": 8295744, + "step": 2635 + }, + { + "epoch": 0.16900326483579797, + "grad_norm": 28.91101837158203, + "learning_rate": 1.971229087655133e-06, + "loss": 0.52, + "num_input_tokens_seen": 8311680, + "step": 2640 + }, + { + "epoch": 0.16932334677677485, + "grad_norm": 31.473901748657227, + "learning_rate": 1.9709623833573842e-06, + "loss": 0.4659, + "num_input_tokens_seen": 8326592, + "step": 2645 + }, + { + "epoch": 0.16964342871775173, + "grad_norm": 47.58165740966797, + "learning_rate": 1.9706944668016994e-06, + "loss": 0.4454, + "num_input_tokens_seen": 8341632, + "step": 2650 + }, + { + "epoch": 0.16996351065872864, + "grad_norm": 38.02768325805664, + "learning_rate": 1.9704253383225756e-06, + "loss": 0.4643, + "num_input_tokens_seen": 8358400, + "step": 2655 + }, + { + "epoch": 0.17028359259970552, + "grad_norm": 34.26385498046875, + "learning_rate": 1.970154998256023e-06, + "loss": 0.4813, + "num_input_tokens_seen": 8374144, + "step": 2660 + }, + { + "epoch": 0.17060367454068243, + "grad_norm": 36.563358306884766, + "learning_rate": 1.9698834469395644e-06, + "loss": 0.4266, + "num_input_tokens_seen": 8389440, + "step": 2665 + }, + { + "epoch": 0.1709237564816593, + "grad_norm": 35.873085021972656, + "learning_rate": 1.969610684712234e-06, + "loss": 0.5565, + "num_input_tokens_seen": 8404672, + "step": 2670 + }, + { + "epoch": 0.17124383842263619, + "grad_norm": 67.11613464355469, + "learning_rate": 1.9693367119145794e-06, + "loss": 0.5696, + "num_input_tokens_seen": 8420096, + "step": 2675 + }, + { + "epoch": 0.1715639203636131, + "grad_norm": 42.11975860595703, + "learning_rate": 1.969061528888659e-06, + "loss": 0.6647, + "num_input_tokens_seen": 8436288, + "step": 2680 + }, + { + "epoch": 0.17188400230458997, + "grad_norm": 25.307558059692383, + "learning_rate": 1.9687851359780415e-06, + "loss": 0.549, + "num_input_tokens_seen": 8452672, + "step": 2685 + }, + { + "epoch": 0.17220408424556685, + "grad_norm": 21.267452239990234, + "learning_rate": 1.968507533527807e-06, + "loss": 0.4875, + "num_input_tokens_seen": 8469120, + "step": 2690 + }, + { + "epoch": 0.17252416618654376, + "grad_norm": 47.49785232543945, + "learning_rate": 1.9682287218845455e-06, + "loss": 0.4694, + "num_input_tokens_seen": 8484736, + "step": 2695 + }, + { + "epoch": 0.17284424812752064, + "grad_norm": 39.40044021606445, + "learning_rate": 1.967948701396356e-06, + "loss": 0.7448, + "num_input_tokens_seen": 8500480, + "step": 2700 + }, + { + "epoch": 0.17316433006849755, + "grad_norm": 29.45541763305664, + "learning_rate": 1.9676674724128485e-06, + "loss": 0.3988, + "num_input_tokens_seen": 8514624, + "step": 2705 + }, + { + "epoch": 0.17348441200947443, + "grad_norm": 21.1942081451416, + "learning_rate": 1.9673850352851397e-06, + "loss": 0.4666, + "num_input_tokens_seen": 8529664, + "step": 2710 + }, + { + "epoch": 0.1738044939504513, + "grad_norm": 30.817174911499023, + "learning_rate": 1.967101390365856e-06, + "loss": 0.5852, + "num_input_tokens_seen": 8545280, + "step": 2715 + }, + { + "epoch": 0.1741245758914282, + "grad_norm": 31.28725814819336, + "learning_rate": 1.966816538009131e-06, + "loss": 0.4975, + "num_input_tokens_seen": 8560384, + "step": 2720 + }, + { + "epoch": 0.1744446578324051, + "grad_norm": 42.42086410522461, + "learning_rate": 1.966530478570607e-06, + "loss": 0.538, + "num_input_tokens_seen": 8576960, + "step": 2725 + }, + { + "epoch": 0.174764739773382, + "grad_norm": 33.971405029296875, + "learning_rate": 1.9662432124074325e-06, + "loss": 0.4686, + "num_input_tokens_seen": 8592384, + "step": 2730 + }, + { + "epoch": 0.17508482171435888, + "grad_norm": 31.316307067871094, + "learning_rate": 1.965954739878262e-06, + "loss": 0.4889, + "num_input_tokens_seen": 8609024, + "step": 2735 + }, + { + "epoch": 0.17540490365533576, + "grad_norm": 50.005043029785156, + "learning_rate": 1.965665061343257e-06, + "loss": 0.4298, + "num_input_tokens_seen": 8624768, + "step": 2740 + }, + { + "epoch": 0.17572498559631267, + "grad_norm": 26.911272048950195, + "learning_rate": 1.965374177164085e-06, + "loss": 0.4643, + "num_input_tokens_seen": 8640448, + "step": 2745 + }, + { + "epoch": 0.17604506753728955, + "grad_norm": 29.490320205688477, + "learning_rate": 1.9650820877039182e-06, + "loss": 0.5569, + "num_input_tokens_seen": 8655296, + "step": 2750 + }, + { + "epoch": 0.17636514947826643, + "grad_norm": 74.6144790649414, + "learning_rate": 1.9647887933274334e-06, + "loss": 0.4903, + "num_input_tokens_seen": 8671872, + "step": 2755 + }, + { + "epoch": 0.17668523141924333, + "grad_norm": 23.339736938476562, + "learning_rate": 1.9644942944008124e-06, + "loss": 0.4835, + "num_input_tokens_seen": 8687680, + "step": 2760 + }, + { + "epoch": 0.1770053133602202, + "grad_norm": 56.54179763793945, + "learning_rate": 1.96419859129174e-06, + "loss": 0.6033, + "num_input_tokens_seen": 8702912, + "step": 2765 + }, + { + "epoch": 0.17732539530119712, + "grad_norm": 25.558734893798828, + "learning_rate": 1.963901684369406e-06, + "loss": 0.467, + "num_input_tokens_seen": 8718144, + "step": 2770 + }, + { + "epoch": 0.177645477242174, + "grad_norm": 39.40992736816406, + "learning_rate": 1.9636035740045013e-06, + "loss": 0.5107, + "num_input_tokens_seen": 8732992, + "step": 2775 + }, + { + "epoch": 0.17796555918315088, + "grad_norm": 25.630998611450195, + "learning_rate": 1.9633042605692207e-06, + "loss": 0.6129, + "num_input_tokens_seen": 8749056, + "step": 2780 + }, + { + "epoch": 0.17828564112412779, + "grad_norm": 24.820589065551758, + "learning_rate": 1.9630037444372597e-06, + "loss": 0.4943, + "num_input_tokens_seen": 8765184, + "step": 2785 + }, + { + "epoch": 0.17860572306510467, + "grad_norm": 45.313453674316406, + "learning_rate": 1.9627020259838177e-06, + "loss": 0.4163, + "num_input_tokens_seen": 8780480, + "step": 2790 + }, + { + "epoch": 0.17892580500608155, + "grad_norm": 33.22193908691406, + "learning_rate": 1.9623991055855925e-06, + "loss": 0.5605, + "num_input_tokens_seen": 8796352, + "step": 2795 + }, + { + "epoch": 0.17924588694705845, + "grad_norm": 28.097183227539062, + "learning_rate": 1.962094983620784e-06, + "loss": 0.4507, + "num_input_tokens_seen": 8810688, + "step": 2800 + }, + { + "epoch": 0.17956596888803533, + "grad_norm": 46.64733123779297, + "learning_rate": 1.9617896604690925e-06, + "loss": 0.4204, + "num_input_tokens_seen": 8826304, + "step": 2805 + }, + { + "epoch": 0.17988605082901224, + "grad_norm": 24.82090187072754, + "learning_rate": 1.961483136511717e-06, + "loss": 0.4545, + "num_input_tokens_seen": 8841344, + "step": 2810 + }, + { + "epoch": 0.18020613276998912, + "grad_norm": 49.74783706665039, + "learning_rate": 1.9611754121313567e-06, + "loss": 0.6135, + "num_input_tokens_seen": 8857664, + "step": 2815 + }, + { + "epoch": 0.180526214710966, + "grad_norm": 54.14537048339844, + "learning_rate": 1.960866487712209e-06, + "loss": 0.5854, + "num_input_tokens_seen": 8873408, + "step": 2820 + }, + { + "epoch": 0.1808462966519429, + "grad_norm": 32.58800506591797, + "learning_rate": 1.9605563636399695e-06, + "loss": 0.4328, + "num_input_tokens_seen": 8889472, + "step": 2825 + }, + { + "epoch": 0.18116637859291979, + "grad_norm": 62.71939468383789, + "learning_rate": 1.9602450403018315e-06, + "loss": 0.6013, + "num_input_tokens_seen": 8904640, + "step": 2830 + }, + { + "epoch": 0.18148646053389667, + "grad_norm": 36.499908447265625, + "learning_rate": 1.9599325180864864e-06, + "loss": 0.4548, + "num_input_tokens_seen": 8919680, + "step": 2835 + }, + { + "epoch": 0.18180654247487357, + "grad_norm": 32.281959533691406, + "learning_rate": 1.9596187973841216e-06, + "loss": 0.446, + "num_input_tokens_seen": 8935360, + "step": 2840 + }, + { + "epoch": 0.18212662441585045, + "grad_norm": 26.72726058959961, + "learning_rate": 1.959303878586421e-06, + "loss": 0.4871, + "num_input_tokens_seen": 8951552, + "step": 2845 + }, + { + "epoch": 0.18244670635682736, + "grad_norm": 43.18830871582031, + "learning_rate": 1.9589877620865647e-06, + "loss": 0.585, + "num_input_tokens_seen": 8968576, + "step": 2850 + }, + { + "epoch": 0.18276678829780424, + "grad_norm": 30.252056121826172, + "learning_rate": 1.9586704482792277e-06, + "loss": 0.4598, + "num_input_tokens_seen": 8983744, + "step": 2855 + }, + { + "epoch": 0.18308687023878112, + "grad_norm": 31.71357536315918, + "learning_rate": 1.95835193756058e-06, + "loss": 0.4344, + "num_input_tokens_seen": 8999040, + "step": 2860 + }, + { + "epoch": 0.18340695217975803, + "grad_norm": 40.17634963989258, + "learning_rate": 1.9580322303282858e-06, + "loss": 0.4269, + "num_input_tokens_seen": 9015872, + "step": 2865 + }, + { + "epoch": 0.1837270341207349, + "grad_norm": 27.72193145751953, + "learning_rate": 1.9577113269815038e-06, + "loss": 0.4106, + "num_input_tokens_seen": 9031744, + "step": 2870 + }, + { + "epoch": 0.18404711606171179, + "grad_norm": 36.63798141479492, + "learning_rate": 1.957389227920885e-06, + "loss": 0.5936, + "num_input_tokens_seen": 9047872, + "step": 2875 + }, + { + "epoch": 0.1843671980026887, + "grad_norm": 36.46480178833008, + "learning_rate": 1.957065933548574e-06, + "loss": 0.5225, + "num_input_tokens_seen": 9062976, + "step": 2880 + }, + { + "epoch": 0.18468727994366557, + "grad_norm": 59.29536819458008, + "learning_rate": 1.956741444268208e-06, + "loss": 0.5881, + "num_input_tokens_seen": 9078208, + "step": 2885 + }, + { + "epoch": 0.18500736188464248, + "grad_norm": 31.733598709106445, + "learning_rate": 1.9564157604849154e-06, + "loss": 0.4778, + "num_input_tokens_seen": 9094720, + "step": 2890 + }, + { + "epoch": 0.18532744382561936, + "grad_norm": 30.152931213378906, + "learning_rate": 1.9560888826053163e-06, + "loss": 0.529, + "num_input_tokens_seen": 9110336, + "step": 2895 + }, + { + "epoch": 0.18564752576659624, + "grad_norm": 25.448486328125, + "learning_rate": 1.9557608110375212e-06, + "loss": 0.5617, + "num_input_tokens_seen": 9126912, + "step": 2900 + }, + { + "epoch": 0.18596760770757315, + "grad_norm": 27.7618465423584, + "learning_rate": 1.955431546191132e-06, + "loss": 0.5447, + "num_input_tokens_seen": 9142400, + "step": 2905 + }, + { + "epoch": 0.18628768964855003, + "grad_norm": 44.82647705078125, + "learning_rate": 1.95510108847724e-06, + "loss": 0.5254, + "num_input_tokens_seen": 9157184, + "step": 2910 + }, + { + "epoch": 0.1866077715895269, + "grad_norm": 29.998842239379883, + "learning_rate": 1.954769438308424e-06, + "loss": 0.526, + "num_input_tokens_seen": 9173696, + "step": 2915 + }, + { + "epoch": 0.1869278535305038, + "grad_norm": 38.51725387573242, + "learning_rate": 1.954436596098754e-06, + "loss": 0.5085, + "num_input_tokens_seen": 9190080, + "step": 2920 + }, + { + "epoch": 0.1872479354714807, + "grad_norm": 61.17892837524414, + "learning_rate": 1.9541025622637875e-06, + "loss": 0.5828, + "num_input_tokens_seen": 9204352, + "step": 2925 + }, + { + "epoch": 0.1875680174124576, + "grad_norm": 51.220340728759766, + "learning_rate": 1.95376733722057e-06, + "loss": 0.6086, + "num_input_tokens_seen": 9219200, + "step": 2930 + }, + { + "epoch": 0.18788809935343448, + "grad_norm": 38.08414840698242, + "learning_rate": 1.9534309213876337e-06, + "loss": 0.4778, + "num_input_tokens_seen": 9233600, + "step": 2935 + }, + { + "epoch": 0.18820818129441136, + "grad_norm": 40.778892517089844, + "learning_rate": 1.953093315184997e-06, + "loss": 0.4369, + "num_input_tokens_seen": 9249536, + "step": 2940 + }, + { + "epoch": 0.18852826323538827, + "grad_norm": 40.999114990234375, + "learning_rate": 1.952754519034166e-06, + "loss": 0.6525, + "num_input_tokens_seen": 9264256, + "step": 2945 + }, + { + "epoch": 0.18884834517636515, + "grad_norm": 62.22706985473633, + "learning_rate": 1.9524145333581313e-06, + "loss": 0.4542, + "num_input_tokens_seen": 9279488, + "step": 2950 + }, + { + "epoch": 0.18916842711734205, + "grad_norm": 27.501522064208984, + "learning_rate": 1.952073358581369e-06, + "loss": 0.5187, + "num_input_tokens_seen": 9294336, + "step": 2955 + }, + { + "epoch": 0.18948850905831893, + "grad_norm": 37.89274215698242, + "learning_rate": 1.95173099512984e-06, + "loss": 0.5615, + "num_input_tokens_seen": 9309376, + "step": 2960 + }, + { + "epoch": 0.1898085909992958, + "grad_norm": 23.472244262695312, + "learning_rate": 1.9513874434309894e-06, + "loss": 0.4698, + "num_input_tokens_seen": 9324224, + "step": 2965 + }, + { + "epoch": 0.19012867294027272, + "grad_norm": 28.482378005981445, + "learning_rate": 1.951042703913745e-06, + "loss": 0.4491, + "num_input_tokens_seen": 9339136, + "step": 2970 + }, + { + "epoch": 0.1904487548812496, + "grad_norm": 26.93058204650879, + "learning_rate": 1.950696777008518e-06, + "loss": 0.4492, + "num_input_tokens_seen": 9354688, + "step": 2975 + }, + { + "epoch": 0.19076883682222648, + "grad_norm": 24.83283233642578, + "learning_rate": 1.9503496631472025e-06, + "loss": 0.4948, + "num_input_tokens_seen": 9369664, + "step": 2980 + }, + { + "epoch": 0.19108891876320339, + "grad_norm": 43.00146484375, + "learning_rate": 1.9500013627631746e-06, + "loss": 0.6353, + "num_input_tokens_seen": 9384768, + "step": 2985 + }, + { + "epoch": 0.19140900070418027, + "grad_norm": 35.03440856933594, + "learning_rate": 1.949651876291291e-06, + "loss": 0.3771, + "num_input_tokens_seen": 9400320, + "step": 2990 + }, + { + "epoch": 0.19172908264515717, + "grad_norm": 54.04991912841797, + "learning_rate": 1.9493012041678894e-06, + "loss": 0.4872, + "num_input_tokens_seen": 9415872, + "step": 2995 + }, + { + "epoch": 0.19204916458613405, + "grad_norm": 33.80318832397461, + "learning_rate": 1.9489493468307883e-06, + "loss": 0.5988, + "num_input_tokens_seen": 9432704, + "step": 3000 + }, + { + "epoch": 0.19236924652711093, + "grad_norm": 49.77751541137695, + "learning_rate": 1.948596304719286e-06, + "loss": 0.5456, + "num_input_tokens_seen": 9448192, + "step": 3005 + }, + { + "epoch": 0.19268932846808784, + "grad_norm": 44.03824234008789, + "learning_rate": 1.9482420782741594e-06, + "loss": 0.4447, + "num_input_tokens_seen": 9464576, + "step": 3010 + }, + { + "epoch": 0.19300941040906472, + "grad_norm": 32.3371467590332, + "learning_rate": 1.9478866679376647e-06, + "loss": 0.5591, + "num_input_tokens_seen": 9479936, + "step": 3015 + }, + { + "epoch": 0.1933294923500416, + "grad_norm": 31.040849685668945, + "learning_rate": 1.9475300741535353e-06, + "loss": 0.5564, + "num_input_tokens_seen": 9497280, + "step": 3020 + }, + { + "epoch": 0.1936495742910185, + "grad_norm": 39.65032958984375, + "learning_rate": 1.9471722973669833e-06, + "loss": 0.4714, + "num_input_tokens_seen": 9514496, + "step": 3025 + }, + { + "epoch": 0.19396965623199539, + "grad_norm": 24.577251434326172, + "learning_rate": 1.946813338024697e-06, + "loss": 0.3979, + "num_input_tokens_seen": 9529536, + "step": 3030 + }, + { + "epoch": 0.1942897381729723, + "grad_norm": 54.89514923095703, + "learning_rate": 1.9464531965748414e-06, + "loss": 0.5342, + "num_input_tokens_seen": 9545472, + "step": 3035 + }, + { + "epoch": 0.19460982011394917, + "grad_norm": 40.215274810791016, + "learning_rate": 1.9460918734670573e-06, + "loss": 0.5827, + "num_input_tokens_seen": 9560960, + "step": 3040 + }, + { + "epoch": 0.19492990205492605, + "grad_norm": 31.09059715270996, + "learning_rate": 1.945729369152461e-06, + "loss": 0.543, + "num_input_tokens_seen": 9576320, + "step": 3045 + }, + { + "epoch": 0.19524998399590296, + "grad_norm": 45.24855422973633, + "learning_rate": 1.945365684083643e-06, + "loss": 0.5533, + "num_input_tokens_seen": 9592192, + "step": 3050 + }, + { + "epoch": 0.19557006593687984, + "grad_norm": 55.232234954833984, + "learning_rate": 1.945000818714668e-06, + "loss": 0.615, + "num_input_tokens_seen": 9608128, + "step": 3055 + }, + { + "epoch": 0.19589014787785672, + "grad_norm": 28.884002685546875, + "learning_rate": 1.944634773501076e-06, + "loss": 0.546, + "num_input_tokens_seen": 9623872, + "step": 3060 + }, + { + "epoch": 0.19621022981883363, + "grad_norm": 53.66278076171875, + "learning_rate": 1.9442675488998783e-06, + "loss": 0.5662, + "num_input_tokens_seen": 9639488, + "step": 3065 + }, + { + "epoch": 0.1965303117598105, + "grad_norm": 28.837116241455078, + "learning_rate": 1.9438991453695587e-06, + "loss": 0.5017, + "num_input_tokens_seen": 9655680, + "step": 3070 + }, + { + "epoch": 0.1968503937007874, + "grad_norm": 38.618587493896484, + "learning_rate": 1.943529563370073e-06, + "loss": 0.5648, + "num_input_tokens_seen": 9670400, + "step": 3075 + }, + { + "epoch": 0.1971704756417643, + "grad_norm": 21.553041458129883, + "learning_rate": 1.9431588033628495e-06, + "loss": 0.3815, + "num_input_tokens_seen": 9685504, + "step": 3080 + }, + { + "epoch": 0.19749055758274117, + "grad_norm": 45.82009506225586, + "learning_rate": 1.9427868658107862e-06, + "loss": 0.6302, + "num_input_tokens_seen": 9701952, + "step": 3085 + }, + { + "epoch": 0.19781063952371808, + "grad_norm": 24.57038116455078, + "learning_rate": 1.942413751178251e-06, + "loss": 0.449, + "num_input_tokens_seen": 9716928, + "step": 3090 + }, + { + "epoch": 0.19813072146469496, + "grad_norm": 59.18320846557617, + "learning_rate": 1.9420394599310826e-06, + "loss": 0.6552, + "num_input_tokens_seen": 9732096, + "step": 3095 + }, + { + "epoch": 0.19845080340567184, + "grad_norm": 29.033939361572266, + "learning_rate": 1.941663992536588e-06, + "loss": 0.5247, + "num_input_tokens_seen": 9747648, + "step": 3100 + }, + { + "epoch": 0.19877088534664875, + "grad_norm": 15.162464141845703, + "learning_rate": 1.941287349463542e-06, + "loss": 0.4467, + "num_input_tokens_seen": 9763072, + "step": 3105 + }, + { + "epoch": 0.19909096728762563, + "grad_norm": 31.735469818115234, + "learning_rate": 1.940909531182188e-06, + "loss": 0.4856, + "num_input_tokens_seen": 9778176, + "step": 3110 + }, + { + "epoch": 0.19941104922860253, + "grad_norm": 47.100677490234375, + "learning_rate": 1.9405305381642375e-06, + "loss": 0.6168, + "num_input_tokens_seen": 9793536, + "step": 3115 + }, + { + "epoch": 0.1997311311695794, + "grad_norm": 25.74694061279297, + "learning_rate": 1.9401503708828665e-06, + "loss": 0.5055, + "num_input_tokens_seen": 9808192, + "step": 3120 + }, + { + "epoch": 0.2000512131105563, + "grad_norm": 32.5270881652832, + "learning_rate": 1.939769029812719e-06, + "loss": 0.5889, + "num_input_tokens_seen": 9823232, + "step": 3125 + }, + { + "epoch": 0.20024326227514244, + "eval_loss": 0.4917045831680298, + "eval_runtime": 50.5995, + "eval_samples_per_second": 274.43, + "eval_steps_per_second": 34.309, + "num_input_tokens_seen": 9832064, + "step": 3128 + }, + { + "epoch": 0.2003712950515332, + "grad_norm": 41.536319732666016, + "learning_rate": 1.939386515429904e-06, + "loss": 0.5998, + "num_input_tokens_seen": 9839488, + "step": 3130 + }, + { + "epoch": 0.20069137699251008, + "grad_norm": 21.79485321044922, + "learning_rate": 1.9390028282119942e-06, + "loss": 0.4234, + "num_input_tokens_seen": 9856192, + "step": 3135 + }, + { + "epoch": 0.201011458933487, + "grad_norm": 39.57857131958008, + "learning_rate": 1.938617968638029e-06, + "loss": 0.5139, + "num_input_tokens_seen": 9871552, + "step": 3140 + }, + { + "epoch": 0.20133154087446387, + "grad_norm": 40.204986572265625, + "learning_rate": 1.938231937188509e-06, + "loss": 0.5103, + "num_input_tokens_seen": 9886016, + "step": 3145 + }, + { + "epoch": 0.20165162281544075, + "grad_norm": 43.581180572509766, + "learning_rate": 1.9378447343453995e-06, + "loss": 0.6257, + "num_input_tokens_seen": 9903552, + "step": 3150 + }, + { + "epoch": 0.20197170475641765, + "grad_norm": 42.93930435180664, + "learning_rate": 1.9374563605921275e-06, + "loss": 0.3501, + "num_input_tokens_seen": 9920320, + "step": 3155 + }, + { + "epoch": 0.20229178669739453, + "grad_norm": 31.756664276123047, + "learning_rate": 1.937066816413582e-06, + "loss": 0.5844, + "num_input_tokens_seen": 9935936, + "step": 3160 + }, + { + "epoch": 0.2026118686383714, + "grad_norm": 27.54891586303711, + "learning_rate": 1.9366761022961146e-06, + "loss": 0.4866, + "num_input_tokens_seen": 9950912, + "step": 3165 + }, + { + "epoch": 0.20293195057934832, + "grad_norm": 43.409576416015625, + "learning_rate": 1.9362842187275354e-06, + "loss": 0.5726, + "num_input_tokens_seen": 9966080, + "step": 3170 + }, + { + "epoch": 0.2032520325203252, + "grad_norm": 29.677104949951172, + "learning_rate": 1.9358911661971155e-06, + "loss": 0.4769, + "num_input_tokens_seen": 9982080, + "step": 3175 + }, + { + "epoch": 0.2035721144613021, + "grad_norm": 31.4278621673584, + "learning_rate": 1.9354969451955864e-06, + "loss": 0.4818, + "num_input_tokens_seen": 9996544, + "step": 3180 + }, + { + "epoch": 0.20389219640227899, + "grad_norm": 28.86357879638672, + "learning_rate": 1.9351015562151375e-06, + "loss": 0.5595, + "num_input_tokens_seen": 10011776, + "step": 3185 + }, + { + "epoch": 0.20421227834325587, + "grad_norm": 26.479507446289062, + "learning_rate": 1.934704999749416e-06, + "loss": 0.4337, + "num_input_tokens_seen": 10027264, + "step": 3190 + }, + { + "epoch": 0.20453236028423277, + "grad_norm": 24.264083862304688, + "learning_rate": 1.9343072762935274e-06, + "loss": 0.4251, + "num_input_tokens_seen": 10042432, + "step": 3195 + }, + { + "epoch": 0.20485244222520965, + "grad_norm": 29.045461654663086, + "learning_rate": 1.933908386344035e-06, + "loss": 0.4122, + "num_input_tokens_seen": 10057792, + "step": 3200 + }, + { + "epoch": 0.20517252416618653, + "grad_norm": 35.074466705322266, + "learning_rate": 1.9335083303989565e-06, + "loss": 0.528, + "num_input_tokens_seen": 10074752, + "step": 3205 + }, + { + "epoch": 0.20549260610716344, + "grad_norm": 39.952335357666016, + "learning_rate": 1.9331071089577674e-06, + "loss": 0.5767, + "num_input_tokens_seen": 10090752, + "step": 3210 + }, + { + "epoch": 0.20581268804814032, + "grad_norm": 49.97673416137695, + "learning_rate": 1.9327047225213963e-06, + "loss": 0.5028, + "num_input_tokens_seen": 10106240, + "step": 3215 + }, + { + "epoch": 0.20613276998911723, + "grad_norm": 40.61750411987305, + "learning_rate": 1.9323011715922283e-06, + "loss": 0.4154, + "num_input_tokens_seen": 10121856, + "step": 3220 + }, + { + "epoch": 0.2064528519300941, + "grad_norm": 70.33148956298828, + "learning_rate": 1.931896456674101e-06, + "loss": 0.4682, + "num_input_tokens_seen": 10137408, + "step": 3225 + }, + { + "epoch": 0.20677293387107099, + "grad_norm": 35.08588790893555, + "learning_rate": 1.931490578272306e-06, + "loss": 0.4611, + "num_input_tokens_seen": 10152640, + "step": 3230 + }, + { + "epoch": 0.2070930158120479, + "grad_norm": 35.493282318115234, + "learning_rate": 1.9310835368935867e-06, + "loss": 0.3551, + "num_input_tokens_seen": 10167936, + "step": 3235 + }, + { + "epoch": 0.20741309775302477, + "grad_norm": 37.591766357421875, + "learning_rate": 1.93067533304614e-06, + "loss": 0.4241, + "num_input_tokens_seen": 10183360, + "step": 3240 + }, + { + "epoch": 0.20773317969400165, + "grad_norm": 31.755170822143555, + "learning_rate": 1.9302659672396128e-06, + "loss": 0.5624, + "num_input_tokens_seen": 10198208, + "step": 3245 + }, + { + "epoch": 0.20805326163497856, + "grad_norm": 27.73048210144043, + "learning_rate": 1.9298554399851025e-06, + "loss": 0.4975, + "num_input_tokens_seen": 10213568, + "step": 3250 + }, + { + "epoch": 0.20837334357595544, + "grad_norm": 38.50849533081055, + "learning_rate": 1.929443751795158e-06, + "loss": 0.4755, + "num_input_tokens_seen": 10230080, + "step": 3255 + }, + { + "epoch": 0.20869342551693235, + "grad_norm": 24.83016586303711, + "learning_rate": 1.929030903183776e-06, + "loss": 0.4792, + "num_input_tokens_seen": 10246912, + "step": 3260 + }, + { + "epoch": 0.20901350745790923, + "grad_norm": 40.69696044921875, + "learning_rate": 1.9286168946664033e-06, + "loss": 0.5231, + "num_input_tokens_seen": 10262464, + "step": 3265 + }, + { + "epoch": 0.2093335893988861, + "grad_norm": 65.39424896240234, + "learning_rate": 1.9282017267599352e-06, + "loss": 0.6606, + "num_input_tokens_seen": 10278016, + "step": 3270 + }, + { + "epoch": 0.209653671339863, + "grad_norm": 44.02311706542969, + "learning_rate": 1.9277853999827125e-06, + "loss": 0.5055, + "num_input_tokens_seen": 10293824, + "step": 3275 + }, + { + "epoch": 0.2099737532808399, + "grad_norm": 45.903785705566406, + "learning_rate": 1.9273679148545244e-06, + "loss": 0.5263, + "num_input_tokens_seen": 10309568, + "step": 3280 + }, + { + "epoch": 0.21029383522181677, + "grad_norm": 35.46440124511719, + "learning_rate": 1.9269492718966062e-06, + "loss": 0.4181, + "num_input_tokens_seen": 10325696, + "step": 3285 + }, + { + "epoch": 0.21061391716279368, + "grad_norm": 31.961286544799805, + "learning_rate": 1.9265294716316384e-06, + "loss": 0.5398, + "num_input_tokens_seen": 10342016, + "step": 3290 + }, + { + "epoch": 0.21093399910377056, + "grad_norm": 29.716991424560547, + "learning_rate": 1.926108514583747e-06, + "loss": 0.468, + "num_input_tokens_seen": 10357632, + "step": 3295 + }, + { + "epoch": 0.21125408104474747, + "grad_norm": 51.53056335449219, + "learning_rate": 1.925686401278501e-06, + "loss": 0.4805, + "num_input_tokens_seen": 10373056, + "step": 3300 + }, + { + "epoch": 0.21157416298572435, + "grad_norm": 55.39066696166992, + "learning_rate": 1.9252631322429143e-06, + "loss": 0.6377, + "num_input_tokens_seen": 10389248, + "step": 3305 + }, + { + "epoch": 0.21189424492670123, + "grad_norm": 25.772296905517578, + "learning_rate": 1.9248387080054435e-06, + "loss": 0.4445, + "num_input_tokens_seen": 10404864, + "step": 3310 + }, + { + "epoch": 0.21221432686767813, + "grad_norm": 19.97397232055664, + "learning_rate": 1.9244131290959864e-06, + "loss": 0.4925, + "num_input_tokens_seen": 10420416, + "step": 3315 + }, + { + "epoch": 0.212534408808655, + "grad_norm": 32.75675964355469, + "learning_rate": 1.9239863960458845e-06, + "loss": 0.4311, + "num_input_tokens_seen": 10435456, + "step": 3320 + }, + { + "epoch": 0.21285449074963192, + "grad_norm": 33.047603607177734, + "learning_rate": 1.923558509387918e-06, + "loss": 0.4857, + "num_input_tokens_seen": 10451584, + "step": 3325 + }, + { + "epoch": 0.2131745726906088, + "grad_norm": 39.12358474731445, + "learning_rate": 1.9231294696563086e-06, + "loss": 0.3719, + "num_input_tokens_seen": 10467584, + "step": 3330 + }, + { + "epoch": 0.21349465463158568, + "grad_norm": 61.4869384765625, + "learning_rate": 1.922699277386718e-06, + "loss": 0.432, + "num_input_tokens_seen": 10483264, + "step": 3335 + }, + { + "epoch": 0.21381473657256259, + "grad_norm": 35.242061614990234, + "learning_rate": 1.9222679331162454e-06, + "loss": 0.5869, + "num_input_tokens_seen": 10498560, + "step": 3340 + }, + { + "epoch": 0.21413481851353947, + "grad_norm": 37.8232536315918, + "learning_rate": 1.92183543738343e-06, + "loss": 0.4431, + "num_input_tokens_seen": 10514176, + "step": 3345 + }, + { + "epoch": 0.21445490045451635, + "grad_norm": 26.13767433166504, + "learning_rate": 1.9214017907282475e-06, + "loss": 0.4427, + "num_input_tokens_seen": 10529792, + "step": 3350 + }, + { + "epoch": 0.21477498239549325, + "grad_norm": 38.66913986206055, + "learning_rate": 1.9209669936921105e-06, + "loss": 0.499, + "num_input_tokens_seen": 10545856, + "step": 3355 + }, + { + "epoch": 0.21509506433647013, + "grad_norm": 45.678104400634766, + "learning_rate": 1.920531046817869e-06, + "loss": 0.4092, + "num_input_tokens_seen": 10562368, + "step": 3360 + }, + { + "epoch": 0.21541514627744704, + "grad_norm": 47.112674713134766, + "learning_rate": 1.9200939506498067e-06, + "loss": 0.6207, + "num_input_tokens_seen": 10577280, + "step": 3365 + }, + { + "epoch": 0.21573522821842392, + "grad_norm": 29.5268497467041, + "learning_rate": 1.9196557057336446e-06, + "loss": 0.5719, + "num_input_tokens_seen": 10592384, + "step": 3370 + }, + { + "epoch": 0.2160553101594008, + "grad_norm": 25.944847106933594, + "learning_rate": 1.9192163126165354e-06, + "loss": 0.4577, + "num_input_tokens_seen": 10608704, + "step": 3375 + }, + { + "epoch": 0.2163753921003777, + "grad_norm": 44.30316925048828, + "learning_rate": 1.9187757718470673e-06, + "loss": 0.4011, + "num_input_tokens_seen": 10625280, + "step": 3380 + }, + { + "epoch": 0.21669547404135459, + "grad_norm": 23.519418716430664, + "learning_rate": 1.9183340839752606e-06, + "loss": 0.5336, + "num_input_tokens_seen": 10641152, + "step": 3385 + }, + { + "epoch": 0.21701555598233147, + "grad_norm": 30.161663055419922, + "learning_rate": 1.9178912495525672e-06, + "loss": 0.4227, + "num_input_tokens_seen": 10657472, + "step": 3390 + }, + { + "epoch": 0.21733563792330837, + "grad_norm": 24.444168090820312, + "learning_rate": 1.917447269131872e-06, + "loss": 0.4942, + "num_input_tokens_seen": 10673600, + "step": 3395 + }, + { + "epoch": 0.21765571986428525, + "grad_norm": 37.56548309326172, + "learning_rate": 1.917002143267489e-06, + "loss": 0.5717, + "num_input_tokens_seen": 10689344, + "step": 3400 + }, + { + "epoch": 0.21797580180526216, + "grad_norm": 27.281709671020508, + "learning_rate": 1.9165558725151633e-06, + "loss": 0.4315, + "num_input_tokens_seen": 10704384, + "step": 3405 + }, + { + "epoch": 0.21829588374623904, + "grad_norm": 58.759857177734375, + "learning_rate": 1.9161084574320692e-06, + "loss": 0.4937, + "num_input_tokens_seen": 10720512, + "step": 3410 + }, + { + "epoch": 0.21861596568721592, + "grad_norm": 34.621681213378906, + "learning_rate": 1.91565989857681e-06, + "loss": 0.485, + "num_input_tokens_seen": 10735744, + "step": 3415 + }, + { + "epoch": 0.21893604762819283, + "grad_norm": 32.12639617919922, + "learning_rate": 1.9152101965094162e-06, + "loss": 0.4665, + "num_input_tokens_seen": 10750848, + "step": 3420 + }, + { + "epoch": 0.2192561295691697, + "grad_norm": 49.00548553466797, + "learning_rate": 1.9147593517913464e-06, + "loss": 0.4939, + "num_input_tokens_seen": 10765632, + "step": 3425 + }, + { + "epoch": 0.21957621151014659, + "grad_norm": 18.40258026123047, + "learning_rate": 1.914307364985485e-06, + "loss": 0.3868, + "num_input_tokens_seen": 10780928, + "step": 3430 + }, + { + "epoch": 0.2198962934511235, + "grad_norm": 26.393571853637695, + "learning_rate": 1.913854236656144e-06, + "loss": 0.4273, + "num_input_tokens_seen": 10796864, + "step": 3435 + }, + { + "epoch": 0.22021637539210037, + "grad_norm": 42.73613357543945, + "learning_rate": 1.9133999673690584e-06, + "loss": 0.4643, + "num_input_tokens_seen": 10812672, + "step": 3440 + }, + { + "epoch": 0.22053645733307728, + "grad_norm": 46.20648193359375, + "learning_rate": 1.9129445576913886e-06, + "loss": 0.4782, + "num_input_tokens_seen": 10828544, + "step": 3445 + }, + { + "epoch": 0.22085653927405416, + "grad_norm": 25.036144256591797, + "learning_rate": 1.91248800819172e-06, + "loss": 0.5318, + "num_input_tokens_seen": 10844288, + "step": 3450 + }, + { + "epoch": 0.22117662121503104, + "grad_norm": 49.720008850097656, + "learning_rate": 1.912030319440059e-06, + "loss": 0.5306, + "num_input_tokens_seen": 10860160, + "step": 3455 + }, + { + "epoch": 0.22149670315600795, + "grad_norm": 37.309383392333984, + "learning_rate": 1.9115714920078354e-06, + "loss": 0.6076, + "num_input_tokens_seen": 10875968, + "step": 3460 + }, + { + "epoch": 0.22181678509698483, + "grad_norm": 27.302000045776367, + "learning_rate": 1.9111115264679017e-06, + "loss": 0.3367, + "num_input_tokens_seen": 10892096, + "step": 3465 + }, + { + "epoch": 0.2221368670379617, + "grad_norm": 45.5595703125, + "learning_rate": 1.910650423394529e-06, + "loss": 0.4611, + "num_input_tokens_seen": 10908544, + "step": 3470 + }, + { + "epoch": 0.2224569489789386, + "grad_norm": 47.52442169189453, + "learning_rate": 1.910188183363411e-06, + "loss": 0.4804, + "num_input_tokens_seen": 10924544, + "step": 3475 + }, + { + "epoch": 0.2227770309199155, + "grad_norm": 50.32191467285156, + "learning_rate": 1.909724806951659e-06, + "loss": 0.4408, + "num_input_tokens_seen": 10941888, + "step": 3480 + }, + { + "epoch": 0.2230971128608924, + "grad_norm": 49.5562858581543, + "learning_rate": 1.909260294737804e-06, + "loss": 0.4689, + "num_input_tokens_seen": 10958592, + "step": 3485 + }, + { + "epoch": 0.22341719480186928, + "grad_norm": 82.99776458740234, + "learning_rate": 1.9087946473017953e-06, + "loss": 0.5319, + "num_input_tokens_seen": 10974208, + "step": 3490 + }, + { + "epoch": 0.22373727674284616, + "grad_norm": 33.70225524902344, + "learning_rate": 1.9083278652249992e-06, + "loss": 0.4363, + "num_input_tokens_seen": 10988928, + "step": 3495 + }, + { + "epoch": 0.22405735868382307, + "grad_norm": 38.13908386230469, + "learning_rate": 1.9078599490901983e-06, + "loss": 0.4327, + "num_input_tokens_seen": 11005952, + "step": 3500 + }, + { + "epoch": 0.22437744062479995, + "grad_norm": 96.11404418945312, + "learning_rate": 1.9073908994815914e-06, + "loss": 0.4012, + "num_input_tokens_seen": 11020608, + "step": 3505 + }, + { + "epoch": 0.22469752256577685, + "grad_norm": 50.33843994140625, + "learning_rate": 1.9069207169847928e-06, + "loss": 0.4999, + "num_input_tokens_seen": 11036736, + "step": 3510 + }, + { + "epoch": 0.22501760450675373, + "grad_norm": 34.01993179321289, + "learning_rate": 1.9064494021868302e-06, + "loss": 0.3645, + "num_input_tokens_seen": 11052480, + "step": 3515 + }, + { + "epoch": 0.2253376864477306, + "grad_norm": 38.320194244384766, + "learning_rate": 1.9059769556761464e-06, + "loss": 0.4816, + "num_input_tokens_seen": 11068416, + "step": 3520 + }, + { + "epoch": 0.22565776838870752, + "grad_norm": 32.70565414428711, + "learning_rate": 1.9055033780425962e-06, + "loss": 0.4443, + "num_input_tokens_seen": 11086400, + "step": 3525 + }, + { + "epoch": 0.2259778503296844, + "grad_norm": 87.77069854736328, + "learning_rate": 1.9050286698774464e-06, + "loss": 0.5674, + "num_input_tokens_seen": 11102848, + "step": 3530 + }, + { + "epoch": 0.22629793227066128, + "grad_norm": 41.02049255371094, + "learning_rate": 1.904552831773376e-06, + "loss": 0.5366, + "num_input_tokens_seen": 11118080, + "step": 3535 + }, + { + "epoch": 0.22661801421163819, + "grad_norm": 25.107044219970703, + "learning_rate": 1.9040758643244748e-06, + "loss": 0.5045, + "num_input_tokens_seen": 11133120, + "step": 3540 + }, + { + "epoch": 0.22693809615261507, + "grad_norm": 30.775938034057617, + "learning_rate": 1.903597768126242e-06, + "loss": 0.4452, + "num_input_tokens_seen": 11150144, + "step": 3545 + }, + { + "epoch": 0.22725817809359197, + "grad_norm": 53.267887115478516, + "learning_rate": 1.9031185437755862e-06, + "loss": 0.4862, + "num_input_tokens_seen": 11165760, + "step": 3550 + }, + { + "epoch": 0.22757826003456885, + "grad_norm": 52.50774383544922, + "learning_rate": 1.9026381918708246e-06, + "loss": 0.4948, + "num_input_tokens_seen": 11180096, + "step": 3555 + }, + { + "epoch": 0.22789834197554573, + "grad_norm": 20.394287109375, + "learning_rate": 1.9021567130116822e-06, + "loss": 0.3775, + "num_input_tokens_seen": 11195584, + "step": 3560 + }, + { + "epoch": 0.22821842391652264, + "grad_norm": 48.595298767089844, + "learning_rate": 1.9016741077992916e-06, + "loss": 0.389, + "num_input_tokens_seen": 11210944, + "step": 3565 + }, + { + "epoch": 0.22853850585749952, + "grad_norm": 27.00090980529785, + "learning_rate": 1.90119037683619e-06, + "loss": 0.4008, + "num_input_tokens_seen": 11227392, + "step": 3570 + }, + { + "epoch": 0.2288585877984764, + "grad_norm": 31.237030029296875, + "learning_rate": 1.9007055207263223e-06, + "loss": 0.6598, + "num_input_tokens_seen": 11244416, + "step": 3575 + }, + { + "epoch": 0.2291786697394533, + "grad_norm": 29.584184646606445, + "learning_rate": 1.900219540075036e-06, + "loss": 0.3584, + "num_input_tokens_seen": 11260672, + "step": 3580 + }, + { + "epoch": 0.22949875168043019, + "grad_norm": 58.92024612426758, + "learning_rate": 1.8997324354890845e-06, + "loss": 0.4823, + "num_input_tokens_seen": 11277504, + "step": 3585 + }, + { + "epoch": 0.2298188336214071, + "grad_norm": 71.05110931396484, + "learning_rate": 1.8992442075766233e-06, + "loss": 0.5325, + "num_input_tokens_seen": 11293184, + "step": 3590 + }, + { + "epoch": 0.23013891556238397, + "grad_norm": 31.41910743713379, + "learning_rate": 1.8987548569472105e-06, + "loss": 0.3273, + "num_input_tokens_seen": 11308480, + "step": 3595 + }, + { + "epoch": 0.23045899750336085, + "grad_norm": 31.32626724243164, + "learning_rate": 1.8982643842118064e-06, + "loss": 0.3958, + "num_input_tokens_seen": 11323840, + "step": 3600 + }, + { + "epoch": 0.23077907944433776, + "grad_norm": 66.50199127197266, + "learning_rate": 1.8977727899827716e-06, + "loss": 0.5822, + "num_input_tokens_seen": 11339456, + "step": 3605 + }, + { + "epoch": 0.23109916138531464, + "grad_norm": 49.916748046875, + "learning_rate": 1.8972800748738678e-06, + "loss": 0.6628, + "num_input_tokens_seen": 11354880, + "step": 3610 + }, + { + "epoch": 0.23141924332629152, + "grad_norm": 28.426061630249023, + "learning_rate": 1.896786239500255e-06, + "loss": 0.5365, + "num_input_tokens_seen": 11369984, + "step": 3615 + }, + { + "epoch": 0.23173932526726843, + "grad_norm": 51.97602081298828, + "learning_rate": 1.8962912844784928e-06, + "loss": 0.4328, + "num_input_tokens_seen": 11384640, + "step": 3620 + }, + { + "epoch": 0.2320594072082453, + "grad_norm": 53.62090301513672, + "learning_rate": 1.8957952104265384e-06, + "loss": 0.5017, + "num_input_tokens_seen": 11401152, + "step": 3625 + }, + { + "epoch": 0.2323794891492222, + "grad_norm": 34.26565170288086, + "learning_rate": 1.8952980179637458e-06, + "loss": 0.4551, + "num_input_tokens_seen": 11416896, + "step": 3630 + }, + { + "epoch": 0.2326995710901991, + "grad_norm": 36.66518020629883, + "learning_rate": 1.8947997077108662e-06, + "loss": 0.5002, + "num_input_tokens_seen": 11432832, + "step": 3635 + }, + { + "epoch": 0.23301965303117597, + "grad_norm": 32.46730041503906, + "learning_rate": 1.894300280290045e-06, + "loss": 0.5022, + "num_input_tokens_seen": 11448320, + "step": 3640 + }, + { + "epoch": 0.23333973497215288, + "grad_norm": 25.59243392944336, + "learning_rate": 1.8937997363248237e-06, + "loss": 0.5691, + "num_input_tokens_seen": 11463488, + "step": 3645 + }, + { + "epoch": 0.23365981691312976, + "grad_norm": 21.926359176635742, + "learning_rate": 1.8932980764401373e-06, + "loss": 0.4616, + "num_input_tokens_seen": 11478592, + "step": 3650 + }, + { + "epoch": 0.23397989885410664, + "grad_norm": 26.116849899291992, + "learning_rate": 1.8927953012623141e-06, + "loss": 0.367, + "num_input_tokens_seen": 11494720, + "step": 3655 + }, + { + "epoch": 0.23429998079508355, + "grad_norm": 56.29279708862305, + "learning_rate": 1.8922914114190744e-06, + "loss": 0.4884, + "num_input_tokens_seen": 11511232, + "step": 3660 + }, + { + "epoch": 0.23462006273606043, + "grad_norm": 33.87401580810547, + "learning_rate": 1.8917864075395312e-06, + "loss": 0.5212, + "num_input_tokens_seen": 11527040, + "step": 3665 + }, + { + "epoch": 0.23494014467703733, + "grad_norm": 19.292613983154297, + "learning_rate": 1.8912802902541873e-06, + "loss": 0.4641, + "num_input_tokens_seen": 11542528, + "step": 3670 + }, + { + "epoch": 0.2352602266180142, + "grad_norm": 40.246734619140625, + "learning_rate": 1.8907730601949362e-06, + "loss": 0.503, + "num_input_tokens_seen": 11557696, + "step": 3675 + }, + { + "epoch": 0.2355803085589911, + "grad_norm": 50.62693786621094, + "learning_rate": 1.8902647179950608e-06, + "loss": 0.4703, + "num_input_tokens_seen": 11574848, + "step": 3680 + }, + { + "epoch": 0.235900390499968, + "grad_norm": 45.66773986816406, + "learning_rate": 1.889755264289232e-06, + "loss": 0.5074, + "num_input_tokens_seen": 11589696, + "step": 3685 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 20.744386672973633, + "learning_rate": 1.8892446997135087e-06, + "loss": 0.3915, + "num_input_tokens_seen": 11606848, + "step": 3690 + }, + { + "epoch": 0.23654055438192176, + "grad_norm": 30.993366241455078, + "learning_rate": 1.888733024905337e-06, + "loss": 0.6641, + "num_input_tokens_seen": 11623744, + "step": 3695 + }, + { + "epoch": 0.23686063632289867, + "grad_norm": 38.157161712646484, + "learning_rate": 1.888220240503549e-06, + "loss": 0.4874, + "num_input_tokens_seen": 11640256, + "step": 3700 + }, + { + "epoch": 0.23718071826387555, + "grad_norm": 33.60673522949219, + "learning_rate": 1.8877063471483618e-06, + "loss": 0.4096, + "num_input_tokens_seen": 11655744, + "step": 3705 + }, + { + "epoch": 0.23750080020485245, + "grad_norm": 17.36272621154785, + "learning_rate": 1.8871913454813772e-06, + "loss": 0.2877, + "num_input_tokens_seen": 11671104, + "step": 3710 + }, + { + "epoch": 0.23782088214582933, + "grad_norm": 31.4836483001709, + "learning_rate": 1.886675236145581e-06, + "loss": 0.3741, + "num_input_tokens_seen": 11686848, + "step": 3715 + }, + { + "epoch": 0.2381409640868062, + "grad_norm": 29.86385726928711, + "learning_rate": 1.8861580197853422e-06, + "loss": 0.5053, + "num_input_tokens_seen": 11701952, + "step": 3720 + }, + { + "epoch": 0.23846104602778312, + "grad_norm": 39.52205276489258, + "learning_rate": 1.8856396970464105e-06, + "loss": 0.4637, + "num_input_tokens_seen": 11718592, + "step": 3725 + }, + { + "epoch": 0.23878112796876, + "grad_norm": 36.30428695678711, + "learning_rate": 1.8851202685759189e-06, + "loss": 0.5129, + "num_input_tokens_seen": 11734208, + "step": 3730 + }, + { + "epoch": 0.2391012099097369, + "grad_norm": 12.694880485534668, + "learning_rate": 1.8845997350223792e-06, + "loss": 0.4186, + "num_input_tokens_seen": 11748992, + "step": 3735 + }, + { + "epoch": 0.23942129185071379, + "grad_norm": 29.11704444885254, + "learning_rate": 1.8840780970356842e-06, + "loss": 0.4258, + "num_input_tokens_seen": 11764608, + "step": 3740 + }, + { + "epoch": 0.23974137379169067, + "grad_norm": 28.726520538330078, + "learning_rate": 1.8835553552671048e-06, + "loss": 0.3919, + "num_input_tokens_seen": 11780800, + "step": 3745 + }, + { + "epoch": 0.24006145573266757, + "grad_norm": 32.10429382324219, + "learning_rate": 1.8830315103692902e-06, + "loss": 0.4467, + "num_input_tokens_seen": 11795776, + "step": 3750 + }, + { + "epoch": 0.24038153767364445, + "grad_norm": 36.08988571166992, + "learning_rate": 1.8825065629962669e-06, + "loss": 0.52, + "num_input_tokens_seen": 11811776, + "step": 3755 + }, + { + "epoch": 0.24070161961462133, + "grad_norm": 34.568302154541016, + "learning_rate": 1.881980513803438e-06, + "loss": 0.4902, + "num_input_tokens_seen": 11828224, + "step": 3760 + }, + { + "epoch": 0.24102170155559824, + "grad_norm": 49.93181228637695, + "learning_rate": 1.881453363447582e-06, + "loss": 0.5093, + "num_input_tokens_seen": 11843904, + "step": 3765 + }, + { + "epoch": 0.24134178349657512, + "grad_norm": 58.88014221191406, + "learning_rate": 1.880925112586852e-06, + "loss": 0.5653, + "num_input_tokens_seen": 11859392, + "step": 3770 + }, + { + "epoch": 0.24166186543755203, + "grad_norm": 48.48269271850586, + "learning_rate": 1.8803957618807762e-06, + "loss": 0.4347, + "num_input_tokens_seen": 11875968, + "step": 3775 + }, + { + "epoch": 0.2419819473785289, + "grad_norm": 70.41657257080078, + "learning_rate": 1.8798653119902548e-06, + "loss": 0.4527, + "num_input_tokens_seen": 11891584, + "step": 3780 + }, + { + "epoch": 0.24230202931950579, + "grad_norm": 27.50962257385254, + "learning_rate": 1.8793337635775603e-06, + "loss": 0.4997, + "num_input_tokens_seen": 11906944, + "step": 3785 + }, + { + "epoch": 0.2426221112604827, + "grad_norm": 47.6135139465332, + "learning_rate": 1.8788011173063376e-06, + "loss": 0.4883, + "num_input_tokens_seen": 11922368, + "step": 3790 + }, + { + "epoch": 0.24294219320145957, + "grad_norm": 50.76975631713867, + "learning_rate": 1.8782673738416018e-06, + "loss": 0.5193, + "num_input_tokens_seen": 11938432, + "step": 3795 + }, + { + "epoch": 0.24326227514243645, + "grad_norm": 48.272972106933594, + "learning_rate": 1.877732533849737e-06, + "loss": 0.5232, + "num_input_tokens_seen": 11956608, + "step": 3800 + }, + { + "epoch": 0.24358235708341336, + "grad_norm": 27.233749389648438, + "learning_rate": 1.8771965979984988e-06, + "loss": 0.4473, + "num_input_tokens_seen": 11972480, + "step": 3805 + }, + { + "epoch": 0.24390243902439024, + "grad_norm": 23.872587203979492, + "learning_rate": 1.8766595669570084e-06, + "loss": 0.3903, + "num_input_tokens_seen": 11987072, + "step": 3810 + }, + { + "epoch": 0.24422252096536715, + "grad_norm": 32.66127395629883, + "learning_rate": 1.8761214413957553e-06, + "loss": 0.4257, + "num_input_tokens_seen": 12002112, + "step": 3815 + }, + { + "epoch": 0.24454260290634403, + "grad_norm": 24.8861026763916, + "learning_rate": 1.8755822219865963e-06, + "loss": 0.3607, + "num_input_tokens_seen": 12016960, + "step": 3820 + }, + { + "epoch": 0.2448626848473209, + "grad_norm": 68.05755615234375, + "learning_rate": 1.875041909402752e-06, + "loss": 0.4264, + "num_input_tokens_seen": 12032576, + "step": 3825 + }, + { + "epoch": 0.2451827667882978, + "grad_norm": 25.900711059570312, + "learning_rate": 1.8745005043188102e-06, + "loss": 0.3694, + "num_input_tokens_seen": 12048768, + "step": 3830 + }, + { + "epoch": 0.2455028487292747, + "grad_norm": 34.87873077392578, + "learning_rate": 1.8739580074107208e-06, + "loss": 0.3828, + "num_input_tokens_seen": 12065088, + "step": 3835 + }, + { + "epoch": 0.24582293067025157, + "grad_norm": 41.93102264404297, + "learning_rate": 1.873414419355798e-06, + "loss": 0.7066, + "num_input_tokens_seen": 12080704, + "step": 3840 + }, + { + "epoch": 0.24614301261122848, + "grad_norm": 43.951683044433594, + "learning_rate": 1.872869740832717e-06, + "loss": 0.4319, + "num_input_tokens_seen": 12096704, + "step": 3845 + }, + { + "epoch": 0.24646309455220536, + "grad_norm": 38.640892028808594, + "learning_rate": 1.8723239725215165e-06, + "loss": 0.5962, + "num_input_tokens_seen": 12111488, + "step": 3850 + }, + { + "epoch": 0.24678317649318227, + "grad_norm": 22.83368682861328, + "learning_rate": 1.871777115103594e-06, + "loss": 0.4195, + "num_input_tokens_seen": 12128192, + "step": 3855 + }, + { + "epoch": 0.24710325843415915, + "grad_norm": 21.497661590576172, + "learning_rate": 1.8712291692617074e-06, + "loss": 0.4734, + "num_input_tokens_seen": 12143808, + "step": 3860 + }, + { + "epoch": 0.24742334037513602, + "grad_norm": 35.97737121582031, + "learning_rate": 1.8706801356799735e-06, + "loss": 0.496, + "num_input_tokens_seen": 12159232, + "step": 3865 + }, + { + "epoch": 0.24774342231611293, + "grad_norm": 28.39885139465332, + "learning_rate": 1.8701300150438674e-06, + "loss": 0.4515, + "num_input_tokens_seen": 12175360, + "step": 3870 + }, + { + "epoch": 0.2480635042570898, + "grad_norm": 18.00739288330078, + "learning_rate": 1.869578808040221e-06, + "loss": 0.4208, + "num_input_tokens_seen": 12190272, + "step": 3875 + }, + { + "epoch": 0.2483835861980667, + "grad_norm": 48.9376106262207, + "learning_rate": 1.869026515357223e-06, + "loss": 0.5226, + "num_input_tokens_seen": 12208448, + "step": 3880 + }, + { + "epoch": 0.2487036681390436, + "grad_norm": 39.13751220703125, + "learning_rate": 1.8684731376844169e-06, + "loss": 0.6458, + "num_input_tokens_seen": 12225984, + "step": 3885 + }, + { + "epoch": 0.24902375008002048, + "grad_norm": 35.23835372924805, + "learning_rate": 1.8679186757127014e-06, + "loss": 0.5022, + "num_input_tokens_seen": 12241408, + "step": 3890 + }, + { + "epoch": 0.24934383202099739, + "grad_norm": 33.123191833496094, + "learning_rate": 1.8673631301343288e-06, + "loss": 0.4355, + "num_input_tokens_seen": 12256064, + "step": 3895 + }, + { + "epoch": 0.24966391396197427, + "grad_norm": 31.406911849975586, + "learning_rate": 1.8668065016429044e-06, + "loss": 0.4515, + "num_input_tokens_seen": 12272832, + "step": 3900 + }, + { + "epoch": 0.24998399590295114, + "grad_norm": 23.97220230102539, + "learning_rate": 1.866248790933385e-06, + "loss": 0.5368, + "num_input_tokens_seen": 12289024, + "step": 3905 + }, + { + "epoch": 0.25030407784392805, + "grad_norm": 27.529621124267578, + "learning_rate": 1.8656899987020795e-06, + "loss": 0.4277, + "num_input_tokens_seen": 12304064, + "step": 3910 + }, + { + "epoch": 0.25030407784392805, + "eval_loss": 0.46774157881736755, + "eval_runtime": 50.7199, + "eval_samples_per_second": 273.778, + "eval_steps_per_second": 34.227, + "num_input_tokens_seen": 12304064, + "step": 3910 + }, + { + "epoch": 0.25062415978490493, + "grad_norm": 33.151676177978516, + "learning_rate": 1.865130125646646e-06, + "loss": 0.4665, + "num_input_tokens_seen": 12320256, + "step": 3915 + }, + { + "epoch": 0.2509442417258818, + "grad_norm": 22.091079711914062, + "learning_rate": 1.8645691724660933e-06, + "loss": 0.4426, + "num_input_tokens_seen": 12335360, + "step": 3920 + }, + { + "epoch": 0.2512643236668587, + "grad_norm": 25.529563903808594, + "learning_rate": 1.8640071398607774e-06, + "loss": 0.4718, + "num_input_tokens_seen": 12351488, + "step": 3925 + }, + { + "epoch": 0.2515844056078356, + "grad_norm": 64.41213989257812, + "learning_rate": 1.8634440285324024e-06, + "loss": 0.6284, + "num_input_tokens_seen": 12365952, + "step": 3930 + }, + { + "epoch": 0.2519044875488125, + "grad_norm": 53.673301696777344, + "learning_rate": 1.8628798391840205e-06, + "loss": 0.4716, + "num_input_tokens_seen": 12381376, + "step": 3935 + }, + { + "epoch": 0.2522245694897894, + "grad_norm": 60.03094482421875, + "learning_rate": 1.8623145725200277e-06, + "loss": 0.4596, + "num_input_tokens_seen": 12396160, + "step": 3940 + }, + { + "epoch": 0.25254465143076626, + "grad_norm": 29.19440460205078, + "learning_rate": 1.8617482292461664e-06, + "loss": 0.4591, + "num_input_tokens_seen": 12410944, + "step": 3945 + }, + { + "epoch": 0.25286473337174314, + "grad_norm": 28.456212997436523, + "learning_rate": 1.861180810069523e-06, + "loss": 0.4216, + "num_input_tokens_seen": 12426304, + "step": 3950 + }, + { + "epoch": 0.2531848153127201, + "grad_norm": 44.63097381591797, + "learning_rate": 1.8606123156985268e-06, + "loss": 0.4785, + "num_input_tokens_seen": 12442432, + "step": 3955 + }, + { + "epoch": 0.25350489725369696, + "grad_norm": 21.59270477294922, + "learning_rate": 1.8600427468429496e-06, + "loss": 0.4666, + "num_input_tokens_seen": 12458368, + "step": 3960 + }, + { + "epoch": 0.25382497919467384, + "grad_norm": 32.624305725097656, + "learning_rate": 1.8594721042139052e-06, + "loss": 0.433, + "num_input_tokens_seen": 12474368, + "step": 3965 + }, + { + "epoch": 0.2541450611356507, + "grad_norm": 19.357770919799805, + "learning_rate": 1.858900388523847e-06, + "loss": 0.423, + "num_input_tokens_seen": 12490176, + "step": 3970 + }, + { + "epoch": 0.2544651430766276, + "grad_norm": 27.158327102661133, + "learning_rate": 1.8583276004865694e-06, + "loss": 0.4523, + "num_input_tokens_seen": 12507840, + "step": 3975 + }, + { + "epoch": 0.25478522501760453, + "grad_norm": 35.72364044189453, + "learning_rate": 1.8577537408172046e-06, + "loss": 0.352, + "num_input_tokens_seen": 12523520, + "step": 3980 + }, + { + "epoch": 0.2551053069585814, + "grad_norm": 32.988182067871094, + "learning_rate": 1.8571788102322234e-06, + "loss": 0.5448, + "num_input_tokens_seen": 12540736, + "step": 3985 + }, + { + "epoch": 0.2554253888995583, + "grad_norm": 35.66259002685547, + "learning_rate": 1.8566028094494332e-06, + "loss": 0.4698, + "num_input_tokens_seen": 12556352, + "step": 3990 + }, + { + "epoch": 0.25574547084053517, + "grad_norm": 21.36469268798828, + "learning_rate": 1.8560257391879778e-06, + "loss": 0.3732, + "num_input_tokens_seen": 12570688, + "step": 3995 + }, + { + "epoch": 0.25606555278151205, + "grad_norm": 16.891489028930664, + "learning_rate": 1.855447600168336e-06, + "loss": 0.4079, + "num_input_tokens_seen": 12585984, + "step": 4000 + }, + { + "epoch": 0.25638563472248893, + "grad_norm": 19.356708526611328, + "learning_rate": 1.8548683931123215e-06, + "loss": 0.4732, + "num_input_tokens_seen": 12601216, + "step": 4005 + }, + { + "epoch": 0.25670571666346587, + "grad_norm": 66.04039764404297, + "learning_rate": 1.8542881187430807e-06, + "loss": 0.4471, + "num_input_tokens_seen": 12618624, + "step": 4010 + }, + { + "epoch": 0.25702579860444275, + "grad_norm": 27.081804275512695, + "learning_rate": 1.8537067777850935e-06, + "loss": 0.5899, + "num_input_tokens_seen": 12635840, + "step": 4015 + }, + { + "epoch": 0.2573458805454196, + "grad_norm": 22.54844093322754, + "learning_rate": 1.8531243709641704e-06, + "loss": 0.359, + "num_input_tokens_seen": 12651904, + "step": 4020 + }, + { + "epoch": 0.2576659624863965, + "grad_norm": 37.674034118652344, + "learning_rate": 1.8525408990074533e-06, + "loss": 0.4977, + "num_input_tokens_seen": 12666944, + "step": 4025 + }, + { + "epoch": 0.2579860444273734, + "grad_norm": 23.49472999572754, + "learning_rate": 1.851956362643414e-06, + "loss": 0.4184, + "num_input_tokens_seen": 12682688, + "step": 4030 + }, + { + "epoch": 0.2583061263683503, + "grad_norm": 44.18896484375, + "learning_rate": 1.851370762601853e-06, + "loss": 0.5578, + "num_input_tokens_seen": 12698304, + "step": 4035 + }, + { + "epoch": 0.2586262083093272, + "grad_norm": 42.4050178527832, + "learning_rate": 1.8507840996138983e-06, + "loss": 0.5083, + "num_input_tokens_seen": 12712896, + "step": 4040 + }, + { + "epoch": 0.2589462902503041, + "grad_norm": 63.08219909667969, + "learning_rate": 1.8501963744120062e-06, + "loss": 0.3908, + "num_input_tokens_seen": 12727488, + "step": 4045 + }, + { + "epoch": 0.25926637219128096, + "grad_norm": 35.79430389404297, + "learning_rate": 1.849607587729958e-06, + "loss": 0.408, + "num_input_tokens_seen": 12742720, + "step": 4050 + }, + { + "epoch": 0.25958645413225784, + "grad_norm": 28.8338623046875, + "learning_rate": 1.8490177403028615e-06, + "loss": 0.3966, + "num_input_tokens_seen": 12757760, + "step": 4055 + }, + { + "epoch": 0.2599065360732348, + "grad_norm": 44.3582878112793, + "learning_rate": 1.8484268328671475e-06, + "loss": 0.4966, + "num_input_tokens_seen": 12773312, + "step": 4060 + }, + { + "epoch": 0.26022661801421165, + "grad_norm": 41.44272994995117, + "learning_rate": 1.847834866160571e-06, + "loss": 0.5448, + "num_input_tokens_seen": 12790336, + "step": 4065 + }, + { + "epoch": 0.26054669995518853, + "grad_norm": 26.90788459777832, + "learning_rate": 1.847241840922209e-06, + "loss": 0.4919, + "num_input_tokens_seen": 12805632, + "step": 4070 + }, + { + "epoch": 0.2608667818961654, + "grad_norm": 36.868736267089844, + "learning_rate": 1.8466477578924616e-06, + "loss": 0.4875, + "num_input_tokens_seen": 12821184, + "step": 4075 + }, + { + "epoch": 0.2611868638371423, + "grad_norm": 32.42481994628906, + "learning_rate": 1.8460526178130472e-06, + "loss": 0.5004, + "num_input_tokens_seen": 12836544, + "step": 4080 + }, + { + "epoch": 0.26150694577811917, + "grad_norm": 38.77480697631836, + "learning_rate": 1.8454564214270056e-06, + "loss": 0.436, + "num_input_tokens_seen": 12852032, + "step": 4085 + }, + { + "epoch": 0.2618270277190961, + "grad_norm": 47.80220031738281, + "learning_rate": 1.8448591694786955e-06, + "loss": 0.4469, + "num_input_tokens_seen": 12867456, + "step": 4090 + }, + { + "epoch": 0.262147109660073, + "grad_norm": 34.1256103515625, + "learning_rate": 1.8442608627137925e-06, + "loss": 0.341, + "num_input_tokens_seen": 12885184, + "step": 4095 + }, + { + "epoch": 0.26246719160104987, + "grad_norm": 31.046709060668945, + "learning_rate": 1.8436615018792897e-06, + "loss": 0.3896, + "num_input_tokens_seen": 12900416, + "step": 4100 + }, + { + "epoch": 0.26278727354202674, + "grad_norm": 38.95481872558594, + "learning_rate": 1.8430610877234957e-06, + "loss": 0.5792, + "num_input_tokens_seen": 12915648, + "step": 4105 + }, + { + "epoch": 0.2631073554830036, + "grad_norm": 15.715054512023926, + "learning_rate": 1.8424596209960356e-06, + "loss": 0.4624, + "num_input_tokens_seen": 12930368, + "step": 4110 + }, + { + "epoch": 0.26342743742398056, + "grad_norm": 25.71843910217285, + "learning_rate": 1.8418571024478466e-06, + "loss": 0.5265, + "num_input_tokens_seen": 12945472, + "step": 4115 + }, + { + "epoch": 0.26374751936495744, + "grad_norm": 23.532575607299805, + "learning_rate": 1.8412535328311812e-06, + "loss": 0.491, + "num_input_tokens_seen": 12961472, + "step": 4120 + }, + { + "epoch": 0.2640676013059343, + "grad_norm": 50.43345260620117, + "learning_rate": 1.8406489128996023e-06, + "loss": 0.5816, + "num_input_tokens_seen": 12975872, + "step": 4125 + }, + { + "epoch": 0.2643876832469112, + "grad_norm": 34.13943862915039, + "learning_rate": 1.8400432434079853e-06, + "loss": 0.529, + "num_input_tokens_seen": 12992128, + "step": 4130 + }, + { + "epoch": 0.2647077651878881, + "grad_norm": 18.359914779663086, + "learning_rate": 1.8394365251125162e-06, + "loss": 0.4095, + "num_input_tokens_seen": 13021184, + "step": 4135 + }, + { + "epoch": 0.265027847128865, + "grad_norm": 33.158809661865234, + "learning_rate": 1.8388287587706888e-06, + "loss": 0.4425, + "num_input_tokens_seen": 13037568, + "step": 4140 + }, + { + "epoch": 0.2653479290698419, + "grad_norm": 35.4780387878418, + "learning_rate": 1.8382199451413074e-06, + "loss": 0.4682, + "num_input_tokens_seen": 13053440, + "step": 4145 + }, + { + "epoch": 0.26566801101081877, + "grad_norm": 38.03645706176758, + "learning_rate": 1.837610084984483e-06, + "loss": 0.5178, + "num_input_tokens_seen": 13069440, + "step": 4150 + }, + { + "epoch": 0.26598809295179565, + "grad_norm": 55.84706115722656, + "learning_rate": 1.8369991790616327e-06, + "loss": 0.5487, + "num_input_tokens_seen": 13084224, + "step": 4155 + }, + { + "epoch": 0.26630817489277253, + "grad_norm": 39.98716735839844, + "learning_rate": 1.8363872281354795e-06, + "loss": 0.6725, + "num_input_tokens_seen": 13098688, + "step": 4160 + }, + { + "epoch": 0.26662825683374947, + "grad_norm": 33.307315826416016, + "learning_rate": 1.835774232970052e-06, + "loss": 0.4162, + "num_input_tokens_seen": 13114112, + "step": 4165 + }, + { + "epoch": 0.26694833877472635, + "grad_norm": 31.13365936279297, + "learning_rate": 1.8351601943306815e-06, + "loss": 0.454, + "num_input_tokens_seen": 13130240, + "step": 4170 + }, + { + "epoch": 0.2672684207157032, + "grad_norm": 41.67189407348633, + "learning_rate": 1.8345451129840025e-06, + "loss": 0.3972, + "num_input_tokens_seen": 13145536, + "step": 4175 + }, + { + "epoch": 0.2675885026566801, + "grad_norm": 38.381675720214844, + "learning_rate": 1.8339289896979515e-06, + "loss": 0.5506, + "num_input_tokens_seen": 13160256, + "step": 4180 + }, + { + "epoch": 0.267908584597657, + "grad_norm": 39.27140426635742, + "learning_rate": 1.8333118252417651e-06, + "loss": 0.5525, + "num_input_tokens_seen": 13177088, + "step": 4185 + }, + { + "epoch": 0.26822866653863386, + "grad_norm": 34.699344635009766, + "learning_rate": 1.832693620385981e-06, + "loss": 0.5154, + "num_input_tokens_seen": 13192768, + "step": 4190 + }, + { + "epoch": 0.2685487484796108, + "grad_norm": 27.59552764892578, + "learning_rate": 1.8320743759024352e-06, + "loss": 0.5089, + "num_input_tokens_seen": 13208192, + "step": 4195 + }, + { + "epoch": 0.2688688304205877, + "grad_norm": 42.20448303222656, + "learning_rate": 1.831454092564261e-06, + "loss": 0.5486, + "num_input_tokens_seen": 13223872, + "step": 4200 + }, + { + "epoch": 0.26918891236156456, + "grad_norm": 21.2120361328125, + "learning_rate": 1.8308327711458899e-06, + "loss": 0.4597, + "num_input_tokens_seen": 13239104, + "step": 4205 + }, + { + "epoch": 0.26950899430254144, + "grad_norm": 40.029544830322266, + "learning_rate": 1.830210412423049e-06, + "loss": 0.3925, + "num_input_tokens_seen": 13254464, + "step": 4210 + }, + { + "epoch": 0.2698290762435183, + "grad_norm": 23.788978576660156, + "learning_rate": 1.8295870171727605e-06, + "loss": 0.3617, + "num_input_tokens_seen": 13269824, + "step": 4215 + }, + { + "epoch": 0.27014915818449525, + "grad_norm": 22.333120346069336, + "learning_rate": 1.8289625861733408e-06, + "loss": 0.4149, + "num_input_tokens_seen": 13288448, + "step": 4220 + }, + { + "epoch": 0.27046924012547213, + "grad_norm": 35.422637939453125, + "learning_rate": 1.8283371202043991e-06, + "loss": 0.5178, + "num_input_tokens_seen": 13304320, + "step": 4225 + }, + { + "epoch": 0.270789322066449, + "grad_norm": 40.13027572631836, + "learning_rate": 1.827710620046837e-06, + "loss": 0.5533, + "num_input_tokens_seen": 13321920, + "step": 4230 + }, + { + "epoch": 0.2711094040074259, + "grad_norm": 55.25458526611328, + "learning_rate": 1.8270830864828474e-06, + "loss": 0.4571, + "num_input_tokens_seen": 13337280, + "step": 4235 + }, + { + "epoch": 0.27142948594840277, + "grad_norm": 16.795021057128906, + "learning_rate": 1.8264545202959133e-06, + "loss": 0.434, + "num_input_tokens_seen": 13354112, + "step": 4240 + }, + { + "epoch": 0.2717495678893797, + "grad_norm": 37.604312896728516, + "learning_rate": 1.8258249222708067e-06, + "loss": 0.4362, + "num_input_tokens_seen": 13369600, + "step": 4245 + }, + { + "epoch": 0.2720696498303566, + "grad_norm": 24.84840202331543, + "learning_rate": 1.8251942931935886e-06, + "loss": 0.4558, + "num_input_tokens_seen": 13385536, + "step": 4250 + }, + { + "epoch": 0.27238973177133347, + "grad_norm": 33.14160919189453, + "learning_rate": 1.8245626338516069e-06, + "loss": 0.3748, + "num_input_tokens_seen": 13400832, + "step": 4255 + }, + { + "epoch": 0.27270981371231034, + "grad_norm": 35.8721923828125, + "learning_rate": 1.823929945033495e-06, + "loss": 0.338, + "num_input_tokens_seen": 13416000, + "step": 4260 + }, + { + "epoch": 0.2730298956532872, + "grad_norm": 32.823890686035156, + "learning_rate": 1.8232962275291728e-06, + "loss": 0.5038, + "num_input_tokens_seen": 13431360, + "step": 4265 + }, + { + "epoch": 0.2733499775942641, + "grad_norm": 50.578182220458984, + "learning_rate": 1.822661482129844e-06, + "loss": 0.4415, + "num_input_tokens_seen": 13446976, + "step": 4270 + }, + { + "epoch": 0.27367005953524104, + "grad_norm": 22.204975128173828, + "learning_rate": 1.8220257096279956e-06, + "loss": 0.3688, + "num_input_tokens_seen": 13463040, + "step": 4275 + }, + { + "epoch": 0.2739901414762179, + "grad_norm": 36.239845275878906, + "learning_rate": 1.8213889108173972e-06, + "loss": 0.6843, + "num_input_tokens_seen": 13478656, + "step": 4280 + }, + { + "epoch": 0.2743102234171948, + "grad_norm": 24.040267944335938, + "learning_rate": 1.8207510864930992e-06, + "loss": 0.4995, + "num_input_tokens_seen": 13495296, + "step": 4285 + }, + { + "epoch": 0.2746303053581717, + "grad_norm": 20.18925666809082, + "learning_rate": 1.8201122374514336e-06, + "loss": 0.5081, + "num_input_tokens_seen": 13510912, + "step": 4290 + }, + { + "epoch": 0.27495038729914856, + "grad_norm": 23.800817489624023, + "learning_rate": 1.8194723644900099e-06, + "loss": 0.4362, + "num_input_tokens_seen": 13525952, + "step": 4295 + }, + { + "epoch": 0.2752704692401255, + "grad_norm": 26.63628387451172, + "learning_rate": 1.8188314684077173e-06, + "loss": 0.5305, + "num_input_tokens_seen": 13546752, + "step": 4300 + }, + { + "epoch": 0.2755905511811024, + "grad_norm": 40.90980911254883, + "learning_rate": 1.8181895500047226e-06, + "loss": 0.5643, + "num_input_tokens_seen": 13561728, + "step": 4305 + }, + { + "epoch": 0.27591063312207925, + "grad_norm": 22.858909606933594, + "learning_rate": 1.817546610082468e-06, + "loss": 0.4562, + "num_input_tokens_seen": 13577344, + "step": 4310 + }, + { + "epoch": 0.27623071506305613, + "grad_norm": 28.205032348632812, + "learning_rate": 1.816902649443672e-06, + "loss": 0.4862, + "num_input_tokens_seen": 13592256, + "step": 4315 + }, + { + "epoch": 0.276550797004033, + "grad_norm": 37.81781005859375, + "learning_rate": 1.8162576688923262e-06, + "loss": 0.5403, + "num_input_tokens_seen": 13608832, + "step": 4320 + }, + { + "epoch": 0.27687087894500995, + "grad_norm": 27.493858337402344, + "learning_rate": 1.815611669233697e-06, + "loss": 0.5508, + "num_input_tokens_seen": 13624128, + "step": 4325 + }, + { + "epoch": 0.2771909608859868, + "grad_norm": 26.893049240112305, + "learning_rate": 1.8149646512743222e-06, + "loss": 0.5369, + "num_input_tokens_seen": 13640576, + "step": 4330 + }, + { + "epoch": 0.2775110428269637, + "grad_norm": 24.463943481445312, + "learning_rate": 1.8143166158220118e-06, + "loss": 0.4653, + "num_input_tokens_seen": 13655872, + "step": 4335 + }, + { + "epoch": 0.2778311247679406, + "grad_norm": 44.26751708984375, + "learning_rate": 1.8136675636858454e-06, + "loss": 0.6598, + "num_input_tokens_seen": 13672384, + "step": 4340 + }, + { + "epoch": 0.27815120670891746, + "grad_norm": 21.628820419311523, + "learning_rate": 1.8130174956761723e-06, + "loss": 0.408, + "num_input_tokens_seen": 13687296, + "step": 4345 + }, + { + "epoch": 0.2784712886498944, + "grad_norm": 26.918249130249023, + "learning_rate": 1.81236641260461e-06, + "loss": 0.5366, + "num_input_tokens_seen": 13702528, + "step": 4350 + }, + { + "epoch": 0.2787913705908713, + "grad_norm": 68.7051773071289, + "learning_rate": 1.811714315284043e-06, + "loss": 0.498, + "num_input_tokens_seen": 13717568, + "step": 4355 + }, + { + "epoch": 0.27911145253184816, + "grad_norm": 21.975799560546875, + "learning_rate": 1.8110612045286229e-06, + "loss": 0.4252, + "num_input_tokens_seen": 13733568, + "step": 4360 + }, + { + "epoch": 0.27943153447282504, + "grad_norm": 22.67809295654297, + "learning_rate": 1.8104070811537661e-06, + "loss": 0.3778, + "num_input_tokens_seen": 13749312, + "step": 4365 + }, + { + "epoch": 0.2797516164138019, + "grad_norm": 18.252212524414062, + "learning_rate": 1.8097519459761533e-06, + "loss": 0.4405, + "num_input_tokens_seen": 13765952, + "step": 4370 + }, + { + "epoch": 0.2800716983547788, + "grad_norm": 49.828643798828125, + "learning_rate": 1.8090957998137283e-06, + "loss": 0.5056, + "num_input_tokens_seen": 13781440, + "step": 4375 + }, + { + "epoch": 0.28039178029575573, + "grad_norm": 49.623783111572266, + "learning_rate": 1.8084386434856978e-06, + "loss": 0.4528, + "num_input_tokens_seen": 13796864, + "step": 4380 + }, + { + "epoch": 0.2807118622367326, + "grad_norm": 27.9454402923584, + "learning_rate": 1.8077804778125283e-06, + "loss": 0.4966, + "num_input_tokens_seen": 13812736, + "step": 4385 + }, + { + "epoch": 0.2810319441777095, + "grad_norm": 57.37892150878906, + "learning_rate": 1.807121303615948e-06, + "loss": 0.489, + "num_input_tokens_seen": 13828288, + "step": 4390 + }, + { + "epoch": 0.28135202611868637, + "grad_norm": 46.05356216430664, + "learning_rate": 1.8064611217189434e-06, + "loss": 0.4168, + "num_input_tokens_seen": 13845568, + "step": 4395 + }, + { + "epoch": 0.28167210805966325, + "grad_norm": 23.337419509887695, + "learning_rate": 1.8057999329457596e-06, + "loss": 0.3902, + "num_input_tokens_seen": 13860608, + "step": 4400 + }, + { + "epoch": 0.2819921900006402, + "grad_norm": 44.485595703125, + "learning_rate": 1.8051377381218984e-06, + "loss": 0.5584, + "num_input_tokens_seen": 13876608, + "step": 4405 + }, + { + "epoch": 0.28231227194161707, + "grad_norm": 37.74899673461914, + "learning_rate": 1.8044745380741177e-06, + "loss": 0.5613, + "num_input_tokens_seen": 13893632, + "step": 4410 + }, + { + "epoch": 0.28263235388259395, + "grad_norm": 53.780860900878906, + "learning_rate": 1.8038103336304306e-06, + "loss": 0.3872, + "num_input_tokens_seen": 13909312, + "step": 4415 + }, + { + "epoch": 0.2829524358235708, + "grad_norm": 28.769821166992188, + "learning_rate": 1.8031451256201042e-06, + "loss": 0.5718, + "num_input_tokens_seen": 13925824, + "step": 4420 + }, + { + "epoch": 0.2832725177645477, + "grad_norm": 25.95047950744629, + "learning_rate": 1.8024789148736589e-06, + "loss": 0.5314, + "num_input_tokens_seen": 13942336, + "step": 4425 + }, + { + "epoch": 0.28359259970552464, + "grad_norm": 31.877147674560547, + "learning_rate": 1.8018117022228655e-06, + "loss": 0.4017, + "num_input_tokens_seen": 13957760, + "step": 4430 + }, + { + "epoch": 0.2839126816465015, + "grad_norm": 53.280426025390625, + "learning_rate": 1.8011434885007479e-06, + "loss": 0.5044, + "num_input_tokens_seen": 13972992, + "step": 4435 + }, + { + "epoch": 0.2842327635874784, + "grad_norm": 34.214813232421875, + "learning_rate": 1.8004742745415787e-06, + "loss": 0.4184, + "num_input_tokens_seen": 13988736, + "step": 4440 + }, + { + "epoch": 0.2845528455284553, + "grad_norm": 21.93120002746582, + "learning_rate": 1.799804061180879e-06, + "loss": 0.5398, + "num_input_tokens_seen": 14003520, + "step": 4445 + }, + { + "epoch": 0.28487292746943216, + "grad_norm": 30.483198165893555, + "learning_rate": 1.799132849255418e-06, + "loss": 0.5016, + "num_input_tokens_seen": 14020608, + "step": 4450 + }, + { + "epoch": 0.28519300941040904, + "grad_norm": 36.024600982666016, + "learning_rate": 1.798460639603212e-06, + "loss": 0.4168, + "num_input_tokens_seen": 14035328, + "step": 4455 + }, + { + "epoch": 0.285513091351386, + "grad_norm": 38.09769058227539, + "learning_rate": 1.7977874330635224e-06, + "loss": 0.4799, + "num_input_tokens_seen": 14050816, + "step": 4460 + }, + { + "epoch": 0.28583317329236285, + "grad_norm": 20.480865478515625, + "learning_rate": 1.7971132304768555e-06, + "loss": 0.3319, + "num_input_tokens_seen": 14066880, + "step": 4465 + }, + { + "epoch": 0.28615325523333973, + "grad_norm": 26.70489501953125, + "learning_rate": 1.7964380326849612e-06, + "loss": 0.5081, + "num_input_tokens_seen": 14081728, + "step": 4470 + }, + { + "epoch": 0.2864733371743166, + "grad_norm": 20.658885955810547, + "learning_rate": 1.795761840530832e-06, + "loss": 0.4885, + "num_input_tokens_seen": 14097984, + "step": 4475 + }, + { + "epoch": 0.2867934191152935, + "grad_norm": 27.52956771850586, + "learning_rate": 1.7950846548587015e-06, + "loss": 0.4186, + "num_input_tokens_seen": 14115264, + "step": 4480 + }, + { + "epoch": 0.2871135010562704, + "grad_norm": 18.73761749267578, + "learning_rate": 1.7944064765140445e-06, + "loss": 0.2815, + "num_input_tokens_seen": 14129472, + "step": 4485 + }, + { + "epoch": 0.2874335829972473, + "grad_norm": 34.10987091064453, + "learning_rate": 1.7937273063435735e-06, + "loss": 0.5567, + "num_input_tokens_seen": 14144896, + "step": 4490 + }, + { + "epoch": 0.2877536649382242, + "grad_norm": 28.596620559692383, + "learning_rate": 1.7930471451952416e-06, + "loss": 0.3559, + "num_input_tokens_seen": 14159744, + "step": 4495 + }, + { + "epoch": 0.28807374687920106, + "grad_norm": 41.09931564331055, + "learning_rate": 1.7923659939182377e-06, + "loss": 0.4935, + "num_input_tokens_seen": 14176384, + "step": 4500 + }, + { + "epoch": 0.28839382882017794, + "grad_norm": 43.514373779296875, + "learning_rate": 1.7916838533629866e-06, + "loss": 0.5441, + "num_input_tokens_seen": 14192320, + "step": 4505 + }, + { + "epoch": 0.2887139107611549, + "grad_norm": 23.66765594482422, + "learning_rate": 1.7910007243811493e-06, + "loss": 0.3929, + "num_input_tokens_seen": 14208192, + "step": 4510 + }, + { + "epoch": 0.28903399270213176, + "grad_norm": 57.50717544555664, + "learning_rate": 1.7903166078256202e-06, + "loss": 0.5737, + "num_input_tokens_seen": 14223104, + "step": 4515 + }, + { + "epoch": 0.28935407464310864, + "grad_norm": 56.785011291503906, + "learning_rate": 1.789631504550527e-06, + "loss": 0.4223, + "num_input_tokens_seen": 14238464, + "step": 4520 + }, + { + "epoch": 0.2896741565840855, + "grad_norm": 34.91124725341797, + "learning_rate": 1.7889454154112288e-06, + "loss": 0.3912, + "num_input_tokens_seen": 14254656, + "step": 4525 + }, + { + "epoch": 0.2899942385250624, + "grad_norm": 42.82390594482422, + "learning_rate": 1.7882583412643167e-06, + "loss": 0.3903, + "num_input_tokens_seen": 14268928, + "step": 4530 + }, + { + "epoch": 0.29031432046603933, + "grad_norm": 33.92388153076172, + "learning_rate": 1.78757028296761e-06, + "loss": 0.4489, + "num_input_tokens_seen": 14285952, + "step": 4535 + }, + { + "epoch": 0.2906344024070162, + "grad_norm": 20.465452194213867, + "learning_rate": 1.7868812413801582e-06, + "loss": 0.3513, + "num_input_tokens_seen": 14301760, + "step": 4540 + }, + { + "epoch": 0.2909544843479931, + "grad_norm": 55.36177444458008, + "learning_rate": 1.7861912173622372e-06, + "loss": 0.4985, + "num_input_tokens_seen": 14318208, + "step": 4545 + }, + { + "epoch": 0.29127456628896997, + "grad_norm": 42.88619613647461, + "learning_rate": 1.7855002117753504e-06, + "loss": 0.4537, + "num_input_tokens_seen": 14334144, + "step": 4550 + }, + { + "epoch": 0.29159464822994685, + "grad_norm": 47.642051696777344, + "learning_rate": 1.7848082254822266e-06, + "loss": 0.5489, + "num_input_tokens_seen": 14349120, + "step": 4555 + }, + { + "epoch": 0.29191473017092373, + "grad_norm": 54.71379852294922, + "learning_rate": 1.7841152593468185e-06, + "loss": 0.4957, + "num_input_tokens_seen": 14365376, + "step": 4560 + }, + { + "epoch": 0.29223481211190067, + "grad_norm": 34.66753005981445, + "learning_rate": 1.7834213142343026e-06, + "loss": 0.4636, + "num_input_tokens_seen": 14381568, + "step": 4565 + }, + { + "epoch": 0.29255489405287755, + "grad_norm": 31.637672424316406, + "learning_rate": 1.7827263910110777e-06, + "loss": 0.4752, + "num_input_tokens_seen": 14397312, + "step": 4570 + }, + { + "epoch": 0.2928749759938544, + "grad_norm": 36.772586822509766, + "learning_rate": 1.7820304905447632e-06, + "loss": 0.4631, + "num_input_tokens_seen": 14412928, + "step": 4575 + }, + { + "epoch": 0.2931950579348313, + "grad_norm": 58.408050537109375, + "learning_rate": 1.7813336137041991e-06, + "loss": 0.4515, + "num_input_tokens_seen": 14427968, + "step": 4580 + }, + { + "epoch": 0.2935151398758082, + "grad_norm": 38.953765869140625, + "learning_rate": 1.7806357613594447e-06, + "loss": 0.3591, + "num_input_tokens_seen": 14442944, + "step": 4585 + }, + { + "epoch": 0.2938352218167851, + "grad_norm": 20.911611557006836, + "learning_rate": 1.7799369343817764e-06, + "loss": 0.452, + "num_input_tokens_seen": 14458176, + "step": 4590 + }, + { + "epoch": 0.294155303757762, + "grad_norm": 25.856048583984375, + "learning_rate": 1.7792371336436883e-06, + "loss": 0.3618, + "num_input_tokens_seen": 14473600, + "step": 4595 + }, + { + "epoch": 0.2944753856987389, + "grad_norm": 30.00708770751953, + "learning_rate": 1.7785363600188892e-06, + "loss": 0.6561, + "num_input_tokens_seen": 14488896, + "step": 4600 + }, + { + "epoch": 0.29479546763971576, + "grad_norm": 38.951820373535156, + "learning_rate": 1.7778346143823038e-06, + "loss": 0.5982, + "num_input_tokens_seen": 14502784, + "step": 4605 + }, + { + "epoch": 0.29511554958069264, + "grad_norm": 34.70473861694336, + "learning_rate": 1.7771318976100696e-06, + "loss": 0.4353, + "num_input_tokens_seen": 14520000, + "step": 4610 + }, + { + "epoch": 0.2954356315216696, + "grad_norm": 30.836076736450195, + "learning_rate": 1.7764282105795364e-06, + "loss": 0.3531, + "num_input_tokens_seen": 14536320, + "step": 4615 + }, + { + "epoch": 0.29575571346264645, + "grad_norm": 45.481624603271484, + "learning_rate": 1.7757235541692663e-06, + "loss": 0.4688, + "num_input_tokens_seen": 14551808, + "step": 4620 + }, + { + "epoch": 0.29607579540362333, + "grad_norm": 21.652406692504883, + "learning_rate": 1.7750179292590306e-06, + "loss": 0.3106, + "num_input_tokens_seen": 14566976, + "step": 4625 + }, + { + "epoch": 0.2963958773446002, + "grad_norm": 25.99779510498047, + "learning_rate": 1.7743113367298107e-06, + "loss": 0.3511, + "num_input_tokens_seen": 14583104, + "step": 4630 + }, + { + "epoch": 0.2967159592855771, + "grad_norm": 39.653045654296875, + "learning_rate": 1.7736037774637955e-06, + "loss": 0.4515, + "num_input_tokens_seen": 14598336, + "step": 4635 + }, + { + "epoch": 0.29703604122655397, + "grad_norm": 51.65949249267578, + "learning_rate": 1.772895252344381e-06, + "loss": 0.5141, + "num_input_tokens_seen": 14615232, + "step": 4640 + }, + { + "epoch": 0.2973561231675309, + "grad_norm": 20.959184646606445, + "learning_rate": 1.7721857622561692e-06, + "loss": 0.388, + "num_input_tokens_seen": 14630848, + "step": 4645 + }, + { + "epoch": 0.2976762051085078, + "grad_norm": 33.919654846191406, + "learning_rate": 1.7714753080849664e-06, + "loss": 0.4668, + "num_input_tokens_seen": 14647040, + "step": 4650 + }, + { + "epoch": 0.29799628704948466, + "grad_norm": 23.6036319732666, + "learning_rate": 1.7707638907177837e-06, + "loss": 0.4196, + "num_input_tokens_seen": 14661888, + "step": 4655 + }, + { + "epoch": 0.29831636899046154, + "grad_norm": 153.35108947753906, + "learning_rate": 1.7700515110428336e-06, + "loss": 0.7015, + "num_input_tokens_seen": 14677696, + "step": 4660 + }, + { + "epoch": 0.2986364509314384, + "grad_norm": 27.929115295410156, + "learning_rate": 1.7693381699495307e-06, + "loss": 0.4795, + "num_input_tokens_seen": 14693184, + "step": 4665 + }, + { + "epoch": 0.29895653287241536, + "grad_norm": 31.637182235717773, + "learning_rate": 1.7686238683284894e-06, + "loss": 0.3712, + "num_input_tokens_seen": 14707904, + "step": 4670 + }, + { + "epoch": 0.29927661481339224, + "grad_norm": 26.304777145385742, + "learning_rate": 1.7679086070715237e-06, + "loss": 0.3553, + "num_input_tokens_seen": 14724096, + "step": 4675 + }, + { + "epoch": 0.2995966967543691, + "grad_norm": 39.02534484863281, + "learning_rate": 1.7671923870716459e-06, + "loss": 0.4575, + "num_input_tokens_seen": 14738752, + "step": 4680 + }, + { + "epoch": 0.299916778695346, + "grad_norm": 42.02716827392578, + "learning_rate": 1.7664752092230652e-06, + "loss": 0.355, + "num_input_tokens_seen": 14753664, + "step": 4685 + }, + { + "epoch": 0.3002368606363229, + "grad_norm": 33.584014892578125, + "learning_rate": 1.7657570744211863e-06, + "loss": 0.3708, + "num_input_tokens_seen": 14769152, + "step": 4690 + }, + { + "epoch": 0.30036489341271366, + "eval_loss": 0.46517089009284973, + "eval_runtime": 50.6285, + "eval_samples_per_second": 274.272, + "eval_steps_per_second": 34.289, + "num_input_tokens_seen": 14775488, + "step": 4692 + }, + { + "epoch": 0.3005569425772998, + "grad_norm": 46.08897399902344, + "learning_rate": 1.765037983562609e-06, + "loss": 0.5088, + "num_input_tokens_seen": 14784128, + "step": 4695 + }, + { + "epoch": 0.3008770245182767, + "grad_norm": 46.2584228515625, + "learning_rate": 1.7643179375451264e-06, + "loss": 0.4325, + "num_input_tokens_seen": 14799936, + "step": 4700 + }, + { + "epoch": 0.30119710645925357, + "grad_norm": 42.52885437011719, + "learning_rate": 1.7635969372677252e-06, + "loss": 0.6141, + "num_input_tokens_seen": 14814208, + "step": 4705 + }, + { + "epoch": 0.30151718840023045, + "grad_norm": 40.1710205078125, + "learning_rate": 1.7628749836305818e-06, + "loss": 0.4862, + "num_input_tokens_seen": 14829504, + "step": 4710 + }, + { + "epoch": 0.30183727034120733, + "grad_norm": 31.896709442138672, + "learning_rate": 1.7621520775350645e-06, + "loss": 0.4053, + "num_input_tokens_seen": 14843968, + "step": 4715 + }, + { + "epoch": 0.30215735228218427, + "grad_norm": 34.46201705932617, + "learning_rate": 1.7614282198837293e-06, + "loss": 0.4685, + "num_input_tokens_seen": 14859840, + "step": 4720 + }, + { + "epoch": 0.30247743422316115, + "grad_norm": 44.51201248168945, + "learning_rate": 1.7607034115803219e-06, + "loss": 0.4873, + "num_input_tokens_seen": 14875648, + "step": 4725 + }, + { + "epoch": 0.302797516164138, + "grad_norm": 29.218721389770508, + "learning_rate": 1.7599776535297734e-06, + "loss": 0.4244, + "num_input_tokens_seen": 14890560, + "step": 4730 + }, + { + "epoch": 0.3031175981051149, + "grad_norm": 42.597896575927734, + "learning_rate": 1.7592509466382012e-06, + "loss": 0.478, + "num_input_tokens_seen": 14906688, + "step": 4735 + }, + { + "epoch": 0.3034376800460918, + "grad_norm": 57.83599853515625, + "learning_rate": 1.7585232918129076e-06, + "loss": 0.5622, + "num_input_tokens_seen": 14922496, + "step": 4740 + }, + { + "epoch": 0.30375776198706866, + "grad_norm": 38.69477081298828, + "learning_rate": 1.757794689962378e-06, + "loss": 0.4656, + "num_input_tokens_seen": 14938880, + "step": 4745 + }, + { + "epoch": 0.3040778439280456, + "grad_norm": 46.978797912597656, + "learning_rate": 1.7570651419962807e-06, + "loss": 0.5035, + "num_input_tokens_seen": 14954112, + "step": 4750 + }, + { + "epoch": 0.3043979258690225, + "grad_norm": 44.47570037841797, + "learning_rate": 1.7563346488254647e-06, + "loss": 0.4471, + "num_input_tokens_seen": 14969536, + "step": 4755 + }, + { + "epoch": 0.30471800780999936, + "grad_norm": 35.79732894897461, + "learning_rate": 1.755603211361959e-06, + "loss": 0.351, + "num_input_tokens_seen": 14985728, + "step": 4760 + }, + { + "epoch": 0.30503808975097624, + "grad_norm": 21.590835571289062, + "learning_rate": 1.7548708305189722e-06, + "loss": 0.4522, + "num_input_tokens_seen": 15003904, + "step": 4765 + }, + { + "epoch": 0.3053581716919531, + "grad_norm": 63.41920852661133, + "learning_rate": 1.7541375072108905e-06, + "loss": 0.5752, + "num_input_tokens_seen": 15019328, + "step": 4770 + }, + { + "epoch": 0.30567825363293005, + "grad_norm": 48.414974212646484, + "learning_rate": 1.7534032423532766e-06, + "loss": 0.4732, + "num_input_tokens_seen": 15033856, + "step": 4775 + }, + { + "epoch": 0.30599833557390693, + "grad_norm": 23.076284408569336, + "learning_rate": 1.7526680368628685e-06, + "loss": 0.361, + "num_input_tokens_seen": 15051200, + "step": 4780 + }, + { + "epoch": 0.3063184175148838, + "grad_norm": 33.26884841918945, + "learning_rate": 1.751931891657579e-06, + "loss": 0.4427, + "num_input_tokens_seen": 15066368, + "step": 4785 + }, + { + "epoch": 0.3066384994558607, + "grad_norm": 21.855182647705078, + "learning_rate": 1.7511948076564943e-06, + "loss": 0.3568, + "num_input_tokens_seen": 15081600, + "step": 4790 + }, + { + "epoch": 0.30695858139683757, + "grad_norm": 33.14620590209961, + "learning_rate": 1.7504567857798722e-06, + "loss": 0.5404, + "num_input_tokens_seen": 15097536, + "step": 4795 + }, + { + "epoch": 0.3072786633378145, + "grad_norm": 37.192012786865234, + "learning_rate": 1.7497178269491417e-06, + "loss": 0.4943, + "num_input_tokens_seen": 15113728, + "step": 4800 + }, + { + "epoch": 0.3075987452787914, + "grad_norm": 23.352327346801758, + "learning_rate": 1.7489779320869014e-06, + "loss": 0.5532, + "num_input_tokens_seen": 15130048, + "step": 4805 + }, + { + "epoch": 0.30791882721976827, + "grad_norm": 24.619413375854492, + "learning_rate": 1.7482371021169193e-06, + "loss": 0.3715, + "num_input_tokens_seen": 15145600, + "step": 4810 + }, + { + "epoch": 0.30823890916074514, + "grad_norm": 45.18055725097656, + "learning_rate": 1.7474953379641297e-06, + "loss": 0.4077, + "num_input_tokens_seen": 15162368, + "step": 4815 + }, + { + "epoch": 0.308558991101722, + "grad_norm": 35.786495208740234, + "learning_rate": 1.746752640554634e-06, + "loss": 0.438, + "num_input_tokens_seen": 15178368, + "step": 4820 + }, + { + "epoch": 0.3088790730426989, + "grad_norm": 25.801467895507812, + "learning_rate": 1.7460090108156988e-06, + "loss": 0.5348, + "num_input_tokens_seen": 15193408, + "step": 4825 + }, + { + "epoch": 0.30919915498367584, + "grad_norm": 25.487167358398438, + "learning_rate": 1.7452644496757548e-06, + "loss": 0.3155, + "num_input_tokens_seen": 15208640, + "step": 4830 + }, + { + "epoch": 0.3095192369246527, + "grad_norm": 47.79193878173828, + "learning_rate": 1.7445189580643946e-06, + "loss": 0.4557, + "num_input_tokens_seen": 15224192, + "step": 4835 + }, + { + "epoch": 0.3098393188656296, + "grad_norm": 29.753835678100586, + "learning_rate": 1.7437725369123737e-06, + "loss": 0.5187, + "num_input_tokens_seen": 15239616, + "step": 4840 + }, + { + "epoch": 0.3101594008066065, + "grad_norm": 33.8818359375, + "learning_rate": 1.7430251871516077e-06, + "loss": 0.4925, + "num_input_tokens_seen": 15255680, + "step": 4845 + }, + { + "epoch": 0.31047948274758336, + "grad_norm": 29.06200408935547, + "learning_rate": 1.7422769097151715e-06, + "loss": 0.5256, + "num_input_tokens_seen": 15271232, + "step": 4850 + }, + { + "epoch": 0.3107995646885603, + "grad_norm": 67.46397399902344, + "learning_rate": 1.7415277055372982e-06, + "loss": 0.5038, + "num_input_tokens_seen": 15287040, + "step": 4855 + }, + { + "epoch": 0.31111964662953717, + "grad_norm": 26.468515396118164, + "learning_rate": 1.7407775755533778e-06, + "loss": 0.5181, + "num_input_tokens_seen": 15304256, + "step": 4860 + }, + { + "epoch": 0.31143972857051405, + "grad_norm": 19.30422019958496, + "learning_rate": 1.7400265206999568e-06, + "loss": 0.364, + "num_input_tokens_seen": 15322112, + "step": 4865 + }, + { + "epoch": 0.31175981051149093, + "grad_norm": 68.16838836669922, + "learning_rate": 1.7392745419147362e-06, + "loss": 0.5297, + "num_input_tokens_seen": 15337216, + "step": 4870 + }, + { + "epoch": 0.3120798924524678, + "grad_norm": 37.93073654174805, + "learning_rate": 1.7385216401365693e-06, + "loss": 0.4478, + "num_input_tokens_seen": 15354048, + "step": 4875 + }, + { + "epoch": 0.31239997439344475, + "grad_norm": 30.45296287536621, + "learning_rate": 1.7377678163054638e-06, + "loss": 0.4964, + "num_input_tokens_seen": 15369344, + "step": 4880 + }, + { + "epoch": 0.3127200563344216, + "grad_norm": 47.27909469604492, + "learning_rate": 1.7370130713625775e-06, + "loss": 0.4864, + "num_input_tokens_seen": 15385920, + "step": 4885 + }, + { + "epoch": 0.3130401382753985, + "grad_norm": 24.398977279663086, + "learning_rate": 1.736257406250218e-06, + "loss": 0.3948, + "num_input_tokens_seen": 15401536, + "step": 4890 + }, + { + "epoch": 0.3133602202163754, + "grad_norm": 30.26610565185547, + "learning_rate": 1.735500821911842e-06, + "loss": 0.4629, + "num_input_tokens_seen": 15417152, + "step": 4895 + }, + { + "epoch": 0.31368030215735226, + "grad_norm": 31.936508178710938, + "learning_rate": 1.7347433192920544e-06, + "loss": 0.4961, + "num_input_tokens_seen": 15431872, + "step": 4900 + }, + { + "epoch": 0.3140003840983292, + "grad_norm": 20.389596939086914, + "learning_rate": 1.7339848993366056e-06, + "loss": 0.4021, + "num_input_tokens_seen": 15447552, + "step": 4905 + }, + { + "epoch": 0.3143204660393061, + "grad_norm": 32.99045181274414, + "learning_rate": 1.7332255629923922e-06, + "loss": 0.4667, + "num_input_tokens_seen": 15464384, + "step": 4910 + }, + { + "epoch": 0.31464054798028296, + "grad_norm": 24.761920928955078, + "learning_rate": 1.732465311207454e-06, + "loss": 0.5038, + "num_input_tokens_seen": 15479808, + "step": 4915 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 45.01390838623047, + "learning_rate": 1.731704144930975e-06, + "loss": 0.5018, + "num_input_tokens_seen": 15496512, + "step": 4920 + }, + { + "epoch": 0.3152807118622367, + "grad_norm": 38.63529968261719, + "learning_rate": 1.7309420651132797e-06, + "loss": 0.4137, + "num_input_tokens_seen": 15512896, + "step": 4925 + }, + { + "epoch": 0.3156007938032136, + "grad_norm": 34.324134826660156, + "learning_rate": 1.7301790727058343e-06, + "loss": 0.3295, + "num_input_tokens_seen": 15528064, + "step": 4930 + }, + { + "epoch": 0.31592087574419053, + "grad_norm": 34.598453521728516, + "learning_rate": 1.7294151686612431e-06, + "loss": 0.3593, + "num_input_tokens_seen": 15543424, + "step": 4935 + }, + { + "epoch": 0.3162409576851674, + "grad_norm": 44.17629623413086, + "learning_rate": 1.7286503539332495e-06, + "loss": 0.5778, + "num_input_tokens_seen": 15560192, + "step": 4940 + }, + { + "epoch": 0.3165610396261443, + "grad_norm": 43.37092208862305, + "learning_rate": 1.7278846294767337e-06, + "loss": 0.3873, + "num_input_tokens_seen": 15576128, + "step": 4945 + }, + { + "epoch": 0.31688112156712117, + "grad_norm": 79.67957305908203, + "learning_rate": 1.7271179962477118e-06, + "loss": 0.6923, + "num_input_tokens_seen": 15592576, + "step": 4950 + }, + { + "epoch": 0.31720120350809805, + "grad_norm": 51.76191711425781, + "learning_rate": 1.7263504552033341e-06, + "loss": 0.4372, + "num_input_tokens_seen": 15607744, + "step": 4955 + }, + { + "epoch": 0.317521285449075, + "grad_norm": 24.416505813598633, + "learning_rate": 1.725582007301885e-06, + "loss": 0.481, + "num_input_tokens_seen": 15623360, + "step": 4960 + }, + { + "epoch": 0.31784136739005187, + "grad_norm": 37.16778564453125, + "learning_rate": 1.7248126535027806e-06, + "loss": 0.4251, + "num_input_tokens_seen": 15638656, + "step": 4965 + }, + { + "epoch": 0.31816144933102875, + "grad_norm": 40.46333694458008, + "learning_rate": 1.7240423947665678e-06, + "loss": 0.4569, + "num_input_tokens_seen": 15654400, + "step": 4970 + }, + { + "epoch": 0.3184815312720056, + "grad_norm": 24.423137664794922, + "learning_rate": 1.723271232054924e-06, + "loss": 0.3867, + "num_input_tokens_seen": 15670016, + "step": 4975 + }, + { + "epoch": 0.3188016132129825, + "grad_norm": 54.15736389160156, + "learning_rate": 1.722499166330655e-06, + "loss": 0.5265, + "num_input_tokens_seen": 15686208, + "step": 4980 + }, + { + "epoch": 0.31912169515395944, + "grad_norm": 22.534137725830078, + "learning_rate": 1.7217261985576936e-06, + "loss": 0.443, + "num_input_tokens_seen": 15702592, + "step": 4985 + }, + { + "epoch": 0.3194417770949363, + "grad_norm": 72.37775421142578, + "learning_rate": 1.7209523297010992e-06, + "loss": 0.5114, + "num_input_tokens_seen": 15717696, + "step": 4990 + }, + { + "epoch": 0.3197618590359132, + "grad_norm": 37.13178634643555, + "learning_rate": 1.7201775607270564e-06, + "loss": 0.4619, + "num_input_tokens_seen": 15733184, + "step": 4995 + }, + { + "epoch": 0.3200819409768901, + "grad_norm": 32.471622467041016, + "learning_rate": 1.7194018926028733e-06, + "loss": 0.5318, + "num_input_tokens_seen": 15749888, + "step": 5000 + }, + { + "epoch": 0.32040202291786696, + "grad_norm": 36.298892974853516, + "learning_rate": 1.7186253262969803e-06, + "loss": 0.3622, + "num_input_tokens_seen": 15768384, + "step": 5005 + }, + { + "epoch": 0.32072210485884384, + "grad_norm": 28.648771286010742, + "learning_rate": 1.7178478627789299e-06, + "loss": 0.3291, + "num_input_tokens_seen": 15784448, + "step": 5010 + }, + { + "epoch": 0.3210421867998208, + "grad_norm": 24.88985824584961, + "learning_rate": 1.7170695030193944e-06, + "loss": 0.4122, + "num_input_tokens_seen": 15800512, + "step": 5015 + }, + { + "epoch": 0.32136226874079765, + "grad_norm": 33.581695556640625, + "learning_rate": 1.716290247990165e-06, + "loss": 0.4778, + "num_input_tokens_seen": 15815680, + "step": 5020 + }, + { + "epoch": 0.32168235068177453, + "grad_norm": 34.822837829589844, + "learning_rate": 1.715510098664151e-06, + "loss": 0.3896, + "num_input_tokens_seen": 15830528, + "step": 5025 + }, + { + "epoch": 0.3220024326227514, + "grad_norm": 30.73101043701172, + "learning_rate": 1.7147290560153777e-06, + "loss": 0.5141, + "num_input_tokens_seen": 15845568, + "step": 5030 + }, + { + "epoch": 0.3223225145637283, + "grad_norm": 36.853206634521484, + "learning_rate": 1.7139471210189862e-06, + "loss": 0.447, + "num_input_tokens_seen": 15861632, + "step": 5035 + }, + { + "epoch": 0.3226425965047052, + "grad_norm": 34.865318298339844, + "learning_rate": 1.7131642946512312e-06, + "loss": 0.543, + "num_input_tokens_seen": 15877632, + "step": 5040 + }, + { + "epoch": 0.3229626784456821, + "grad_norm": 21.021453857421875, + "learning_rate": 1.712380577889481e-06, + "loss": 0.3918, + "num_input_tokens_seen": 15893184, + "step": 5045 + }, + { + "epoch": 0.323282760386659, + "grad_norm": 34.583648681640625, + "learning_rate": 1.711595971712215e-06, + "loss": 0.3963, + "num_input_tokens_seen": 15908416, + "step": 5050 + }, + { + "epoch": 0.32360284232763586, + "grad_norm": 25.76999855041504, + "learning_rate": 1.7108104770990234e-06, + "loss": 0.4042, + "num_input_tokens_seen": 15924224, + "step": 5055 + }, + { + "epoch": 0.32392292426861274, + "grad_norm": 30.277738571166992, + "learning_rate": 1.7100240950306052e-06, + "loss": 0.254, + "num_input_tokens_seen": 15940032, + "step": 5060 + }, + { + "epoch": 0.3242430062095897, + "grad_norm": 39.45482635498047, + "learning_rate": 1.7092368264887677e-06, + "loss": 0.4647, + "num_input_tokens_seen": 15954944, + "step": 5065 + }, + { + "epoch": 0.32456308815056656, + "grad_norm": 54.28398513793945, + "learning_rate": 1.7084486724564252e-06, + "loss": 0.4846, + "num_input_tokens_seen": 15970624, + "step": 5070 + }, + { + "epoch": 0.32488317009154344, + "grad_norm": 33.94327163696289, + "learning_rate": 1.707659633917597e-06, + "loss": 0.4092, + "num_input_tokens_seen": 15986688, + "step": 5075 + }, + { + "epoch": 0.3252032520325203, + "grad_norm": 41.53309631347656, + "learning_rate": 1.7068697118574064e-06, + "loss": 0.4098, + "num_input_tokens_seen": 16002752, + "step": 5080 + }, + { + "epoch": 0.3255233339734972, + "grad_norm": 25.390241622924805, + "learning_rate": 1.7060789072620816e-06, + "loss": 0.4931, + "num_input_tokens_seen": 16018112, + "step": 5085 + }, + { + "epoch": 0.32584341591447413, + "grad_norm": 26.28127098083496, + "learning_rate": 1.7052872211189509e-06, + "loss": 0.4288, + "num_input_tokens_seen": 16033984, + "step": 5090 + }, + { + "epoch": 0.326163497855451, + "grad_norm": 24.156646728515625, + "learning_rate": 1.7044946544164431e-06, + "loss": 0.3304, + "num_input_tokens_seen": 16049536, + "step": 5095 + }, + { + "epoch": 0.3264835797964279, + "grad_norm": 35.91144943237305, + "learning_rate": 1.703701208144088e-06, + "loss": 0.3713, + "num_input_tokens_seen": 16066304, + "step": 5100 + }, + { + "epoch": 0.32680366173740477, + "grad_norm": 45.723236083984375, + "learning_rate": 1.702906883292512e-06, + "loss": 0.4829, + "num_input_tokens_seen": 16081536, + "step": 5105 + }, + { + "epoch": 0.32712374367838165, + "grad_norm": 27.605358123779297, + "learning_rate": 1.7021116808534393e-06, + "loss": 0.5586, + "num_input_tokens_seen": 16096896, + "step": 5110 + }, + { + "epoch": 0.32744382561935853, + "grad_norm": 46.61053466796875, + "learning_rate": 1.7013156018196893e-06, + "loss": 0.443, + "num_input_tokens_seen": 16112960, + "step": 5115 + }, + { + "epoch": 0.32776390756033547, + "grad_norm": 35.758201599121094, + "learning_rate": 1.7005186471851759e-06, + "loss": 0.4038, + "num_input_tokens_seen": 16129344, + "step": 5120 + }, + { + "epoch": 0.32808398950131235, + "grad_norm": 24.61606216430664, + "learning_rate": 1.6997208179449066e-06, + "loss": 0.6052, + "num_input_tokens_seen": 16147776, + "step": 5125 + }, + { + "epoch": 0.3284040714422892, + "grad_norm": 42.087249755859375, + "learning_rate": 1.6989221150949806e-06, + "loss": 0.3508, + "num_input_tokens_seen": 16162880, + "step": 5130 + }, + { + "epoch": 0.3287241533832661, + "grad_norm": 18.78113555908203, + "learning_rate": 1.6981225396325873e-06, + "loss": 0.2676, + "num_input_tokens_seen": 16179392, + "step": 5135 + }, + { + "epoch": 0.329044235324243, + "grad_norm": 38.857826232910156, + "learning_rate": 1.6973220925560067e-06, + "loss": 0.504, + "num_input_tokens_seen": 16194560, + "step": 5140 + }, + { + "epoch": 0.3293643172652199, + "grad_norm": 51.61846923828125, + "learning_rate": 1.696520774864606e-06, + "loss": 0.4243, + "num_input_tokens_seen": 16210112, + "step": 5145 + }, + { + "epoch": 0.3296843992061968, + "grad_norm": 55.612979888916016, + "learning_rate": 1.69571858755884e-06, + "loss": 0.464, + "num_input_tokens_seen": 16225856, + "step": 5150 + }, + { + "epoch": 0.3300044811471737, + "grad_norm": 25.002553939819336, + "learning_rate": 1.6949155316402487e-06, + "loss": 0.4314, + "num_input_tokens_seen": 16241536, + "step": 5155 + }, + { + "epoch": 0.33032456308815056, + "grad_norm": 35.29892349243164, + "learning_rate": 1.6941116081114566e-06, + "loss": 0.3807, + "num_input_tokens_seen": 16256384, + "step": 5160 + }, + { + "epoch": 0.33064464502912744, + "grad_norm": 39.39152908325195, + "learning_rate": 1.6933068179761722e-06, + "loss": 0.398, + "num_input_tokens_seen": 16271360, + "step": 5165 + }, + { + "epoch": 0.3309647269701044, + "grad_norm": 27.22661781311035, + "learning_rate": 1.6925011622391857e-06, + "loss": 0.4122, + "num_input_tokens_seen": 16286656, + "step": 5170 + }, + { + "epoch": 0.33128480891108125, + "grad_norm": 19.13918685913086, + "learning_rate": 1.6916946419063667e-06, + "loss": 0.4255, + "num_input_tokens_seen": 16302592, + "step": 5175 + }, + { + "epoch": 0.33160489085205813, + "grad_norm": 27.572052001953125, + "learning_rate": 1.690887257984666e-06, + "loss": 0.5442, + "num_input_tokens_seen": 16318656, + "step": 5180 + }, + { + "epoch": 0.331924972793035, + "grad_norm": 27.06237030029297, + "learning_rate": 1.690079011482112e-06, + "loss": 0.4755, + "num_input_tokens_seen": 16334016, + "step": 5185 + }, + { + "epoch": 0.3322450547340119, + "grad_norm": 43.45961380004883, + "learning_rate": 1.6892699034078096e-06, + "loss": 0.5287, + "num_input_tokens_seen": 16349888, + "step": 5190 + }, + { + "epoch": 0.33256513667498877, + "grad_norm": 41.60029220581055, + "learning_rate": 1.68845993477194e-06, + "loss": 0.503, + "num_input_tokens_seen": 16365056, + "step": 5195 + }, + { + "epoch": 0.3328852186159657, + "grad_norm": 27.33567237854004, + "learning_rate": 1.6876491065857584e-06, + "loss": 0.3973, + "num_input_tokens_seen": 16380032, + "step": 5200 + }, + { + "epoch": 0.3332053005569426, + "grad_norm": 32.64432907104492, + "learning_rate": 1.6868374198615928e-06, + "loss": 0.6461, + "num_input_tokens_seen": 16394752, + "step": 5205 + }, + { + "epoch": 0.33352538249791946, + "grad_norm": 20.55217742919922, + "learning_rate": 1.6860248756128448e-06, + "loss": 0.4714, + "num_input_tokens_seen": 16410368, + "step": 5210 + }, + { + "epoch": 0.33384546443889634, + "grad_norm": 22.915000915527344, + "learning_rate": 1.6852114748539844e-06, + "loss": 0.4142, + "num_input_tokens_seen": 16425088, + "step": 5215 + }, + { + "epoch": 0.3341655463798732, + "grad_norm": 24.764463424682617, + "learning_rate": 1.6843972186005525e-06, + "loss": 0.3446, + "num_input_tokens_seen": 16441152, + "step": 5220 + }, + { + "epoch": 0.33448562832085016, + "grad_norm": 35.099388122558594, + "learning_rate": 1.6835821078691577e-06, + "loss": 0.4705, + "num_input_tokens_seen": 16458240, + "step": 5225 + }, + { + "epoch": 0.33480571026182704, + "grad_norm": 39.11796188354492, + "learning_rate": 1.6827661436774746e-06, + "loss": 0.4342, + "num_input_tokens_seen": 16474112, + "step": 5230 + }, + { + "epoch": 0.3351257922028039, + "grad_norm": 44.745906829833984, + "learning_rate": 1.681949327044245e-06, + "loss": 0.3957, + "num_input_tokens_seen": 16490560, + "step": 5235 + }, + { + "epoch": 0.3354458741437808, + "grad_norm": 70.94788360595703, + "learning_rate": 1.6811316589892734e-06, + "loss": 0.6821, + "num_input_tokens_seen": 16505728, + "step": 5240 + }, + { + "epoch": 0.3357659560847577, + "grad_norm": 24.731172561645508, + "learning_rate": 1.6803131405334284e-06, + "loss": 0.4364, + "num_input_tokens_seen": 16521856, + "step": 5245 + }, + { + "epoch": 0.3360860380257346, + "grad_norm": 31.324085235595703, + "learning_rate": 1.6794937726986396e-06, + "loss": 0.4436, + "num_input_tokens_seen": 16537792, + "step": 5250 + }, + { + "epoch": 0.3364061199667115, + "grad_norm": 42.410850524902344, + "learning_rate": 1.6786735565078974e-06, + "loss": 0.4347, + "num_input_tokens_seen": 16553408, + "step": 5255 + }, + { + "epoch": 0.33672620190768837, + "grad_norm": 27.741994857788086, + "learning_rate": 1.677852492985251e-06, + "loss": 0.4233, + "num_input_tokens_seen": 16570112, + "step": 5260 + }, + { + "epoch": 0.33704628384866525, + "grad_norm": 60.623374938964844, + "learning_rate": 1.6770305831558086e-06, + "loss": 0.5003, + "num_input_tokens_seen": 16586304, + "step": 5265 + }, + { + "epoch": 0.33736636578964213, + "grad_norm": 15.280008316040039, + "learning_rate": 1.6762078280457342e-06, + "loss": 0.3912, + "num_input_tokens_seen": 16601920, + "step": 5270 + }, + { + "epoch": 0.33768644773061907, + "grad_norm": 29.975696563720703, + "learning_rate": 1.6753842286822465e-06, + "loss": 0.4725, + "num_input_tokens_seen": 16618240, + "step": 5275 + }, + { + "epoch": 0.33800652967159595, + "grad_norm": 33.5026969909668, + "learning_rate": 1.6745597860936199e-06, + "loss": 0.5845, + "num_input_tokens_seen": 16633408, + "step": 5280 + }, + { + "epoch": 0.3383266116125728, + "grad_norm": 39.31660842895508, + "learning_rate": 1.6737345013091794e-06, + "loss": 0.4484, + "num_input_tokens_seen": 16649664, + "step": 5285 + }, + { + "epoch": 0.3386466935535497, + "grad_norm": 37.725284576416016, + "learning_rate": 1.672908375359304e-06, + "loss": 0.4686, + "num_input_tokens_seen": 16664896, + "step": 5290 + }, + { + "epoch": 0.3389667754945266, + "grad_norm": 49.54829406738281, + "learning_rate": 1.6720814092754209e-06, + "loss": 0.5565, + "num_input_tokens_seen": 16680384, + "step": 5295 + }, + { + "epoch": 0.33928685743550346, + "grad_norm": 22.089405059814453, + "learning_rate": 1.6712536040900075e-06, + "loss": 0.3785, + "num_input_tokens_seen": 16696192, + "step": 5300 + }, + { + "epoch": 0.3396069393764804, + "grad_norm": 26.588197708129883, + "learning_rate": 1.6704249608365878e-06, + "loss": 0.4741, + "num_input_tokens_seen": 16727104, + "step": 5305 + }, + { + "epoch": 0.3399270213174573, + "grad_norm": 29.5950870513916, + "learning_rate": 1.669595480549733e-06, + "loss": 0.4291, + "num_input_tokens_seen": 16741696, + "step": 5310 + }, + { + "epoch": 0.34024710325843416, + "grad_norm": 36.820213317871094, + "learning_rate": 1.6687651642650587e-06, + "loss": 0.4384, + "num_input_tokens_seen": 16757120, + "step": 5315 + }, + { + "epoch": 0.34056718519941104, + "grad_norm": 28.92207145690918, + "learning_rate": 1.6679340130192245e-06, + "loss": 0.4572, + "num_input_tokens_seen": 16772416, + "step": 5320 + }, + { + "epoch": 0.3408872671403879, + "grad_norm": 26.009944915771484, + "learning_rate": 1.667102027849933e-06, + "loss": 0.3287, + "num_input_tokens_seen": 16788352, + "step": 5325 + }, + { + "epoch": 0.34120734908136485, + "grad_norm": 38.97822952270508, + "learning_rate": 1.6662692097959266e-06, + "loss": 0.3582, + "num_input_tokens_seen": 16803648, + "step": 5330 + }, + { + "epoch": 0.34152743102234173, + "grad_norm": 49.21427536010742, + "learning_rate": 1.6654355598969894e-06, + "loss": 0.4741, + "num_input_tokens_seen": 16818944, + "step": 5335 + }, + { + "epoch": 0.3418475129633186, + "grad_norm": 33.456058502197266, + "learning_rate": 1.6646010791939423e-06, + "loss": 0.5007, + "num_input_tokens_seen": 16833984, + "step": 5340 + }, + { + "epoch": 0.3421675949042955, + "grad_norm": 29.826610565185547, + "learning_rate": 1.6637657687286446e-06, + "loss": 0.5632, + "num_input_tokens_seen": 16849280, + "step": 5345 + }, + { + "epoch": 0.34248767684527237, + "grad_norm": 30.897554397583008, + "learning_rate": 1.6629296295439912e-06, + "loss": 0.4051, + "num_input_tokens_seen": 16865664, + "step": 5350 + }, + { + "epoch": 0.3428077587862493, + "grad_norm": 46.125911712646484, + "learning_rate": 1.6620926626839116e-06, + "loss": 0.4945, + "num_input_tokens_seen": 16881536, + "step": 5355 + }, + { + "epoch": 0.3431278407272262, + "grad_norm": 27.395605087280273, + "learning_rate": 1.661254869193369e-06, + "loss": 0.4456, + "num_input_tokens_seen": 16898816, + "step": 5360 + }, + { + "epoch": 0.34344792266820307, + "grad_norm": 46.8023796081543, + "learning_rate": 1.6604162501183581e-06, + "loss": 0.5174, + "num_input_tokens_seen": 16915136, + "step": 5365 + }, + { + "epoch": 0.34376800460917994, + "grad_norm": 31.79302406311035, + "learning_rate": 1.6595768065059045e-06, + "loss": 0.4742, + "num_input_tokens_seen": 16931200, + "step": 5370 + }, + { + "epoch": 0.3440880865501568, + "grad_norm": 28.743654251098633, + "learning_rate": 1.6587365394040641e-06, + "loss": 0.4691, + "num_input_tokens_seen": 16946816, + "step": 5375 + }, + { + "epoch": 0.3444081684911337, + "grad_norm": 29.590286254882812, + "learning_rate": 1.6578954498619195e-06, + "loss": 0.3826, + "num_input_tokens_seen": 16962880, + "step": 5380 + }, + { + "epoch": 0.34472825043211064, + "grad_norm": 32.09335708618164, + "learning_rate": 1.6570535389295814e-06, + "loss": 0.4712, + "num_input_tokens_seen": 16978240, + "step": 5385 + }, + { + "epoch": 0.3450483323730875, + "grad_norm": 21.793235778808594, + "learning_rate": 1.6562108076581853e-06, + "loss": 0.3684, + "num_input_tokens_seen": 16993728, + "step": 5390 + }, + { + "epoch": 0.3453684143140644, + "grad_norm": 40.34245681762695, + "learning_rate": 1.6553672570998912e-06, + "loss": 0.5846, + "num_input_tokens_seen": 17009728, + "step": 5395 + }, + { + "epoch": 0.3456884962550413, + "grad_norm": 37.5211067199707, + "learning_rate": 1.6545228883078815e-06, + "loss": 0.414, + "num_input_tokens_seen": 17024640, + "step": 5400 + }, + { + "epoch": 0.34600857819601816, + "grad_norm": 41.48558807373047, + "learning_rate": 1.653677702336361e-06, + "loss": 0.36, + "num_input_tokens_seen": 17040512, + "step": 5405 + }, + { + "epoch": 0.3463286601369951, + "grad_norm": 20.21353530883789, + "learning_rate": 1.6528317002405538e-06, + "loss": 0.4801, + "num_input_tokens_seen": 17056064, + "step": 5410 + }, + { + "epoch": 0.34664874207797197, + "grad_norm": 31.45917320251465, + "learning_rate": 1.6519848830767043e-06, + "loss": 0.3685, + "num_input_tokens_seen": 17072448, + "step": 5415 + }, + { + "epoch": 0.34696882401894885, + "grad_norm": 43.27189254760742, + "learning_rate": 1.6511372519020726e-06, + "loss": 0.6228, + "num_input_tokens_seen": 17088320, + "step": 5420 + }, + { + "epoch": 0.34728890595992573, + "grad_norm": 36.178993225097656, + "learning_rate": 1.650288807774937e-06, + "loss": 0.4376, + "num_input_tokens_seen": 17104448, + "step": 5425 + }, + { + "epoch": 0.3476089879009026, + "grad_norm": 35.5722770690918, + "learning_rate": 1.6494395517545893e-06, + "loss": 0.3981, + "num_input_tokens_seen": 17121856, + "step": 5430 + }, + { + "epoch": 0.34792906984187955, + "grad_norm": 48.720191955566406, + "learning_rate": 1.6485894849013362e-06, + "loss": 0.5135, + "num_input_tokens_seen": 17136512, + "step": 5435 + }, + { + "epoch": 0.3482491517828564, + "grad_norm": 25.877552032470703, + "learning_rate": 1.6477386082764961e-06, + "loss": 0.4487, + "num_input_tokens_seen": 17152640, + "step": 5440 + }, + { + "epoch": 0.3485692337238333, + "grad_norm": 28.085277557373047, + "learning_rate": 1.6468869229423983e-06, + "loss": 0.3645, + "num_input_tokens_seen": 17167680, + "step": 5445 + }, + { + "epoch": 0.3488893156648102, + "grad_norm": 59.84387969970703, + "learning_rate": 1.6460344299623813e-06, + "loss": 0.6431, + "num_input_tokens_seen": 17183296, + "step": 5450 + }, + { + "epoch": 0.34920939760578706, + "grad_norm": 56.25507354736328, + "learning_rate": 1.6451811304007939e-06, + "loss": 0.5412, + "num_input_tokens_seen": 17198272, + "step": 5455 + }, + { + "epoch": 0.349529479546764, + "grad_norm": 46.85531997680664, + "learning_rate": 1.6443270253229895e-06, + "loss": 0.5194, + "num_input_tokens_seen": 17213376, + "step": 5460 + }, + { + "epoch": 0.3498495614877409, + "grad_norm": 39.22257995605469, + "learning_rate": 1.6434721157953288e-06, + "loss": 0.4614, + "num_input_tokens_seen": 17229632, + "step": 5465 + }, + { + "epoch": 0.35016964342871776, + "grad_norm": 34.838531494140625, + "learning_rate": 1.6426164028851765e-06, + "loss": 0.5873, + "num_input_tokens_seen": 17245696, + "step": 5470 + }, + { + "epoch": 0.3504257089814993, + "eval_loss": 0.44318872690200806, + "eval_runtime": 50.6001, + "eval_samples_per_second": 274.427, + "eval_steps_per_second": 34.308, + "num_input_tokens_seen": 17259840, + "step": 5474 + }, + { + "epoch": 0.35048972536969464, + "grad_norm": 28.226123809814453, + "learning_rate": 1.6417598876609002e-06, + "loss": 0.3797, + "num_input_tokens_seen": 17262976, + "step": 5475 + }, + { + "epoch": 0.3508098073106715, + "grad_norm": 37.542503356933594, + "learning_rate": 1.640902571191869e-06, + "loss": 0.4144, + "num_input_tokens_seen": 17278336, + "step": 5480 + }, + { + "epoch": 0.3511298892516484, + "grad_norm": 40.7253303527832, + "learning_rate": 1.6400444545484524e-06, + "loss": 0.3617, + "num_input_tokens_seen": 17293248, + "step": 5485 + }, + { + "epoch": 0.35144997119262533, + "grad_norm": 21.151514053344727, + "learning_rate": 1.6391855388020193e-06, + "loss": 0.428, + "num_input_tokens_seen": 17309184, + "step": 5490 + }, + { + "epoch": 0.3517700531336022, + "grad_norm": 35.13167953491211, + "learning_rate": 1.6383258250249363e-06, + "loss": 0.4654, + "num_input_tokens_seen": 17325248, + "step": 5495 + }, + { + "epoch": 0.3520901350745791, + "grad_norm": 19.110126495361328, + "learning_rate": 1.6374653142905661e-06, + "loss": 0.4297, + "num_input_tokens_seen": 17340736, + "step": 5500 + }, + { + "epoch": 0.35241021701555597, + "grad_norm": 35.75419235229492, + "learning_rate": 1.6366040076732662e-06, + "loss": 0.4224, + "num_input_tokens_seen": 17355904, + "step": 5505 + }, + { + "epoch": 0.35273029895653285, + "grad_norm": 28.760461807250977, + "learning_rate": 1.6357419062483882e-06, + "loss": 0.4675, + "num_input_tokens_seen": 17371264, + "step": 5510 + }, + { + "epoch": 0.3530503808975098, + "grad_norm": 25.240421295166016, + "learning_rate": 1.6348790110922758e-06, + "loss": 0.4268, + "num_input_tokens_seen": 17388608, + "step": 5515 + }, + { + "epoch": 0.35337046283848667, + "grad_norm": 28.650354385375977, + "learning_rate": 1.6340153232822635e-06, + "loss": 0.4558, + "num_input_tokens_seen": 17403712, + "step": 5520 + }, + { + "epoch": 0.35369054477946354, + "grad_norm": 44.04157257080078, + "learning_rate": 1.633150843896676e-06, + "loss": 0.5137, + "num_input_tokens_seen": 17421056, + "step": 5525 + }, + { + "epoch": 0.3540106267204404, + "grad_norm": 56.921592712402344, + "learning_rate": 1.6322855740148263e-06, + "loss": 0.5658, + "num_input_tokens_seen": 17436096, + "step": 5530 + }, + { + "epoch": 0.3543307086614173, + "grad_norm": 25.134639739990234, + "learning_rate": 1.6314195147170132e-06, + "loss": 0.3768, + "num_input_tokens_seen": 17452480, + "step": 5535 + }, + { + "epoch": 0.35465079060239424, + "grad_norm": 29.89691162109375, + "learning_rate": 1.6305526670845225e-06, + "loss": 0.4032, + "num_input_tokens_seen": 17467776, + "step": 5540 + }, + { + "epoch": 0.3549708725433711, + "grad_norm": 46.79875564575195, + "learning_rate": 1.6296850321996232e-06, + "loss": 0.4877, + "num_input_tokens_seen": 17482752, + "step": 5545 + }, + { + "epoch": 0.355290954484348, + "grad_norm": 34.09406280517578, + "learning_rate": 1.6288166111455683e-06, + "loss": 0.3843, + "num_input_tokens_seen": 17497792, + "step": 5550 + }, + { + "epoch": 0.3556110364253249, + "grad_norm": 23.421165466308594, + "learning_rate": 1.6279474050065906e-06, + "loss": 0.4878, + "num_input_tokens_seen": 17513024, + "step": 5555 + }, + { + "epoch": 0.35593111836630176, + "grad_norm": 27.991254806518555, + "learning_rate": 1.6270774148679054e-06, + "loss": 0.4049, + "num_input_tokens_seen": 17529024, + "step": 5560 + }, + { + "epoch": 0.35625120030727864, + "grad_norm": 17.627593994140625, + "learning_rate": 1.6262066418157048e-06, + "loss": 0.3788, + "num_input_tokens_seen": 17543936, + "step": 5565 + }, + { + "epoch": 0.35657128224825557, + "grad_norm": 51.489200592041016, + "learning_rate": 1.6253350869371595e-06, + "loss": 0.5444, + "num_input_tokens_seen": 17559168, + "step": 5570 + }, + { + "epoch": 0.35689136418923245, + "grad_norm": 35.092872619628906, + "learning_rate": 1.6244627513204158e-06, + "loss": 0.3861, + "num_input_tokens_seen": 17574912, + "step": 5575 + }, + { + "epoch": 0.35721144613020933, + "grad_norm": 23.55853843688965, + "learning_rate": 1.6235896360545954e-06, + "loss": 0.4319, + "num_input_tokens_seen": 17590272, + "step": 5580 + }, + { + "epoch": 0.3575315280711862, + "grad_norm": 42.771095275878906, + "learning_rate": 1.622715742229792e-06, + "loss": 0.4466, + "num_input_tokens_seen": 17605952, + "step": 5585 + }, + { + "epoch": 0.3578516100121631, + "grad_norm": 21.996267318725586, + "learning_rate": 1.6218410709370734e-06, + "loss": 0.3861, + "num_input_tokens_seen": 17621120, + "step": 5590 + }, + { + "epoch": 0.35817169195314, + "grad_norm": 43.53791046142578, + "learning_rate": 1.6209656232684768e-06, + "loss": 0.5462, + "num_input_tokens_seen": 17636096, + "step": 5595 + }, + { + "epoch": 0.3584917738941169, + "grad_norm": 88.72663116455078, + "learning_rate": 1.620089400317008e-06, + "loss": 0.4566, + "num_input_tokens_seen": 17652672, + "step": 5600 + }, + { + "epoch": 0.3588118558350938, + "grad_norm": 35.793540954589844, + "learning_rate": 1.6192124031766425e-06, + "loss": 0.4979, + "num_input_tokens_seen": 17668032, + "step": 5605 + }, + { + "epoch": 0.35913193777607066, + "grad_norm": 29.273569107055664, + "learning_rate": 1.6183346329423213e-06, + "loss": 0.4507, + "num_input_tokens_seen": 17683264, + "step": 5610 + }, + { + "epoch": 0.35945201971704754, + "grad_norm": 52.97650146484375, + "learning_rate": 1.6174560907099508e-06, + "loss": 0.3672, + "num_input_tokens_seen": 17699200, + "step": 5615 + }, + { + "epoch": 0.3597721016580245, + "grad_norm": 23.672475814819336, + "learning_rate": 1.6165767775764013e-06, + "loss": 0.3538, + "num_input_tokens_seen": 17714816, + "step": 5620 + }, + { + "epoch": 0.36009218359900136, + "grad_norm": 38.058650970458984, + "learning_rate": 1.6156966946395056e-06, + "loss": 0.4157, + "num_input_tokens_seen": 17732352, + "step": 5625 + }, + { + "epoch": 0.36041226553997824, + "grad_norm": 54.641357421875, + "learning_rate": 1.6148158429980577e-06, + "loss": 0.536, + "num_input_tokens_seen": 17748288, + "step": 5630 + }, + { + "epoch": 0.3607323474809551, + "grad_norm": 42.360755920410156, + "learning_rate": 1.6139342237518108e-06, + "loss": 0.3758, + "num_input_tokens_seen": 17763520, + "step": 5635 + }, + { + "epoch": 0.361052429421932, + "grad_norm": 33.51826095581055, + "learning_rate": 1.6130518380014773e-06, + "loss": 0.4256, + "num_input_tokens_seen": 17779328, + "step": 5640 + }, + { + "epoch": 0.3613725113629089, + "grad_norm": 36.83528137207031, + "learning_rate": 1.6121686868487259e-06, + "loss": 0.4313, + "num_input_tokens_seen": 17795584, + "step": 5645 + }, + { + "epoch": 0.3616925933038858, + "grad_norm": 17.509504318237305, + "learning_rate": 1.6112847713961815e-06, + "loss": 0.4449, + "num_input_tokens_seen": 17810368, + "step": 5650 + }, + { + "epoch": 0.3620126752448627, + "grad_norm": 28.996376037597656, + "learning_rate": 1.610400092747423e-06, + "loss": 0.4365, + "num_input_tokens_seen": 17826496, + "step": 5655 + }, + { + "epoch": 0.36233275718583957, + "grad_norm": 31.747772216796875, + "learning_rate": 1.609514652006981e-06, + "loss": 0.4266, + "num_input_tokens_seen": 17841344, + "step": 5660 + }, + { + "epoch": 0.36265283912681645, + "grad_norm": 32.14071273803711, + "learning_rate": 1.60862845028034e-06, + "loss": 0.5632, + "num_input_tokens_seen": 17857408, + "step": 5665 + }, + { + "epoch": 0.36297292106779333, + "grad_norm": 24.347280502319336, + "learning_rate": 1.6077414886739327e-06, + "loss": 0.4209, + "num_input_tokens_seen": 17873280, + "step": 5670 + }, + { + "epoch": 0.36329300300877027, + "grad_norm": 22.06682586669922, + "learning_rate": 1.6068537682951412e-06, + "loss": 0.5023, + "num_input_tokens_seen": 17888448, + "step": 5675 + }, + { + "epoch": 0.36361308494974715, + "grad_norm": 28.86912727355957, + "learning_rate": 1.6059652902522947e-06, + "loss": 0.4459, + "num_input_tokens_seen": 17904320, + "step": 5680 + }, + { + "epoch": 0.363933166890724, + "grad_norm": 50.29701232910156, + "learning_rate": 1.6050760556546683e-06, + "loss": 0.3725, + "num_input_tokens_seen": 17919744, + "step": 5685 + }, + { + "epoch": 0.3642532488317009, + "grad_norm": 26.409318923950195, + "learning_rate": 1.6041860656124823e-06, + "loss": 0.3823, + "num_input_tokens_seen": 17934656, + "step": 5690 + }, + { + "epoch": 0.3645733307726778, + "grad_norm": 40.44452667236328, + "learning_rate": 1.6032953212368993e-06, + "loss": 0.5608, + "num_input_tokens_seen": 17950976, + "step": 5695 + }, + { + "epoch": 0.3648934127136547, + "grad_norm": 24.44096565246582, + "learning_rate": 1.6024038236400243e-06, + "loss": 0.465, + "num_input_tokens_seen": 17966400, + "step": 5700 + }, + { + "epoch": 0.3652134946546316, + "grad_norm": 122.20569610595703, + "learning_rate": 1.6015115739349027e-06, + "loss": 0.5704, + "num_input_tokens_seen": 17983872, + "step": 5705 + }, + { + "epoch": 0.3655335765956085, + "grad_norm": 33.78934860229492, + "learning_rate": 1.6006185732355183e-06, + "loss": 0.5358, + "num_input_tokens_seen": 17999680, + "step": 5710 + }, + { + "epoch": 0.36585365853658536, + "grad_norm": 22.399660110473633, + "learning_rate": 1.5997248226567931e-06, + "loss": 0.3807, + "num_input_tokens_seen": 18014784, + "step": 5715 + }, + { + "epoch": 0.36617374047756224, + "grad_norm": 25.733877182006836, + "learning_rate": 1.5988303233145853e-06, + "loss": 0.5063, + "num_input_tokens_seen": 18029888, + "step": 5720 + }, + { + "epoch": 0.3664938224185392, + "grad_norm": 30.036779403686523, + "learning_rate": 1.597935076325688e-06, + "loss": 0.3721, + "num_input_tokens_seen": 18045632, + "step": 5725 + }, + { + "epoch": 0.36681390435951605, + "grad_norm": 42.842872619628906, + "learning_rate": 1.5970390828078272e-06, + "loss": 0.5996, + "num_input_tokens_seen": 18060928, + "step": 5730 + }, + { + "epoch": 0.36713398630049293, + "grad_norm": 17.993152618408203, + "learning_rate": 1.5961423438796615e-06, + "loss": 0.4616, + "num_input_tokens_seen": 18076352, + "step": 5735 + }, + { + "epoch": 0.3674540682414698, + "grad_norm": 42.84749984741211, + "learning_rate": 1.59524486066078e-06, + "loss": 0.45, + "num_input_tokens_seen": 18092096, + "step": 5740 + }, + { + "epoch": 0.3677741501824467, + "grad_norm": 29.25870132446289, + "learning_rate": 1.5943466342717012e-06, + "loss": 0.5875, + "num_input_tokens_seen": 18107648, + "step": 5745 + }, + { + "epoch": 0.36809423212342357, + "grad_norm": 28.201173782348633, + "learning_rate": 1.5934476658338708e-06, + "loss": 0.4526, + "num_input_tokens_seen": 18123264, + "step": 5750 + }, + { + "epoch": 0.3684143140644005, + "grad_norm": 29.33237075805664, + "learning_rate": 1.5925479564696619e-06, + "loss": 0.5482, + "num_input_tokens_seen": 18138368, + "step": 5755 + }, + { + "epoch": 0.3687343960053774, + "grad_norm": 12.648244857788086, + "learning_rate": 1.5916475073023721e-06, + "loss": 0.3433, + "num_input_tokens_seen": 18154432, + "step": 5760 + }, + { + "epoch": 0.36905447794635426, + "grad_norm": 35.97001266479492, + "learning_rate": 1.5907463194562226e-06, + "loss": 0.3385, + "num_input_tokens_seen": 18171200, + "step": 5765 + }, + { + "epoch": 0.36937455988733114, + "grad_norm": 24.797889709472656, + "learning_rate": 1.589844394056357e-06, + "loss": 0.3763, + "num_input_tokens_seen": 18187008, + "step": 5770 + }, + { + "epoch": 0.369694641828308, + "grad_norm": 50.53974914550781, + "learning_rate": 1.5889417322288403e-06, + "loss": 0.3462, + "num_input_tokens_seen": 18202944, + "step": 5775 + }, + { + "epoch": 0.37001472376928496, + "grad_norm": 84.17024993896484, + "learning_rate": 1.5880383351006556e-06, + "loss": 0.4963, + "num_input_tokens_seen": 18217984, + "step": 5780 + }, + { + "epoch": 0.37033480571026184, + "grad_norm": 29.01326560974121, + "learning_rate": 1.5871342037997055e-06, + "loss": 0.5257, + "num_input_tokens_seen": 18233984, + "step": 5785 + }, + { + "epoch": 0.3706548876512387, + "grad_norm": 52.05530548095703, + "learning_rate": 1.5862293394548082e-06, + "loss": 0.416, + "num_input_tokens_seen": 18249024, + "step": 5790 + }, + { + "epoch": 0.3709749695922156, + "grad_norm": 72.29715728759766, + "learning_rate": 1.5853237431956972e-06, + "loss": 0.3512, + "num_input_tokens_seen": 18264256, + "step": 5795 + }, + { + "epoch": 0.3712950515331925, + "grad_norm": 43.78818893432617, + "learning_rate": 1.5844174161530206e-06, + "loss": 0.554, + "num_input_tokens_seen": 18279936, + "step": 5800 + }, + { + "epoch": 0.3716151334741694, + "grad_norm": 26.14434814453125, + "learning_rate": 1.5835103594583382e-06, + "loss": 0.4147, + "num_input_tokens_seen": 18295488, + "step": 5805 + }, + { + "epoch": 0.3719352154151463, + "grad_norm": 26.584754943847656, + "learning_rate": 1.5826025742441207e-06, + "loss": 0.5357, + "num_input_tokens_seen": 18311360, + "step": 5810 + }, + { + "epoch": 0.37225529735612317, + "grad_norm": 28.070344924926758, + "learning_rate": 1.5816940616437486e-06, + "loss": 0.4282, + "num_input_tokens_seen": 18326592, + "step": 5815 + }, + { + "epoch": 0.37257537929710005, + "grad_norm": 36.15549850463867, + "learning_rate": 1.5807848227915108e-06, + "loss": 0.3564, + "num_input_tokens_seen": 18344000, + "step": 5820 + }, + { + "epoch": 0.37289546123807693, + "grad_norm": 63.37150955200195, + "learning_rate": 1.5798748588226028e-06, + "loss": 0.4888, + "num_input_tokens_seen": 18359872, + "step": 5825 + }, + { + "epoch": 0.3732155431790538, + "grad_norm": 36.90925216674805, + "learning_rate": 1.578964170873125e-06, + "loss": 0.472, + "num_input_tokens_seen": 18374400, + "step": 5830 + }, + { + "epoch": 0.37353562512003075, + "grad_norm": 19.994869232177734, + "learning_rate": 1.5780527600800816e-06, + "loss": 0.2731, + "num_input_tokens_seen": 18390656, + "step": 5835 + }, + { + "epoch": 0.3738557070610076, + "grad_norm": 66.3774185180664, + "learning_rate": 1.5771406275813808e-06, + "loss": 0.4561, + "num_input_tokens_seen": 18406400, + "step": 5840 + }, + { + "epoch": 0.3741757890019845, + "grad_norm": 54.15401840209961, + "learning_rate": 1.5762277745158297e-06, + "loss": 0.5531, + "num_input_tokens_seen": 18422848, + "step": 5845 + }, + { + "epoch": 0.3744958709429614, + "grad_norm": 93.29429626464844, + "learning_rate": 1.5753142020231365e-06, + "loss": 0.5008, + "num_input_tokens_seen": 18438912, + "step": 5850 + }, + { + "epoch": 0.37481595288393826, + "grad_norm": 40.965824127197266, + "learning_rate": 1.5743999112439073e-06, + "loss": 0.5494, + "num_input_tokens_seen": 18455488, + "step": 5855 + }, + { + "epoch": 0.3751360348249152, + "grad_norm": 41.4587516784668, + "learning_rate": 1.5734849033196446e-06, + "loss": 0.4015, + "num_input_tokens_seen": 18470080, + "step": 5860 + }, + { + "epoch": 0.3754561167658921, + "grad_norm": 41.16543197631836, + "learning_rate": 1.5725691793927468e-06, + "loss": 0.4426, + "num_input_tokens_seen": 18484480, + "step": 5865 + }, + { + "epoch": 0.37577619870686896, + "grad_norm": 24.49448585510254, + "learning_rate": 1.5716527406065057e-06, + "loss": 0.4731, + "num_input_tokens_seen": 18501312, + "step": 5870 + }, + { + "epoch": 0.37609628064784584, + "grad_norm": 26.67026138305664, + "learning_rate": 1.570735588105106e-06, + "loss": 0.4582, + "num_input_tokens_seen": 18515968, + "step": 5875 + }, + { + "epoch": 0.3764163625888227, + "grad_norm": 17.646738052368164, + "learning_rate": 1.5698177230336234e-06, + "loss": 0.3808, + "num_input_tokens_seen": 18531200, + "step": 5880 + }, + { + "epoch": 0.37673644452979965, + "grad_norm": 34.10994338989258, + "learning_rate": 1.568899146538023e-06, + "loss": 0.2686, + "num_input_tokens_seen": 18547712, + "step": 5885 + }, + { + "epoch": 0.37705652647077653, + "grad_norm": 28.377954483032227, + "learning_rate": 1.5679798597651587e-06, + "loss": 0.4112, + "num_input_tokens_seen": 18562752, + "step": 5890 + }, + { + "epoch": 0.3773766084117534, + "grad_norm": 42.726253509521484, + "learning_rate": 1.5670598638627706e-06, + "loss": 0.4375, + "num_input_tokens_seen": 18578368, + "step": 5895 + }, + { + "epoch": 0.3776966903527303, + "grad_norm": 42.27223587036133, + "learning_rate": 1.5661391599794847e-06, + "loss": 0.3833, + "num_input_tokens_seen": 18593408, + "step": 5900 + }, + { + "epoch": 0.37801677229370717, + "grad_norm": 29.259695053100586, + "learning_rate": 1.56521774926481e-06, + "loss": 0.4148, + "num_input_tokens_seen": 18607872, + "step": 5905 + }, + { + "epoch": 0.3783368542346841, + "grad_norm": 25.923545837402344, + "learning_rate": 1.5642956328691393e-06, + "loss": 0.359, + "num_input_tokens_seen": 18624000, + "step": 5910 + }, + { + "epoch": 0.378656936175661, + "grad_norm": 53.90917205810547, + "learning_rate": 1.5633728119437451e-06, + "loss": 0.5591, + "num_input_tokens_seen": 18640704, + "step": 5915 + }, + { + "epoch": 0.37897701811663786, + "grad_norm": 30.155330657958984, + "learning_rate": 1.5624492876407807e-06, + "loss": 0.472, + "num_input_tokens_seen": 18658368, + "step": 5920 + }, + { + "epoch": 0.37929710005761474, + "grad_norm": 47.06120681762695, + "learning_rate": 1.5615250611132766e-06, + "loss": 0.411, + "num_input_tokens_seen": 18675584, + "step": 5925 + }, + { + "epoch": 0.3796171819985916, + "grad_norm": 25.19417381286621, + "learning_rate": 1.5606001335151405e-06, + "loss": 0.5683, + "num_input_tokens_seen": 18691904, + "step": 5930 + }, + { + "epoch": 0.3799372639395685, + "grad_norm": 36.34967803955078, + "learning_rate": 1.5596745060011561e-06, + "loss": 0.3734, + "num_input_tokens_seen": 18708736, + "step": 5935 + }, + { + "epoch": 0.38025734588054544, + "grad_norm": 36.44337844848633, + "learning_rate": 1.5587481797269793e-06, + "loss": 0.3492, + "num_input_tokens_seen": 18724032, + "step": 5940 + }, + { + "epoch": 0.3805774278215223, + "grad_norm": 39.71046447753906, + "learning_rate": 1.5578211558491396e-06, + "loss": 0.4266, + "num_input_tokens_seen": 18740352, + "step": 5945 + }, + { + "epoch": 0.3808975097624992, + "grad_norm": 26.960233688354492, + "learning_rate": 1.5568934355250375e-06, + "loss": 0.3346, + "num_input_tokens_seen": 18754560, + "step": 5950 + }, + { + "epoch": 0.3812175917034761, + "grad_norm": 69.33877563476562, + "learning_rate": 1.5559650199129423e-06, + "loss": 0.6693, + "num_input_tokens_seen": 18769280, + "step": 5955 + }, + { + "epoch": 0.38153767364445296, + "grad_norm": 53.183929443359375, + "learning_rate": 1.5550359101719921e-06, + "loss": 0.4131, + "num_input_tokens_seen": 18784512, + "step": 5960 + }, + { + "epoch": 0.3818577555854299, + "grad_norm": 62.83601379394531, + "learning_rate": 1.554106107462191e-06, + "loss": 0.3615, + "num_input_tokens_seen": 18800384, + "step": 5965 + }, + { + "epoch": 0.38217783752640677, + "grad_norm": 42.66127014160156, + "learning_rate": 1.5531756129444092e-06, + "loss": 0.4262, + "num_input_tokens_seen": 18815552, + "step": 5970 + }, + { + "epoch": 0.38249791946738365, + "grad_norm": 26.47112464904785, + "learning_rate": 1.5522444277803796e-06, + "loss": 0.4191, + "num_input_tokens_seen": 18830080, + "step": 5975 + }, + { + "epoch": 0.38281800140836053, + "grad_norm": 33.1724967956543, + "learning_rate": 1.5513125531326976e-06, + "loss": 0.4244, + "num_input_tokens_seen": 18846272, + "step": 5980 + }, + { + "epoch": 0.3831380833493374, + "grad_norm": 28.296476364135742, + "learning_rate": 1.5503799901648198e-06, + "loss": 0.3802, + "num_input_tokens_seen": 18860928, + "step": 5985 + }, + { + "epoch": 0.38345816529031435, + "grad_norm": 63.02308654785156, + "learning_rate": 1.5494467400410625e-06, + "loss": 0.4461, + "num_input_tokens_seen": 18877120, + "step": 5990 + }, + { + "epoch": 0.3837782472312912, + "grad_norm": 48.09440612792969, + "learning_rate": 1.5485128039265986e-06, + "loss": 0.6047, + "num_input_tokens_seen": 18892224, + "step": 5995 + }, + { + "epoch": 0.3840983291722681, + "grad_norm": 52.94186782836914, + "learning_rate": 1.547578182987459e-06, + "loss": 0.445, + "num_input_tokens_seen": 18907008, + "step": 6000 + }, + { + "epoch": 0.384418411113245, + "grad_norm": 22.386451721191406, + "learning_rate": 1.5466428783905286e-06, + "loss": 0.2856, + "num_input_tokens_seen": 18922368, + "step": 6005 + }, + { + "epoch": 0.38473849305422186, + "grad_norm": 33.801048278808594, + "learning_rate": 1.5457068913035463e-06, + "loss": 0.4418, + "num_input_tokens_seen": 18937536, + "step": 6010 + }, + { + "epoch": 0.38505857499519874, + "grad_norm": 40.13835906982422, + "learning_rate": 1.544770222895103e-06, + "loss": 0.5024, + "num_input_tokens_seen": 18954048, + "step": 6015 + }, + { + "epoch": 0.3853786569361757, + "grad_norm": 30.525466918945312, + "learning_rate": 1.5438328743346398e-06, + "loss": 0.5102, + "num_input_tokens_seen": 18969472, + "step": 6020 + }, + { + "epoch": 0.38569873887715256, + "grad_norm": 24.051631927490234, + "learning_rate": 1.5428948467924478e-06, + "loss": 0.4192, + "num_input_tokens_seen": 18983872, + "step": 6025 + }, + { + "epoch": 0.38601882081812944, + "grad_norm": 21.4184513092041, + "learning_rate": 1.5419561414396656e-06, + "loss": 0.3268, + "num_input_tokens_seen": 18999360, + "step": 6030 + }, + { + "epoch": 0.3863389027591063, + "grad_norm": 24.729501724243164, + "learning_rate": 1.541016759448277e-06, + "loss": 0.4969, + "num_input_tokens_seen": 19015424, + "step": 6035 + }, + { + "epoch": 0.3866589847000832, + "grad_norm": 32.38375473022461, + "learning_rate": 1.5400767019911124e-06, + "loss": 0.3775, + "num_input_tokens_seen": 19031616, + "step": 6040 + }, + { + "epoch": 0.38697906664106013, + "grad_norm": 31.431982040405273, + "learning_rate": 1.539135970241844e-06, + "loss": 0.4886, + "num_input_tokens_seen": 19047040, + "step": 6045 + }, + { + "epoch": 0.387299148582037, + "grad_norm": 53.52016067504883, + "learning_rate": 1.5381945653749866e-06, + "loss": 0.4842, + "num_input_tokens_seen": 19062848, + "step": 6050 + }, + { + "epoch": 0.3876192305230139, + "grad_norm": 81.80306243896484, + "learning_rate": 1.5372524885658952e-06, + "loss": 0.5516, + "num_input_tokens_seen": 19078976, + "step": 6055 + }, + { + "epoch": 0.38793931246399077, + "grad_norm": 26.691001892089844, + "learning_rate": 1.5363097409907638e-06, + "loss": 0.3732, + "num_input_tokens_seen": 19093632, + "step": 6060 + }, + { + "epoch": 0.38825939440496765, + "grad_norm": 24.735050201416016, + "learning_rate": 1.535366323826624e-06, + "loss": 0.3583, + "num_input_tokens_seen": 19109056, + "step": 6065 + }, + { + "epoch": 0.3885794763459446, + "grad_norm": 48.98762130737305, + "learning_rate": 1.534422238251343e-06, + "loss": 0.3623, + "num_input_tokens_seen": 19124544, + "step": 6070 + }, + { + "epoch": 0.38889955828692147, + "grad_norm": 35.63262939453125, + "learning_rate": 1.5334774854436223e-06, + "loss": 0.3844, + "num_input_tokens_seen": 19140480, + "step": 6075 + }, + { + "epoch": 0.38921964022789834, + "grad_norm": 37.14643859863281, + "learning_rate": 1.5325320665829975e-06, + "loss": 0.378, + "num_input_tokens_seen": 19156736, + "step": 6080 + }, + { + "epoch": 0.3895397221688752, + "grad_norm": 34.23735809326172, + "learning_rate": 1.5315859828498352e-06, + "loss": 0.4624, + "num_input_tokens_seen": 19171520, + "step": 6085 + }, + { + "epoch": 0.3898598041098521, + "grad_norm": 29.676471710205078, + "learning_rate": 1.5306392354253316e-06, + "loss": 0.5057, + "num_input_tokens_seen": 19187136, + "step": 6090 + }, + { + "epoch": 0.39017988605082904, + "grad_norm": 23.943275451660156, + "learning_rate": 1.5296918254915123e-06, + "loss": 0.4389, + "num_input_tokens_seen": 19201856, + "step": 6095 + }, + { + "epoch": 0.3904999679918059, + "grad_norm": 27.726776123046875, + "learning_rate": 1.5287437542312296e-06, + "loss": 0.3827, + "num_input_tokens_seen": 19216704, + "step": 6100 + }, + { + "epoch": 0.3908200499327828, + "grad_norm": 55.275352478027344, + "learning_rate": 1.5277950228281614e-06, + "loss": 0.5423, + "num_input_tokens_seen": 19233408, + "step": 6105 + }, + { + "epoch": 0.3911401318737597, + "grad_norm": 27.080801010131836, + "learning_rate": 1.52684563246681e-06, + "loss": 0.3617, + "num_input_tokens_seen": 19250048, + "step": 6110 + }, + { + "epoch": 0.39146021381473656, + "grad_norm": 18.170150756835938, + "learning_rate": 1.5258955843325015e-06, + "loss": 0.4241, + "num_input_tokens_seen": 19266560, + "step": 6115 + }, + { + "epoch": 0.39178029575571344, + "grad_norm": 60.83952713012695, + "learning_rate": 1.5249448796113804e-06, + "loss": 0.5018, + "num_input_tokens_seen": 19281408, + "step": 6120 + }, + { + "epoch": 0.39210037769669037, + "grad_norm": 48.25818634033203, + "learning_rate": 1.5239935194904141e-06, + "loss": 0.4797, + "num_input_tokens_seen": 19296384, + "step": 6125 + }, + { + "epoch": 0.39242045963766725, + "grad_norm": 24.223154067993164, + "learning_rate": 1.523041505157386e-06, + "loss": 0.3946, + "num_input_tokens_seen": 19312000, + "step": 6130 + }, + { + "epoch": 0.39274054157864413, + "grad_norm": 27.672351837158203, + "learning_rate": 1.5220888378008977e-06, + "loss": 0.395, + "num_input_tokens_seen": 19327488, + "step": 6135 + }, + { + "epoch": 0.393060623519621, + "grad_norm": 23.515743255615234, + "learning_rate": 1.5211355186103654e-06, + "loss": 0.4748, + "num_input_tokens_seen": 19342080, + "step": 6140 + }, + { + "epoch": 0.3933807054605979, + "grad_norm": 54.429962158203125, + "learning_rate": 1.5201815487760192e-06, + "loss": 0.4435, + "num_input_tokens_seen": 19358336, + "step": 6145 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 88.29194641113281, + "learning_rate": 1.5192269294889019e-06, + "loss": 0.5032, + "num_input_tokens_seen": 19373376, + "step": 6150 + }, + { + "epoch": 0.3940208693425517, + "grad_norm": 29.944011688232422, + "learning_rate": 1.5182716619408666e-06, + "loss": 0.4021, + "num_input_tokens_seen": 19388608, + "step": 6155 + }, + { + "epoch": 0.3943409512835286, + "grad_norm": 32.160797119140625, + "learning_rate": 1.5173157473245764e-06, + "loss": 0.5383, + "num_input_tokens_seen": 19403264, + "step": 6160 + }, + { + "epoch": 0.39466103322450546, + "grad_norm": 38.58219528198242, + "learning_rate": 1.5163591868335016e-06, + "loss": 0.4397, + "num_input_tokens_seen": 19418816, + "step": 6165 + }, + { + "epoch": 0.39498111516548234, + "grad_norm": 41.530364990234375, + "learning_rate": 1.515401981661919e-06, + "loss": 0.5856, + "num_input_tokens_seen": 19435392, + "step": 6170 + }, + { + "epoch": 0.3953011971064593, + "grad_norm": 33.935325622558594, + "learning_rate": 1.514444133004911e-06, + "loss": 0.4567, + "num_input_tokens_seen": 19450048, + "step": 6175 + }, + { + "epoch": 0.39562127904743616, + "grad_norm": 31.931150436401367, + "learning_rate": 1.5134856420583631e-06, + "loss": 0.465, + "num_input_tokens_seen": 19466368, + "step": 6180 + }, + { + "epoch": 0.39594136098841304, + "grad_norm": 25.44246482849121, + "learning_rate": 1.5125265100189614e-06, + "loss": 0.34, + "num_input_tokens_seen": 19482624, + "step": 6185 + }, + { + "epoch": 0.3962614429293899, + "grad_norm": 32.58120346069336, + "learning_rate": 1.5115667380841948e-06, + "loss": 0.5382, + "num_input_tokens_seen": 19498048, + "step": 6190 + }, + { + "epoch": 0.3965815248703668, + "grad_norm": 18.999216079711914, + "learning_rate": 1.510606327452349e-06, + "loss": 0.4413, + "num_input_tokens_seen": 19515264, + "step": 6195 + }, + { + "epoch": 0.3969016068113437, + "grad_norm": 35.836219787597656, + "learning_rate": 1.5096452793225082e-06, + "loss": 0.4267, + "num_input_tokens_seen": 19533056, + "step": 6200 + }, + { + "epoch": 0.3972216887523206, + "grad_norm": 26.90237808227539, + "learning_rate": 1.5086835948945522e-06, + "loss": 0.3994, + "num_input_tokens_seen": 19548480, + "step": 6205 + }, + { + "epoch": 0.3975417706932975, + "grad_norm": 30.118810653686523, + "learning_rate": 1.5077212753691556e-06, + "loss": 0.3462, + "num_input_tokens_seen": 19563712, + "step": 6210 + }, + { + "epoch": 0.39786185263427437, + "grad_norm": 36.46988296508789, + "learning_rate": 1.5067583219477852e-06, + "loss": 0.41, + "num_input_tokens_seen": 19578624, + "step": 6215 + }, + { + "epoch": 0.39818193457525125, + "grad_norm": 31.342973709106445, + "learning_rate": 1.5057947358327e-06, + "loss": 0.3926, + "num_input_tokens_seen": 19593408, + "step": 6220 + }, + { + "epoch": 0.39850201651622813, + "grad_norm": 37.588436126708984, + "learning_rate": 1.504830518226948e-06, + "loss": 0.5044, + "num_input_tokens_seen": 19609216, + "step": 6225 + }, + { + "epoch": 0.39882209845720507, + "grad_norm": 29.45639419555664, + "learning_rate": 1.5038656703343672e-06, + "loss": 0.4468, + "num_input_tokens_seen": 19624896, + "step": 6230 + }, + { + "epoch": 0.39914218039818194, + "grad_norm": 72.1549072265625, + "learning_rate": 1.5029001933595805e-06, + "loss": 0.5125, + "num_input_tokens_seen": 19640128, + "step": 6235 + }, + { + "epoch": 0.3994622623391588, + "grad_norm": 34.15262985229492, + "learning_rate": 1.501934088507998e-06, + "loss": 0.3482, + "num_input_tokens_seen": 19655680, + "step": 6240 + }, + { + "epoch": 0.3997823442801357, + "grad_norm": 35.860618591308594, + "learning_rate": 1.5009673569858126e-06, + "loss": 0.6246, + "num_input_tokens_seen": 19672192, + "step": 6245 + }, + { + "epoch": 0.4001024262211126, + "grad_norm": 46.368167877197266, + "learning_rate": 1.5e-06, + "loss": 0.534, + "num_input_tokens_seen": 19688896, + "step": 6250 + }, + { + "epoch": 0.4004225081620895, + "grad_norm": 19.773387908935547, + "learning_rate": 1.4990320187583167e-06, + "loss": 0.3556, + "num_input_tokens_seen": 19704128, + "step": 6255 + }, + { + "epoch": 0.4004865245502849, + "eval_loss": 0.4279458224773407, + "eval_runtime": 50.6211, + "eval_samples_per_second": 274.312, + "eval_steps_per_second": 34.294, + "num_input_tokens_seen": 19707456, + "step": 6256 + }, + { + "epoch": 0.4007425901030664, + "grad_norm": 32.513755798339844, + "learning_rate": 1.4980634144692986e-06, + "loss": 0.3913, + "num_input_tokens_seen": 19719744, + "step": 6260 + }, + { + "epoch": 0.4010626720440433, + "grad_norm": 47.36183547973633, + "learning_rate": 1.4970941883422599e-06, + "loss": 0.3734, + "num_input_tokens_seen": 19736128, + "step": 6265 + }, + { + "epoch": 0.40138275398502016, + "grad_norm": 26.743701934814453, + "learning_rate": 1.4961243415872901e-06, + "loss": 0.4286, + "num_input_tokens_seen": 19751296, + "step": 6270 + }, + { + "epoch": 0.40170283592599704, + "grad_norm": 62.38422775268555, + "learning_rate": 1.4951538754152551e-06, + "loss": 0.3958, + "num_input_tokens_seen": 19765888, + "step": 6275 + }, + { + "epoch": 0.402022917866974, + "grad_norm": 31.93796157836914, + "learning_rate": 1.4941827910377925e-06, + "loss": 0.4227, + "num_input_tokens_seen": 19780864, + "step": 6280 + }, + { + "epoch": 0.40234299980795085, + "grad_norm": 22.445552825927734, + "learning_rate": 1.4932110896673131e-06, + "loss": 0.3978, + "num_input_tokens_seen": 19796864, + "step": 6285 + }, + { + "epoch": 0.40266308174892773, + "grad_norm": 29.57168960571289, + "learning_rate": 1.4922387725169973e-06, + "loss": 0.5383, + "num_input_tokens_seen": 19811904, + "step": 6290 + }, + { + "epoch": 0.4029831636899046, + "grad_norm": 32.471187591552734, + "learning_rate": 1.4912658408007947e-06, + "loss": 0.418, + "num_input_tokens_seen": 19827456, + "step": 6295 + }, + { + "epoch": 0.4033032456308815, + "grad_norm": 33.78974914550781, + "learning_rate": 1.4902922957334215e-06, + "loss": 0.4194, + "num_input_tokens_seen": 19842496, + "step": 6300 + }, + { + "epoch": 0.40362332757185837, + "grad_norm": 46.43672561645508, + "learning_rate": 1.4893181385303608e-06, + "loss": 0.4186, + "num_input_tokens_seen": 19858240, + "step": 6305 + }, + { + "epoch": 0.4039434095128353, + "grad_norm": 34.83802032470703, + "learning_rate": 1.4883433704078584e-06, + "loss": 0.4262, + "num_input_tokens_seen": 19874368, + "step": 6310 + }, + { + "epoch": 0.4042634914538122, + "grad_norm": 34.20176315307617, + "learning_rate": 1.4873679925829246e-06, + "loss": 0.3986, + "num_input_tokens_seen": 19891904, + "step": 6315 + }, + { + "epoch": 0.40458357339478906, + "grad_norm": 21.880064010620117, + "learning_rate": 1.4863920062733298e-06, + "loss": 0.4157, + "num_input_tokens_seen": 19907392, + "step": 6320 + }, + { + "epoch": 0.40490365533576594, + "grad_norm": 48.874141693115234, + "learning_rate": 1.485415412697604e-06, + "loss": 0.3822, + "num_input_tokens_seen": 19922624, + "step": 6325 + }, + { + "epoch": 0.4052237372767428, + "grad_norm": 34.258758544921875, + "learning_rate": 1.484438213075036e-06, + "loss": 0.4286, + "num_input_tokens_seen": 19939328, + "step": 6330 + }, + { + "epoch": 0.40554381921771976, + "grad_norm": 43.55635452270508, + "learning_rate": 1.4834604086256713e-06, + "loss": 0.4412, + "num_input_tokens_seen": 19955392, + "step": 6335 + }, + { + "epoch": 0.40586390115869664, + "grad_norm": 39.21355438232422, + "learning_rate": 1.4824820005703097e-06, + "loss": 0.401, + "num_input_tokens_seen": 19971520, + "step": 6340 + }, + { + "epoch": 0.4061839830996735, + "grad_norm": 24.532764434814453, + "learning_rate": 1.4815029901305061e-06, + "loss": 0.448, + "num_input_tokens_seen": 19988352, + "step": 6345 + }, + { + "epoch": 0.4065040650406504, + "grad_norm": 28.755565643310547, + "learning_rate": 1.480523378528565e-06, + "loss": 0.4706, + "num_input_tokens_seen": 20005184, + "step": 6350 + }, + { + "epoch": 0.4068241469816273, + "grad_norm": 43.57781982421875, + "learning_rate": 1.4795431669875441e-06, + "loss": 0.4379, + "num_input_tokens_seen": 20020800, + "step": 6355 + }, + { + "epoch": 0.4071442289226042, + "grad_norm": 30.764387130737305, + "learning_rate": 1.478562356731249e-06, + "loss": 0.475, + "num_input_tokens_seen": 20036416, + "step": 6360 + }, + { + "epoch": 0.4074643108635811, + "grad_norm": 42.592384338378906, + "learning_rate": 1.4775809489842326e-06, + "loss": 0.4608, + "num_input_tokens_seen": 20053184, + "step": 6365 + }, + { + "epoch": 0.40778439280455797, + "grad_norm": 28.14908790588379, + "learning_rate": 1.4765989449717937e-06, + "loss": 0.3944, + "num_input_tokens_seen": 20069888, + "step": 6370 + }, + { + "epoch": 0.40810447474553485, + "grad_norm": 62.522220611572266, + "learning_rate": 1.4756163459199763e-06, + "loss": 0.534, + "num_input_tokens_seen": 20085760, + "step": 6375 + }, + { + "epoch": 0.40842455668651173, + "grad_norm": 34.16120910644531, + "learning_rate": 1.4746331530555665e-06, + "loss": 0.2694, + "num_input_tokens_seen": 20101056, + "step": 6380 + }, + { + "epoch": 0.4087446386274886, + "grad_norm": 39.74694061279297, + "learning_rate": 1.4736493676060923e-06, + "loss": 0.4114, + "num_input_tokens_seen": 20116352, + "step": 6385 + }, + { + "epoch": 0.40906472056846555, + "grad_norm": 21.04083824157715, + "learning_rate": 1.4726649907998216e-06, + "loss": 0.3752, + "num_input_tokens_seen": 20131712, + "step": 6390 + }, + { + "epoch": 0.4093848025094424, + "grad_norm": 33.7485466003418, + "learning_rate": 1.4716800238657599e-06, + "loss": 0.3816, + "num_input_tokens_seen": 20146880, + "step": 6395 + }, + { + "epoch": 0.4097048844504193, + "grad_norm": 19.112497329711914, + "learning_rate": 1.4706944680336505e-06, + "loss": 0.285, + "num_input_tokens_seen": 20163520, + "step": 6400 + }, + { + "epoch": 0.4100249663913962, + "grad_norm": 43.30415344238281, + "learning_rate": 1.469708324533971e-06, + "loss": 0.4656, + "num_input_tokens_seen": 20177984, + "step": 6405 + }, + { + "epoch": 0.41034504833237306, + "grad_norm": 18.54940414428711, + "learning_rate": 1.4687215945979335e-06, + "loss": 0.3425, + "num_input_tokens_seen": 20193472, + "step": 6410 + }, + { + "epoch": 0.41066513027335, + "grad_norm": 41.684871673583984, + "learning_rate": 1.4677342794574815e-06, + "loss": 0.4557, + "num_input_tokens_seen": 20210624, + "step": 6415 + }, + { + "epoch": 0.4109852122143269, + "grad_norm": 59.87638854980469, + "learning_rate": 1.4667463803452902e-06, + "loss": 0.4171, + "num_input_tokens_seen": 20226688, + "step": 6420 + }, + { + "epoch": 0.41130529415530376, + "grad_norm": 43.94029998779297, + "learning_rate": 1.4657578984947627e-06, + "loss": 0.4553, + "num_input_tokens_seen": 20244608, + "step": 6425 + }, + { + "epoch": 0.41162537609628064, + "grad_norm": 34.1412353515625, + "learning_rate": 1.4647688351400303e-06, + "loss": 0.3597, + "num_input_tokens_seen": 20261184, + "step": 6430 + }, + { + "epoch": 0.4119454580372575, + "grad_norm": 22.3857421875, + "learning_rate": 1.46377919151595e-06, + "loss": 0.3288, + "num_input_tokens_seen": 20276736, + "step": 6435 + }, + { + "epoch": 0.41226553997823445, + "grad_norm": 33.48893737792969, + "learning_rate": 1.462788968858104e-06, + "loss": 0.47, + "num_input_tokens_seen": 20293888, + "step": 6440 + }, + { + "epoch": 0.41258562191921133, + "grad_norm": 20.228092193603516, + "learning_rate": 1.4617981684027966e-06, + "loss": 0.4858, + "num_input_tokens_seen": 20309696, + "step": 6445 + }, + { + "epoch": 0.4129057038601882, + "grad_norm": 19.324357986450195, + "learning_rate": 1.4608067913870536e-06, + "loss": 0.3958, + "num_input_tokens_seen": 20325632, + "step": 6450 + }, + { + "epoch": 0.4132257858011651, + "grad_norm": 24.772836685180664, + "learning_rate": 1.4598148390486213e-06, + "loss": 0.3994, + "num_input_tokens_seen": 20341888, + "step": 6455 + }, + { + "epoch": 0.41354586774214197, + "grad_norm": 30.82358741760254, + "learning_rate": 1.4588223126259639e-06, + "loss": 0.5083, + "num_input_tokens_seen": 20358656, + "step": 6460 + }, + { + "epoch": 0.4138659496831189, + "grad_norm": 15.612091064453125, + "learning_rate": 1.4578292133582615e-06, + "loss": 0.3307, + "num_input_tokens_seen": 20372864, + "step": 6465 + }, + { + "epoch": 0.4141860316240958, + "grad_norm": 28.77939224243164, + "learning_rate": 1.456835542485411e-06, + "loss": 0.3876, + "num_input_tokens_seen": 20387840, + "step": 6470 + }, + { + "epoch": 0.41450611356507266, + "grad_norm": 34.10711669921875, + "learning_rate": 1.4558413012480215e-06, + "loss": 0.4136, + "num_input_tokens_seen": 20404736, + "step": 6475 + }, + { + "epoch": 0.41482619550604954, + "grad_norm": 39.10059356689453, + "learning_rate": 1.4548464908874156e-06, + "loss": 0.5707, + "num_input_tokens_seen": 20422848, + "step": 6480 + }, + { + "epoch": 0.4151462774470264, + "grad_norm": 31.53403091430664, + "learning_rate": 1.4538511126456255e-06, + "loss": 0.4077, + "num_input_tokens_seen": 20438016, + "step": 6485 + }, + { + "epoch": 0.4154663593880033, + "grad_norm": 54.74279022216797, + "learning_rate": 1.452855167765392e-06, + "loss": 0.5888, + "num_input_tokens_seen": 20454464, + "step": 6490 + }, + { + "epoch": 0.41578644132898024, + "grad_norm": 25.2100887298584, + "learning_rate": 1.4518586574901647e-06, + "loss": 0.4553, + "num_input_tokens_seen": 20470464, + "step": 6495 + }, + { + "epoch": 0.4161065232699571, + "grad_norm": 33.02887725830078, + "learning_rate": 1.450861583064098e-06, + "loss": 0.4639, + "num_input_tokens_seen": 20485696, + "step": 6500 + }, + { + "epoch": 0.416426605210934, + "grad_norm": 24.64836883544922, + "learning_rate": 1.4498639457320515e-06, + "loss": 0.352, + "num_input_tokens_seen": 20500608, + "step": 6505 + }, + { + "epoch": 0.4167466871519109, + "grad_norm": 35.120365142822266, + "learning_rate": 1.4488657467395865e-06, + "loss": 0.4715, + "num_input_tokens_seen": 20515776, + "step": 6510 + }, + { + "epoch": 0.41706676909288776, + "grad_norm": 41.49770736694336, + "learning_rate": 1.4478669873329663e-06, + "loss": 0.5086, + "num_input_tokens_seen": 20531456, + "step": 6515 + }, + { + "epoch": 0.4173868510338647, + "grad_norm": 32.16014099121094, + "learning_rate": 1.4468676687591536e-06, + "loss": 0.3953, + "num_input_tokens_seen": 20547200, + "step": 6520 + }, + { + "epoch": 0.41770693297484157, + "grad_norm": 29.224876403808594, + "learning_rate": 1.4458677922658104e-06, + "loss": 0.4326, + "num_input_tokens_seen": 20562560, + "step": 6525 + }, + { + "epoch": 0.41802701491581845, + "grad_norm": 18.81861686706543, + "learning_rate": 1.444867359101293e-06, + "loss": 0.2884, + "num_input_tokens_seen": 20577344, + "step": 6530 + }, + { + "epoch": 0.41834709685679533, + "grad_norm": 35.77077865600586, + "learning_rate": 1.4438663705146545e-06, + "loss": 0.3541, + "num_input_tokens_seen": 20593088, + "step": 6535 + }, + { + "epoch": 0.4186671787977722, + "grad_norm": 28.87957191467285, + "learning_rate": 1.442864827755641e-06, + "loss": 0.3645, + "num_input_tokens_seen": 20609792, + "step": 6540 + }, + { + "epoch": 0.41898726073874915, + "grad_norm": 20.035581588745117, + "learning_rate": 1.4418627320746901e-06, + "loss": 0.4507, + "num_input_tokens_seen": 20625280, + "step": 6545 + }, + { + "epoch": 0.419307342679726, + "grad_norm": 32.727542877197266, + "learning_rate": 1.4408600847229304e-06, + "loss": 0.3912, + "num_input_tokens_seen": 20641984, + "step": 6550 + }, + { + "epoch": 0.4196274246207029, + "grad_norm": 36.26650619506836, + "learning_rate": 1.4398568869521782e-06, + "loss": 0.5483, + "num_input_tokens_seen": 20658240, + "step": 6555 + }, + { + "epoch": 0.4199475065616798, + "grad_norm": 32.617122650146484, + "learning_rate": 1.4388531400149384e-06, + "loss": 0.3603, + "num_input_tokens_seen": 20673408, + "step": 6560 + }, + { + "epoch": 0.42026758850265666, + "grad_norm": 44.918235778808594, + "learning_rate": 1.4378488451644007e-06, + "loss": 0.3865, + "num_input_tokens_seen": 20688960, + "step": 6565 + }, + { + "epoch": 0.42058767044363354, + "grad_norm": 27.99346351623535, + "learning_rate": 1.4368440036544386e-06, + "loss": 0.4216, + "num_input_tokens_seen": 20704768, + "step": 6570 + }, + { + "epoch": 0.4209077523846105, + "grad_norm": 41.81321716308594, + "learning_rate": 1.435838616739609e-06, + "loss": 0.4157, + "num_input_tokens_seen": 20719808, + "step": 6575 + }, + { + "epoch": 0.42122783432558736, + "grad_norm": 35.78312683105469, + "learning_rate": 1.4348326856751493e-06, + "loss": 0.5319, + "num_input_tokens_seen": 20735680, + "step": 6580 + }, + { + "epoch": 0.42154791626656424, + "grad_norm": 27.771835327148438, + "learning_rate": 1.433826211716976e-06, + "loss": 0.3379, + "num_input_tokens_seen": 20750144, + "step": 6585 + }, + { + "epoch": 0.4218679982075411, + "grad_norm": 30.699609756469727, + "learning_rate": 1.4328191961216835e-06, + "loss": 0.3988, + "num_input_tokens_seen": 20766016, + "step": 6590 + }, + { + "epoch": 0.422188080148518, + "grad_norm": 52.447792053222656, + "learning_rate": 1.4318116401465427e-06, + "loss": 0.4818, + "num_input_tokens_seen": 20782720, + "step": 6595 + }, + { + "epoch": 0.42250816208949493, + "grad_norm": 29.143312454223633, + "learning_rate": 1.430803545049499e-06, + "loss": 0.3925, + "num_input_tokens_seen": 20798208, + "step": 6600 + }, + { + "epoch": 0.4228282440304718, + "grad_norm": 18.094640731811523, + "learning_rate": 1.4297949120891716e-06, + "loss": 0.5891, + "num_input_tokens_seen": 20813056, + "step": 6605 + }, + { + "epoch": 0.4231483259714487, + "grad_norm": 37.29645919799805, + "learning_rate": 1.4287857425248497e-06, + "loss": 0.4266, + "num_input_tokens_seen": 20828800, + "step": 6610 + }, + { + "epoch": 0.42346840791242557, + "grad_norm": 24.383594512939453, + "learning_rate": 1.427776037616494e-06, + "loss": 0.4956, + "num_input_tokens_seen": 20844736, + "step": 6615 + }, + { + "epoch": 0.42378848985340245, + "grad_norm": 32.46372604370117, + "learning_rate": 1.4267657986247326e-06, + "loss": 0.3504, + "num_input_tokens_seen": 20860672, + "step": 6620 + }, + { + "epoch": 0.4241085717943794, + "grad_norm": 35.29887771606445, + "learning_rate": 1.425755026810861e-06, + "loss": 0.3666, + "num_input_tokens_seen": 20877184, + "step": 6625 + }, + { + "epoch": 0.42442865373535626, + "grad_norm": 58.47334671020508, + "learning_rate": 1.4247437234368394e-06, + "loss": 0.3965, + "num_input_tokens_seen": 20894208, + "step": 6630 + }, + { + "epoch": 0.42474873567633314, + "grad_norm": 38.44792175292969, + "learning_rate": 1.423731889765292e-06, + "loss": 0.407, + "num_input_tokens_seen": 20909696, + "step": 6635 + }, + { + "epoch": 0.42506881761731, + "grad_norm": 16.839555740356445, + "learning_rate": 1.422719527059505e-06, + "loss": 0.3465, + "num_input_tokens_seen": 20926016, + "step": 6640 + }, + { + "epoch": 0.4253888995582869, + "grad_norm": 21.848594665527344, + "learning_rate": 1.4217066365834253e-06, + "loss": 0.362, + "num_input_tokens_seen": 20941440, + "step": 6645 + }, + { + "epoch": 0.42570898149926384, + "grad_norm": 35.94594955444336, + "learning_rate": 1.4206932196016586e-06, + "loss": 0.4566, + "num_input_tokens_seen": 20956352, + "step": 6650 + }, + { + "epoch": 0.4260290634402407, + "grad_norm": 51.25725173950195, + "learning_rate": 1.4196792773794672e-06, + "loss": 0.3947, + "num_input_tokens_seen": 20973056, + "step": 6655 + }, + { + "epoch": 0.4263491453812176, + "grad_norm": 39.09233474731445, + "learning_rate": 1.418664811182771e-06, + "loss": 0.4406, + "num_input_tokens_seen": 20989248, + "step": 6660 + }, + { + "epoch": 0.4266692273221945, + "grad_norm": 40.44568634033203, + "learning_rate": 1.417649822278142e-06, + "loss": 0.4946, + "num_input_tokens_seen": 21004096, + "step": 6665 + }, + { + "epoch": 0.42698930926317136, + "grad_norm": 25.228567123413086, + "learning_rate": 1.4166343119328064e-06, + "loss": 0.489, + "num_input_tokens_seen": 21020224, + "step": 6670 + }, + { + "epoch": 0.42730939120414824, + "grad_norm": 30.947425842285156, + "learning_rate": 1.4156182814146404e-06, + "loss": 0.466, + "num_input_tokens_seen": 21035264, + "step": 6675 + }, + { + "epoch": 0.42762947314512517, + "grad_norm": 19.565750122070312, + "learning_rate": 1.4146017319921701e-06, + "loss": 0.354, + "num_input_tokens_seen": 21051904, + "step": 6680 + }, + { + "epoch": 0.42794955508610205, + "grad_norm": 30.911516189575195, + "learning_rate": 1.4135846649345695e-06, + "loss": 0.4117, + "num_input_tokens_seen": 21069504, + "step": 6685 + }, + { + "epoch": 0.42826963702707893, + "grad_norm": 30.636323928833008, + "learning_rate": 1.4125670815116589e-06, + "loss": 0.4259, + "num_input_tokens_seen": 21084288, + "step": 6690 + }, + { + "epoch": 0.4285897189680558, + "grad_norm": 26.054248809814453, + "learning_rate": 1.4115489829939025e-06, + "loss": 0.2933, + "num_input_tokens_seen": 21100544, + "step": 6695 + }, + { + "epoch": 0.4289098009090327, + "grad_norm": 29.667612075805664, + "learning_rate": 1.4105303706524093e-06, + "loss": 0.4315, + "num_input_tokens_seen": 21116608, + "step": 6700 + }, + { + "epoch": 0.4292298828500096, + "grad_norm": 49.060630798339844, + "learning_rate": 1.4095112457589276e-06, + "loss": 0.6147, + "num_input_tokens_seen": 21131776, + "step": 6705 + }, + { + "epoch": 0.4295499647909865, + "grad_norm": 31.449359893798828, + "learning_rate": 1.4084916095858477e-06, + "loss": 0.4185, + "num_input_tokens_seen": 21146368, + "step": 6710 + }, + { + "epoch": 0.4298700467319634, + "grad_norm": 75.84903717041016, + "learning_rate": 1.407471463406197e-06, + "loss": 0.509, + "num_input_tokens_seen": 21162368, + "step": 6715 + }, + { + "epoch": 0.43019012867294026, + "grad_norm": 31.45880699157715, + "learning_rate": 1.4064508084936399e-06, + "loss": 0.4404, + "num_input_tokens_seen": 21179008, + "step": 6720 + }, + { + "epoch": 0.43051021061391714, + "grad_norm": 33.05830383300781, + "learning_rate": 1.405429646122476e-06, + "loss": 0.569, + "num_input_tokens_seen": 21196160, + "step": 6725 + }, + { + "epoch": 0.4308302925548941, + "grad_norm": 21.873218536376953, + "learning_rate": 1.4044079775676392e-06, + "loss": 0.5342, + "num_input_tokens_seen": 21212032, + "step": 6730 + }, + { + "epoch": 0.43115037449587096, + "grad_norm": 21.718421936035156, + "learning_rate": 1.4033858041046936e-06, + "loss": 0.3587, + "num_input_tokens_seen": 21230272, + "step": 6735 + }, + { + "epoch": 0.43147045643684784, + "grad_norm": 25.18842124938965, + "learning_rate": 1.4023631270098352e-06, + "loss": 0.3928, + "num_input_tokens_seen": 21245760, + "step": 6740 + }, + { + "epoch": 0.4317905383778247, + "grad_norm": 29.12677764892578, + "learning_rate": 1.4013399475598888e-06, + "loss": 0.3446, + "num_input_tokens_seen": 21260992, + "step": 6745 + }, + { + "epoch": 0.4321106203188016, + "grad_norm": 22.8419189453125, + "learning_rate": 1.4003162670323056e-06, + "loss": 0.2819, + "num_input_tokens_seen": 21275136, + "step": 6750 + }, + { + "epoch": 0.4324307022597785, + "grad_norm": 73.31881713867188, + "learning_rate": 1.3992920867051627e-06, + "loss": 0.5416, + "num_input_tokens_seen": 21290560, + "step": 6755 + }, + { + "epoch": 0.4327507842007554, + "grad_norm": 42.87895202636719, + "learning_rate": 1.3982674078571614e-06, + "loss": 0.3552, + "num_input_tokens_seen": 21305536, + "step": 6760 + }, + { + "epoch": 0.4330708661417323, + "grad_norm": 27.110273361206055, + "learning_rate": 1.3972422317676252e-06, + "loss": 0.3758, + "num_input_tokens_seen": 21320576, + "step": 6765 + }, + { + "epoch": 0.43339094808270917, + "grad_norm": 18.68414306640625, + "learning_rate": 1.3962165597164985e-06, + "loss": 0.3698, + "num_input_tokens_seen": 21335680, + "step": 6770 + }, + { + "epoch": 0.43371103002368605, + "grad_norm": 28.053197860717773, + "learning_rate": 1.395190392984345e-06, + "loss": 0.3519, + "num_input_tokens_seen": 21351808, + "step": 6775 + }, + { + "epoch": 0.43403111196466293, + "grad_norm": 24.955060958862305, + "learning_rate": 1.3941637328523452e-06, + "loss": 0.4522, + "num_input_tokens_seen": 21366464, + "step": 6780 + }, + { + "epoch": 0.43435119390563987, + "grad_norm": 38.06902313232422, + "learning_rate": 1.3931365806022978e-06, + "loss": 0.3038, + "num_input_tokens_seen": 21383296, + "step": 6785 + }, + { + "epoch": 0.43467127584661674, + "grad_norm": 37.151485443115234, + "learning_rate": 1.3921089375166131e-06, + "loss": 0.3111, + "num_input_tokens_seen": 21399616, + "step": 6790 + }, + { + "epoch": 0.4349913577875936, + "grad_norm": 21.17021942138672, + "learning_rate": 1.391080804878316e-06, + "loss": 0.4455, + "num_input_tokens_seen": 21414848, + "step": 6795 + }, + { + "epoch": 0.4353114397285705, + "grad_norm": 60.63893508911133, + "learning_rate": 1.3900521839710427e-06, + "loss": 0.3804, + "num_input_tokens_seen": 21430144, + "step": 6800 + }, + { + "epoch": 0.4356315216695474, + "grad_norm": 24.321428298950195, + "learning_rate": 1.3890230760790373e-06, + "loss": 0.3503, + "num_input_tokens_seen": 21445248, + "step": 6805 + }, + { + "epoch": 0.4359516036105243, + "grad_norm": 102.32804870605469, + "learning_rate": 1.3879934824871544e-06, + "loss": 0.598, + "num_input_tokens_seen": 21460544, + "step": 6810 + }, + { + "epoch": 0.4362716855515012, + "grad_norm": 30.45060920715332, + "learning_rate": 1.3869634044808526e-06, + "loss": 0.5102, + "num_input_tokens_seen": 21476224, + "step": 6815 + }, + { + "epoch": 0.4365917674924781, + "grad_norm": 40.25202560424805, + "learning_rate": 1.3859328433461971e-06, + "loss": 0.6093, + "num_input_tokens_seen": 21491712, + "step": 6820 + }, + { + "epoch": 0.43691184943345496, + "grad_norm": 65.34822082519531, + "learning_rate": 1.3849018003698553e-06, + "loss": 0.5794, + "num_input_tokens_seen": 21508928, + "step": 6825 + }, + { + "epoch": 0.43723193137443184, + "grad_norm": 38.05977249145508, + "learning_rate": 1.3838702768390964e-06, + "loss": 0.3975, + "num_input_tokens_seen": 21523648, + "step": 6830 + }, + { + "epoch": 0.43755201331540877, + "grad_norm": 32.886993408203125, + "learning_rate": 1.38283827404179e-06, + "loss": 0.474, + "num_input_tokens_seen": 21539264, + "step": 6835 + }, + { + "epoch": 0.43787209525638565, + "grad_norm": 48.08152389526367, + "learning_rate": 1.381805793266403e-06, + "loss": 0.3763, + "num_input_tokens_seen": 21555520, + "step": 6840 + }, + { + "epoch": 0.43819217719736253, + "grad_norm": 34.62569808959961, + "learning_rate": 1.3807728358020009e-06, + "loss": 0.4524, + "num_input_tokens_seen": 21570112, + "step": 6845 + }, + { + "epoch": 0.4385122591383394, + "grad_norm": 51.81264114379883, + "learning_rate": 1.3797394029382416e-06, + "loss": 0.3372, + "num_input_tokens_seen": 21584768, + "step": 6850 + }, + { + "epoch": 0.4388323410793163, + "grad_norm": 23.349763870239258, + "learning_rate": 1.37870549596538e-06, + "loss": 0.3008, + "num_input_tokens_seen": 21599872, + "step": 6855 + }, + { + "epoch": 0.43915242302029317, + "grad_norm": 21.19732093811035, + "learning_rate": 1.3776711161742595e-06, + "loss": 0.5217, + "num_input_tokens_seen": 21615808, + "step": 6860 + }, + { + "epoch": 0.4394725049612701, + "grad_norm": 28.972196578979492, + "learning_rate": 1.3766362648563166e-06, + "loss": 0.4772, + "num_input_tokens_seen": 21630656, + "step": 6865 + }, + { + "epoch": 0.439792586902247, + "grad_norm": 65.14180755615234, + "learning_rate": 1.3756009433035744e-06, + "loss": 0.4123, + "num_input_tokens_seen": 21646976, + "step": 6870 + }, + { + "epoch": 0.44011266884322386, + "grad_norm": 28.106571197509766, + "learning_rate": 1.3745651528086447e-06, + "loss": 0.5783, + "num_input_tokens_seen": 21665024, + "step": 6875 + }, + { + "epoch": 0.44043275078420074, + "grad_norm": 16.411785125732422, + "learning_rate": 1.373528894664724e-06, + "loss": 0.4489, + "num_input_tokens_seen": 21680128, + "step": 6880 + }, + { + "epoch": 0.4407528327251776, + "grad_norm": 26.3304386138916, + "learning_rate": 1.3724921701655924e-06, + "loss": 0.3466, + "num_input_tokens_seen": 21695808, + "step": 6885 + }, + { + "epoch": 0.44107291466615456, + "grad_norm": 15.10905647277832, + "learning_rate": 1.3714549806056125e-06, + "loss": 0.3186, + "num_input_tokens_seen": 21711936, + "step": 6890 + }, + { + "epoch": 0.44139299660713144, + "grad_norm": 45.09797668457031, + "learning_rate": 1.3704173272797283e-06, + "loss": 0.4162, + "num_input_tokens_seen": 21727488, + "step": 6895 + }, + { + "epoch": 0.4417130785481083, + "grad_norm": 38.701011657714844, + "learning_rate": 1.3693792114834619e-06, + "loss": 0.4556, + "num_input_tokens_seen": 21745280, + "step": 6900 + }, + { + "epoch": 0.4420331604890852, + "grad_norm": 26.589208602905273, + "learning_rate": 1.3683406345129129e-06, + "loss": 0.467, + "num_input_tokens_seen": 21760000, + "step": 6905 + }, + { + "epoch": 0.4423532424300621, + "grad_norm": 25.628881454467773, + "learning_rate": 1.3673015976647567e-06, + "loss": 0.3971, + "num_input_tokens_seen": 21775232, + "step": 6910 + }, + { + "epoch": 0.442673324371039, + "grad_norm": 37.34769821166992, + "learning_rate": 1.3662621022362435e-06, + "loss": 0.3979, + "num_input_tokens_seen": 21790656, + "step": 6915 + }, + { + "epoch": 0.4429934063120159, + "grad_norm": 52.09904479980469, + "learning_rate": 1.3652221495251952e-06, + "loss": 0.462, + "num_input_tokens_seen": 21806336, + "step": 6920 + }, + { + "epoch": 0.44331348825299277, + "grad_norm": 27.45566749572754, + "learning_rate": 1.3641817408300049e-06, + "loss": 0.3242, + "num_input_tokens_seen": 21823744, + "step": 6925 + }, + { + "epoch": 0.44363357019396965, + "grad_norm": 31.710731506347656, + "learning_rate": 1.3631408774496352e-06, + "loss": 0.559, + "num_input_tokens_seen": 21839104, + "step": 6930 + }, + { + "epoch": 0.44395365213494653, + "grad_norm": 28.635961532592773, + "learning_rate": 1.3620995606836165e-06, + "loss": 0.3616, + "num_input_tokens_seen": 21854528, + "step": 6935 + }, + { + "epoch": 0.4442737340759234, + "grad_norm": 58.52728271484375, + "learning_rate": 1.3610577918320446e-06, + "loss": 0.6013, + "num_input_tokens_seen": 21870592, + "step": 6940 + }, + { + "epoch": 0.44459381601690035, + "grad_norm": 47.7674674987793, + "learning_rate": 1.3600155721955802e-06, + "loss": 0.3823, + "num_input_tokens_seen": 21885696, + "step": 6945 + }, + { + "epoch": 0.4449138979578772, + "grad_norm": 25.71700096130371, + "learning_rate": 1.3589729030754468e-06, + "loss": 0.4017, + "num_input_tokens_seen": 21901248, + "step": 6950 + }, + { + "epoch": 0.4452339798988541, + "grad_norm": 30.66419219970703, + "learning_rate": 1.3579297857734293e-06, + "loss": 0.4293, + "num_input_tokens_seen": 21916352, + "step": 6955 + }, + { + "epoch": 0.445554061839831, + "grad_norm": 18.087440490722656, + "learning_rate": 1.3568862215918717e-06, + "loss": 0.3354, + "num_input_tokens_seen": 21931072, + "step": 6960 + }, + { + "epoch": 0.44587414378080786, + "grad_norm": 33.805599212646484, + "learning_rate": 1.3558422118336762e-06, + "loss": 0.5014, + "num_input_tokens_seen": 21946752, + "step": 6965 + }, + { + "epoch": 0.4461942257217848, + "grad_norm": 39.23421096801758, + "learning_rate": 1.354797757802301e-06, + "loss": 0.4669, + "num_input_tokens_seen": 21962176, + "step": 6970 + }, + { + "epoch": 0.4465143076627617, + "grad_norm": 18.45398712158203, + "learning_rate": 1.3537528608017596e-06, + "loss": 0.3986, + "num_input_tokens_seen": 21978496, + "step": 6975 + }, + { + "epoch": 0.44683438960373856, + "grad_norm": 24.98866844177246, + "learning_rate": 1.352707522136618e-06, + "loss": 0.3989, + "num_input_tokens_seen": 21992576, + "step": 6980 + }, + { + "epoch": 0.44715447154471544, + "grad_norm": 18.77157211303711, + "learning_rate": 1.3516617431119934e-06, + "loss": 0.3987, + "num_input_tokens_seen": 22008000, + "step": 6985 + }, + { + "epoch": 0.4474745534856923, + "grad_norm": 35.73682403564453, + "learning_rate": 1.350615525033554e-06, + "loss": 0.5453, + "num_input_tokens_seen": 22022976, + "step": 6990 + }, + { + "epoch": 0.44779463542666925, + "grad_norm": 28.581218719482422, + "learning_rate": 1.3495688692075144e-06, + "loss": 0.4055, + "num_input_tokens_seen": 22038144, + "step": 6995 + }, + { + "epoch": 0.44811471736764613, + "grad_norm": 32.27814483642578, + "learning_rate": 1.3485217769406376e-06, + "loss": 0.35, + "num_input_tokens_seen": 22054016, + "step": 7000 + }, + { + "epoch": 0.448434799308623, + "grad_norm": 24.271724700927734, + "learning_rate": 1.3474742495402303e-06, + "loss": 0.3627, + "num_input_tokens_seen": 22073920, + "step": 7005 + }, + { + "epoch": 0.4487548812495999, + "grad_norm": 53.558128356933594, + "learning_rate": 1.3464262883141425e-06, + "loss": 0.4295, + "num_input_tokens_seen": 22089728, + "step": 7010 + }, + { + "epoch": 0.44907496319057677, + "grad_norm": 36.53765869140625, + "learning_rate": 1.3453778945707663e-06, + "loss": 0.5883, + "num_input_tokens_seen": 22105344, + "step": 7015 + }, + { + "epoch": 0.4493950451315537, + "grad_norm": 54.6480712890625, + "learning_rate": 1.3443290696190332e-06, + "loss": 0.4596, + "num_input_tokens_seen": 22121792, + "step": 7020 + }, + { + "epoch": 0.4497151270725306, + "grad_norm": 24.6829776763916, + "learning_rate": 1.343279814768414e-06, + "loss": 0.4175, + "num_input_tokens_seen": 22136128, + "step": 7025 + }, + { + "epoch": 0.45003520901350746, + "grad_norm": 23.87900733947754, + "learning_rate": 1.3422301313289156e-06, + "loss": 0.3849, + "num_input_tokens_seen": 22151936, + "step": 7030 + }, + { + "epoch": 0.45035529095448434, + "grad_norm": 21.608688354492188, + "learning_rate": 1.34118002061108e-06, + "loss": 0.3775, + "num_input_tokens_seen": 22168128, + "step": 7035 + }, + { + "epoch": 0.4505473401190705, + "eval_loss": 0.43633610010147095, + "eval_runtime": 50.6412, + "eval_samples_per_second": 274.203, + "eval_steps_per_second": 34.28, + "num_input_tokens_seen": 22178432, + "step": 7038 + }, + { + "epoch": 0.4506753728954612, + "grad_norm": 43.05158615112305, + "learning_rate": 1.3401294839259828e-06, + "loss": 0.4432, + "num_input_tokens_seen": 22184512, + "step": 7040 + }, + { + "epoch": 0.4509954548364381, + "grad_norm": 34.34581756591797, + "learning_rate": 1.3390785225852312e-06, + "loss": 0.5428, + "num_input_tokens_seen": 22199872, + "step": 7045 + }, + { + "epoch": 0.45131553677741504, + "grad_norm": 20.483835220336914, + "learning_rate": 1.3380271379009631e-06, + "loss": 0.444, + "num_input_tokens_seen": 22216960, + "step": 7050 + }, + { + "epoch": 0.4516356187183919, + "grad_norm": 20.620986938476562, + "learning_rate": 1.3369753311858442e-06, + "loss": 0.2645, + "num_input_tokens_seen": 22231488, + "step": 7055 + }, + { + "epoch": 0.4519557006593688, + "grad_norm": 27.00153350830078, + "learning_rate": 1.3359231037530682e-06, + "loss": 0.4597, + "num_input_tokens_seen": 22246976, + "step": 7060 + }, + { + "epoch": 0.4522757826003457, + "grad_norm": 17.260738372802734, + "learning_rate": 1.3348704569163527e-06, + "loss": 0.4178, + "num_input_tokens_seen": 22263680, + "step": 7065 + }, + { + "epoch": 0.45259586454132256, + "grad_norm": 19.05507469177246, + "learning_rate": 1.33381739198994e-06, + "loss": 0.3371, + "num_input_tokens_seen": 22279552, + "step": 7070 + }, + { + "epoch": 0.4529159464822995, + "grad_norm": 20.88991355895996, + "learning_rate": 1.3327639102885938e-06, + "loss": 0.4463, + "num_input_tokens_seen": 22295296, + "step": 7075 + }, + { + "epoch": 0.45323602842327637, + "grad_norm": 36.431251525878906, + "learning_rate": 1.3317100131275986e-06, + "loss": 0.3979, + "num_input_tokens_seen": 22310400, + "step": 7080 + }, + { + "epoch": 0.45355611036425325, + "grad_norm": 60.086669921875, + "learning_rate": 1.3306557018227576e-06, + "loss": 0.4852, + "num_input_tokens_seen": 22326848, + "step": 7085 + }, + { + "epoch": 0.45387619230523013, + "grad_norm": 33.853111267089844, + "learning_rate": 1.3296009776903903e-06, + "loss": 0.4673, + "num_input_tokens_seen": 22342592, + "step": 7090 + }, + { + "epoch": 0.454196274246207, + "grad_norm": 29.175647735595703, + "learning_rate": 1.3285458420473323e-06, + "loss": 0.4693, + "num_input_tokens_seen": 22358912, + "step": 7095 + }, + { + "epoch": 0.45451635618718395, + "grad_norm": 30.104026794433594, + "learning_rate": 1.3274902962109332e-06, + "loss": 0.3789, + "num_input_tokens_seen": 22374528, + "step": 7100 + }, + { + "epoch": 0.4548364381281608, + "grad_norm": 20.507343292236328, + "learning_rate": 1.3264343414990539e-06, + "loss": 0.3752, + "num_input_tokens_seen": 22389824, + "step": 7105 + }, + { + "epoch": 0.4551565200691377, + "grad_norm": 32.275794982910156, + "learning_rate": 1.3253779792300663e-06, + "loss": 0.4269, + "num_input_tokens_seen": 22405376, + "step": 7110 + }, + { + "epoch": 0.4554766020101146, + "grad_norm": 17.600027084350586, + "learning_rate": 1.3243212107228518e-06, + "loss": 0.3442, + "num_input_tokens_seen": 22420032, + "step": 7115 + }, + { + "epoch": 0.45579668395109146, + "grad_norm": 15.571036338806152, + "learning_rate": 1.3232640372967974e-06, + "loss": 0.393, + "num_input_tokens_seen": 22434688, + "step": 7120 + }, + { + "epoch": 0.45611676589206834, + "grad_norm": 51.120750427246094, + "learning_rate": 1.3222064602717974e-06, + "loss": 0.4691, + "num_input_tokens_seen": 22451072, + "step": 7125 + }, + { + "epoch": 0.4564368478330453, + "grad_norm": 31.28764533996582, + "learning_rate": 1.321148480968248e-06, + "loss": 0.3578, + "num_input_tokens_seen": 22466688, + "step": 7130 + }, + { + "epoch": 0.45675692977402216, + "grad_norm": 39.04620361328125, + "learning_rate": 1.3200901007070495e-06, + "loss": 0.4627, + "num_input_tokens_seen": 22482432, + "step": 7135 + }, + { + "epoch": 0.45707701171499904, + "grad_norm": 45.138580322265625, + "learning_rate": 1.3190313208096022e-06, + "loss": 0.4653, + "num_input_tokens_seen": 22496960, + "step": 7140 + }, + { + "epoch": 0.4573970936559759, + "grad_norm": 62.76626968383789, + "learning_rate": 1.3179721425978048e-06, + "loss": 0.3506, + "num_input_tokens_seen": 22512256, + "step": 7145 + }, + { + "epoch": 0.4577171755969528, + "grad_norm": 29.606536865234375, + "learning_rate": 1.3169125673940541e-06, + "loss": 0.3801, + "num_input_tokens_seen": 22528192, + "step": 7150 + }, + { + "epoch": 0.45803725753792973, + "grad_norm": 23.77508544921875, + "learning_rate": 1.3158525965212422e-06, + "loss": 0.4222, + "num_input_tokens_seen": 22545408, + "step": 7155 + }, + { + "epoch": 0.4583573394789066, + "grad_norm": 46.141380310058594, + "learning_rate": 1.3147922313027548e-06, + "loss": 0.499, + "num_input_tokens_seen": 22560832, + "step": 7160 + }, + { + "epoch": 0.4586774214198835, + "grad_norm": 37.23847961425781, + "learning_rate": 1.3137314730624707e-06, + "loss": 0.3566, + "num_input_tokens_seen": 22577728, + "step": 7165 + }, + { + "epoch": 0.45899750336086037, + "grad_norm": 56.6774787902832, + "learning_rate": 1.3126703231247588e-06, + "loss": 0.4792, + "num_input_tokens_seen": 22594112, + "step": 7170 + }, + { + "epoch": 0.45931758530183725, + "grad_norm": 57.76353454589844, + "learning_rate": 1.3116087828144772e-06, + "loss": 0.3942, + "num_input_tokens_seen": 22609728, + "step": 7175 + }, + { + "epoch": 0.4596376672428142, + "grad_norm": 25.027061462402344, + "learning_rate": 1.310546853456972e-06, + "loss": 0.4788, + "num_input_tokens_seen": 22624704, + "step": 7180 + }, + { + "epoch": 0.45995774918379106, + "grad_norm": 29.121782302856445, + "learning_rate": 1.3094845363780737e-06, + "loss": 0.3133, + "num_input_tokens_seen": 22640448, + "step": 7185 + }, + { + "epoch": 0.46027783112476794, + "grad_norm": 27.954086303710938, + "learning_rate": 1.3084218329040976e-06, + "loss": 0.2221, + "num_input_tokens_seen": 22655680, + "step": 7190 + }, + { + "epoch": 0.4605979130657448, + "grad_norm": 19.17045021057129, + "learning_rate": 1.3073587443618425e-06, + "loss": 0.3836, + "num_input_tokens_seen": 22672128, + "step": 7195 + }, + { + "epoch": 0.4609179950067217, + "grad_norm": 63.576904296875, + "learning_rate": 1.3062952720785861e-06, + "loss": 0.528, + "num_input_tokens_seen": 22687104, + "step": 7200 + }, + { + "epoch": 0.4612380769476986, + "grad_norm": 57.46402359008789, + "learning_rate": 1.305231417382086e-06, + "loss": 0.3679, + "num_input_tokens_seen": 22702976, + "step": 7205 + }, + { + "epoch": 0.4615581588886755, + "grad_norm": 33.87038803100586, + "learning_rate": 1.3041671816005777e-06, + "loss": 0.3473, + "num_input_tokens_seen": 22718464, + "step": 7210 + }, + { + "epoch": 0.4618782408296524, + "grad_norm": 31.65737533569336, + "learning_rate": 1.3031025660627718e-06, + "loss": 0.3735, + "num_input_tokens_seen": 22734656, + "step": 7215 + }, + { + "epoch": 0.4621983227706293, + "grad_norm": 41.2495002746582, + "learning_rate": 1.3020375720978534e-06, + "loss": 0.4378, + "num_input_tokens_seen": 22750016, + "step": 7220 + }, + { + "epoch": 0.46251840471160616, + "grad_norm": 34.97142791748047, + "learning_rate": 1.3009722010354799e-06, + "loss": 0.385, + "num_input_tokens_seen": 22765632, + "step": 7225 + }, + { + "epoch": 0.46283848665258304, + "grad_norm": 39.41366958618164, + "learning_rate": 1.2999064542057794e-06, + "loss": 0.4572, + "num_input_tokens_seen": 22781184, + "step": 7230 + }, + { + "epoch": 0.46315856859355997, + "grad_norm": 32.739967346191406, + "learning_rate": 1.2988403329393495e-06, + "loss": 0.4955, + "num_input_tokens_seen": 22797248, + "step": 7235 + }, + { + "epoch": 0.46347865053453685, + "grad_norm": 29.355844497680664, + "learning_rate": 1.2977738385672557e-06, + "loss": 0.4186, + "num_input_tokens_seen": 22812800, + "step": 7240 + }, + { + "epoch": 0.46379873247551373, + "grad_norm": 23.447111129760742, + "learning_rate": 1.2967069724210278e-06, + "loss": 0.4086, + "num_input_tokens_seen": 22827200, + "step": 7245 + }, + { + "epoch": 0.4641188144164906, + "grad_norm": 32.68409729003906, + "learning_rate": 1.2956397358326609e-06, + "loss": 0.5472, + "num_input_tokens_seen": 22843264, + "step": 7250 + }, + { + "epoch": 0.4644388963574675, + "grad_norm": 40.67762756347656, + "learning_rate": 1.294572130134613e-06, + "loss": 0.3845, + "num_input_tokens_seen": 22858624, + "step": 7255 + }, + { + "epoch": 0.4647589782984444, + "grad_norm": 35.638824462890625, + "learning_rate": 1.2935041566598016e-06, + "loss": 0.5608, + "num_input_tokens_seen": 22873856, + "step": 7260 + }, + { + "epoch": 0.4650790602394213, + "grad_norm": 35.22060775756836, + "learning_rate": 1.2924358167416049e-06, + "loss": 0.3669, + "num_input_tokens_seen": 22889600, + "step": 7265 + }, + { + "epoch": 0.4653991421803982, + "grad_norm": 26.555004119873047, + "learning_rate": 1.2913671117138572e-06, + "loss": 0.4085, + "num_input_tokens_seen": 22904704, + "step": 7270 + }, + { + "epoch": 0.46571922412137506, + "grad_norm": 24.626081466674805, + "learning_rate": 1.29029804291085e-06, + "loss": 0.3516, + "num_input_tokens_seen": 22920384, + "step": 7275 + }, + { + "epoch": 0.46603930606235194, + "grad_norm": 46.00627899169922, + "learning_rate": 1.2892286116673269e-06, + "loss": 0.3724, + "num_input_tokens_seen": 22937024, + "step": 7280 + }, + { + "epoch": 0.4663593880033289, + "grad_norm": 27.727231979370117, + "learning_rate": 1.2881588193184865e-06, + "loss": 0.501, + "num_input_tokens_seen": 22954816, + "step": 7285 + }, + { + "epoch": 0.46667946994430576, + "grad_norm": 25.16861915588379, + "learning_rate": 1.287088667199977e-06, + "loss": 0.2811, + "num_input_tokens_seen": 22969472, + "step": 7290 + }, + { + "epoch": 0.46699955188528264, + "grad_norm": 25.07572364807129, + "learning_rate": 1.2860181566478956e-06, + "loss": 0.4666, + "num_input_tokens_seen": 22984192, + "step": 7295 + }, + { + "epoch": 0.4673196338262595, + "grad_norm": 13.781168937683105, + "learning_rate": 1.2849472889987874e-06, + "loss": 0.3772, + "num_input_tokens_seen": 22999680, + "step": 7300 + }, + { + "epoch": 0.4676397157672364, + "grad_norm": 28.55607032775879, + "learning_rate": 1.2838760655896431e-06, + "loss": 0.3756, + "num_input_tokens_seen": 23014720, + "step": 7305 + }, + { + "epoch": 0.4679597977082133, + "grad_norm": 37.8064079284668, + "learning_rate": 1.2828044877578983e-06, + "loss": 0.4629, + "num_input_tokens_seen": 23030528, + "step": 7310 + }, + { + "epoch": 0.4682798796491902, + "grad_norm": 27.748788833618164, + "learning_rate": 1.2817325568414297e-06, + "loss": 0.5176, + "num_input_tokens_seen": 23046784, + "step": 7315 + }, + { + "epoch": 0.4685999615901671, + "grad_norm": 25.824499130249023, + "learning_rate": 1.2806602741785562e-06, + "loss": 0.3307, + "num_input_tokens_seen": 23061632, + "step": 7320 + }, + { + "epoch": 0.46892004353114397, + "grad_norm": 18.876747131347656, + "learning_rate": 1.2795876411080346e-06, + "loss": 0.3325, + "num_input_tokens_seen": 23077888, + "step": 7325 + }, + { + "epoch": 0.46924012547212085, + "grad_norm": 25.548311233520508, + "learning_rate": 1.278514658969061e-06, + "loss": 0.3222, + "num_input_tokens_seen": 23093568, + "step": 7330 + }, + { + "epoch": 0.46956020741309773, + "grad_norm": 28.950227737426758, + "learning_rate": 1.2774413291012648e-06, + "loss": 0.5175, + "num_input_tokens_seen": 23108992, + "step": 7335 + }, + { + "epoch": 0.46988028935407467, + "grad_norm": 26.01544761657715, + "learning_rate": 1.2763676528447122e-06, + "loss": 0.4328, + "num_input_tokens_seen": 23124992, + "step": 7340 + }, + { + "epoch": 0.47020037129505154, + "grad_norm": 30.87302589416504, + "learning_rate": 1.2752936315399003e-06, + "loss": 0.3446, + "num_input_tokens_seen": 23141888, + "step": 7345 + }, + { + "epoch": 0.4705204532360284, + "grad_norm": 31.088359832763672, + "learning_rate": 1.2742192665277566e-06, + "loss": 0.343, + "num_input_tokens_seen": 23157888, + "step": 7350 + }, + { + "epoch": 0.4708405351770053, + "grad_norm": 26.50160026550293, + "learning_rate": 1.2731445591496393e-06, + "loss": 0.2838, + "num_input_tokens_seen": 23172864, + "step": 7355 + }, + { + "epoch": 0.4711606171179822, + "grad_norm": 35.69820022583008, + "learning_rate": 1.2720695107473325e-06, + "loss": 0.456, + "num_input_tokens_seen": 23188352, + "step": 7360 + }, + { + "epoch": 0.4714806990589591, + "grad_norm": 39.46232986450195, + "learning_rate": 1.2709941226630475e-06, + "loss": 0.3861, + "num_input_tokens_seen": 23204096, + "step": 7365 + }, + { + "epoch": 0.471800780999936, + "grad_norm": 30.66231346130371, + "learning_rate": 1.2699183962394182e-06, + "loss": 0.3526, + "num_input_tokens_seen": 23219072, + "step": 7370 + }, + { + "epoch": 0.4721208629409129, + "grad_norm": 16.721717834472656, + "learning_rate": 1.2688423328195021e-06, + "loss": 0.4323, + "num_input_tokens_seen": 23234560, + "step": 7375 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 45.48991394042969, + "learning_rate": 1.267765933746777e-06, + "loss": 0.3497, + "num_input_tokens_seen": 23250304, + "step": 7380 + }, + { + "epoch": 0.47276102682286664, + "grad_norm": 44.07309341430664, + "learning_rate": 1.2666892003651397e-06, + "loss": 0.6383, + "num_input_tokens_seen": 23265664, + "step": 7385 + }, + { + "epoch": 0.4730811087638435, + "grad_norm": 31.322635650634766, + "learning_rate": 1.2656121340189043e-06, + "loss": 0.453, + "num_input_tokens_seen": 23281472, + "step": 7390 + }, + { + "epoch": 0.47340119070482045, + "grad_norm": 24.864459991455078, + "learning_rate": 1.264534736052801e-06, + "loss": 0.4142, + "num_input_tokens_seen": 23297024, + "step": 7395 + }, + { + "epoch": 0.47372127264579733, + "grad_norm": 42.676055908203125, + "learning_rate": 1.2634570078119739e-06, + "loss": 0.4348, + "num_input_tokens_seen": 23313344, + "step": 7400 + }, + { + "epoch": 0.4740413545867742, + "grad_norm": 29.135011672973633, + "learning_rate": 1.262378950641979e-06, + "loss": 0.535, + "num_input_tokens_seen": 23328512, + "step": 7405 + }, + { + "epoch": 0.4743614365277511, + "grad_norm": 23.750194549560547, + "learning_rate": 1.2613005658887836e-06, + "loss": 0.444, + "num_input_tokens_seen": 23342400, + "step": 7410 + }, + { + "epoch": 0.47468151846872797, + "grad_norm": 37.87858200073242, + "learning_rate": 1.2602218548987637e-06, + "loss": 0.4198, + "num_input_tokens_seen": 23358400, + "step": 7415 + }, + { + "epoch": 0.4750016004097049, + "grad_norm": 33.777381896972656, + "learning_rate": 1.2591428190187029e-06, + "loss": 0.4155, + "num_input_tokens_seen": 23373376, + "step": 7420 + }, + { + "epoch": 0.4753216823506818, + "grad_norm": 56.56295394897461, + "learning_rate": 1.2580634595957898e-06, + "loss": 0.5093, + "num_input_tokens_seen": 23390400, + "step": 7425 + }, + { + "epoch": 0.47564176429165866, + "grad_norm": 27.435312271118164, + "learning_rate": 1.2569837779776172e-06, + "loss": 0.3871, + "num_input_tokens_seen": 23406400, + "step": 7430 + }, + { + "epoch": 0.47596184623263554, + "grad_norm": 30.586233139038086, + "learning_rate": 1.2559037755121804e-06, + "loss": 0.3134, + "num_input_tokens_seen": 23421824, + "step": 7435 + }, + { + "epoch": 0.4762819281736124, + "grad_norm": 50.65870666503906, + "learning_rate": 1.2548234535478754e-06, + "loss": 0.4599, + "num_input_tokens_seen": 23438272, + "step": 7440 + }, + { + "epoch": 0.47660201011458936, + "grad_norm": 18.9100341796875, + "learning_rate": 1.2537428134334968e-06, + "loss": 0.4267, + "num_input_tokens_seen": 23454976, + "step": 7445 + }, + { + "epoch": 0.47692209205556624, + "grad_norm": 89.36604309082031, + "learning_rate": 1.252661856518236e-06, + "loss": 0.5302, + "num_input_tokens_seen": 23471168, + "step": 7450 + }, + { + "epoch": 0.4772421739965431, + "grad_norm": 30.56731605529785, + "learning_rate": 1.251580584151681e-06, + "loss": 0.3683, + "num_input_tokens_seen": 23486720, + "step": 7455 + }, + { + "epoch": 0.47756225593752, + "grad_norm": 20.963420867919922, + "learning_rate": 1.2504989976838129e-06, + "loss": 0.309, + "num_input_tokens_seen": 23502912, + "step": 7460 + }, + { + "epoch": 0.4778823378784969, + "grad_norm": 25.654800415039062, + "learning_rate": 1.2494170984650048e-06, + "loss": 0.3629, + "num_input_tokens_seen": 23519552, + "step": 7465 + }, + { + "epoch": 0.4782024198194738, + "grad_norm": 27.470243453979492, + "learning_rate": 1.248334887846021e-06, + "loss": 0.4253, + "num_input_tokens_seen": 23535936, + "step": 7470 + }, + { + "epoch": 0.4785225017604507, + "grad_norm": 33.39189910888672, + "learning_rate": 1.2472523671780135e-06, + "loss": 0.4411, + "num_input_tokens_seen": 23551040, + "step": 7475 + }, + { + "epoch": 0.47884258370142757, + "grad_norm": 35.72373580932617, + "learning_rate": 1.2461695378125233e-06, + "loss": 0.309, + "num_input_tokens_seen": 23566208, + "step": 7480 + }, + { + "epoch": 0.47916266564240445, + "grad_norm": 22.527515411376953, + "learning_rate": 1.245086401101474e-06, + "loss": 0.4347, + "num_input_tokens_seen": 23581696, + "step": 7485 + }, + { + "epoch": 0.47948274758338133, + "grad_norm": 74.2776870727539, + "learning_rate": 1.2440029583971757e-06, + "loss": 0.4439, + "num_input_tokens_seen": 23597248, + "step": 7490 + }, + { + "epoch": 0.4798028295243582, + "grad_norm": 14.569833755493164, + "learning_rate": 1.2429192110523188e-06, + "loss": 0.502, + "num_input_tokens_seen": 23612800, + "step": 7495 + }, + { + "epoch": 0.48012291146533514, + "grad_norm": 28.646181106567383, + "learning_rate": 1.2418351604199746e-06, + "loss": 0.3388, + "num_input_tokens_seen": 23629056, + "step": 7500 + }, + { + "epoch": 0.480442993406312, + "grad_norm": 41.83679962158203, + "learning_rate": 1.2407508078535934e-06, + "loss": 0.4502, + "num_input_tokens_seen": 23644352, + "step": 7505 + }, + { + "epoch": 0.4807630753472889, + "grad_norm": 29.25275230407715, + "learning_rate": 1.2396661547070017e-06, + "loss": 0.2899, + "num_input_tokens_seen": 23661120, + "step": 7510 + }, + { + "epoch": 0.4810831572882658, + "grad_norm": 18.579309463500977, + "learning_rate": 1.238581202334402e-06, + "loss": 0.3362, + "num_input_tokens_seen": 23677632, + "step": 7515 + }, + { + "epoch": 0.48140323922924266, + "grad_norm": 35.11944580078125, + "learning_rate": 1.2374959520903699e-06, + "loss": 0.3676, + "num_input_tokens_seen": 23693952, + "step": 7520 + }, + { + "epoch": 0.4817233211702196, + "grad_norm": 20.344608306884766, + "learning_rate": 1.2364104053298531e-06, + "loss": 0.3442, + "num_input_tokens_seen": 23708736, + "step": 7525 + }, + { + "epoch": 0.4820434031111965, + "grad_norm": 30.352449417114258, + "learning_rate": 1.2353245634081692e-06, + "loss": 0.392, + "num_input_tokens_seen": 23724864, + "step": 7530 + }, + { + "epoch": 0.48236348505217336, + "grad_norm": 23.443605422973633, + "learning_rate": 1.2342384276810053e-06, + "loss": 0.4165, + "num_input_tokens_seen": 23740160, + "step": 7535 + }, + { + "epoch": 0.48268356699315024, + "grad_norm": 72.33162689208984, + "learning_rate": 1.233151999504414e-06, + "loss": 0.435, + "num_input_tokens_seen": 23755264, + "step": 7540 + }, + { + "epoch": 0.4830036489341271, + "grad_norm": 53.03398895263672, + "learning_rate": 1.232065280234814e-06, + "loss": 0.3445, + "num_input_tokens_seen": 23770112, + "step": 7545 + }, + { + "epoch": 0.48332373087510405, + "grad_norm": 26.0847110748291, + "learning_rate": 1.2309782712289867e-06, + "loss": 0.4075, + "num_input_tokens_seen": 23785536, + "step": 7550 + }, + { + "epoch": 0.48364381281608093, + "grad_norm": 53.779842376708984, + "learning_rate": 1.2298909738440758e-06, + "loss": 0.4257, + "num_input_tokens_seen": 23801280, + "step": 7555 + }, + { + "epoch": 0.4839638947570578, + "grad_norm": 39.13923263549805, + "learning_rate": 1.2288033894375847e-06, + "loss": 0.3893, + "num_input_tokens_seen": 23816448, + "step": 7560 + }, + { + "epoch": 0.4842839766980347, + "grad_norm": 32.721370697021484, + "learning_rate": 1.2277155193673755e-06, + "loss": 0.541, + "num_input_tokens_seen": 23832512, + "step": 7565 + }, + { + "epoch": 0.48460405863901157, + "grad_norm": 15.527641296386719, + "learning_rate": 1.2266273649916668e-06, + "loss": 0.3945, + "num_input_tokens_seen": 23848192, + "step": 7570 + }, + { + "epoch": 0.48492414057998845, + "grad_norm": 20.2283992767334, + "learning_rate": 1.2255389276690318e-06, + "loss": 0.4394, + "num_input_tokens_seen": 23863808, + "step": 7575 + }, + { + "epoch": 0.4852442225209654, + "grad_norm": 34.1326904296875, + "learning_rate": 1.2244502087583978e-06, + "loss": 0.3096, + "num_input_tokens_seen": 23880960, + "step": 7580 + }, + { + "epoch": 0.48556430446194226, + "grad_norm": 40.73923110961914, + "learning_rate": 1.2233612096190426e-06, + "loss": 0.3963, + "num_input_tokens_seen": 23896256, + "step": 7585 + }, + { + "epoch": 0.48588438640291914, + "grad_norm": 38.855438232421875, + "learning_rate": 1.222271931610595e-06, + "loss": 0.5109, + "num_input_tokens_seen": 23912832, + "step": 7590 + }, + { + "epoch": 0.486204468343896, + "grad_norm": 25.933034896850586, + "learning_rate": 1.2211823760930306e-06, + "loss": 0.4938, + "num_input_tokens_seen": 23928768, + "step": 7595 + }, + { + "epoch": 0.4865245502848729, + "grad_norm": 19.541147232055664, + "learning_rate": 1.2200925444266726e-06, + "loss": 0.4297, + "num_input_tokens_seen": 23945088, + "step": 7600 + }, + { + "epoch": 0.48684463222584984, + "grad_norm": 38.088348388671875, + "learning_rate": 1.219002437972189e-06, + "loss": 0.5101, + "num_input_tokens_seen": 23960192, + "step": 7605 + }, + { + "epoch": 0.4871647141668267, + "grad_norm": 31.72063446044922, + "learning_rate": 1.21791205809059e-06, + "loss": 0.4324, + "num_input_tokens_seen": 23977152, + "step": 7610 + }, + { + "epoch": 0.4874847961078036, + "grad_norm": 31.89913558959961, + "learning_rate": 1.2168214061432283e-06, + "loss": 0.3628, + "num_input_tokens_seen": 23992448, + "step": 7615 + }, + { + "epoch": 0.4878048780487805, + "grad_norm": 25.90325164794922, + "learning_rate": 1.2157304834917947e-06, + "loss": 0.4397, + "num_input_tokens_seen": 24008384, + "step": 7620 + }, + { + "epoch": 0.48812495998975736, + "grad_norm": 28.418067932128906, + "learning_rate": 1.2146392914983202e-06, + "loss": 0.6103, + "num_input_tokens_seen": 24025728, + "step": 7625 + }, + { + "epoch": 0.4884450419307343, + "grad_norm": 45.870277404785156, + "learning_rate": 1.2135478315251694e-06, + "loss": 0.51, + "num_input_tokens_seen": 24040448, + "step": 7630 + }, + { + "epoch": 0.48876512387171117, + "grad_norm": 30.824810028076172, + "learning_rate": 1.2124561049350442e-06, + "loss": 0.36, + "num_input_tokens_seen": 24055168, + "step": 7635 + }, + { + "epoch": 0.48908520581268805, + "grad_norm": 40.22310256958008, + "learning_rate": 1.2113641130909772e-06, + "loss": 0.4474, + "num_input_tokens_seen": 24070016, + "step": 7640 + }, + { + "epoch": 0.48940528775366493, + "grad_norm": 51.548828125, + "learning_rate": 1.2102718573563334e-06, + "loss": 0.3074, + "num_input_tokens_seen": 24084800, + "step": 7645 + }, + { + "epoch": 0.4897253696946418, + "grad_norm": 53.92572021484375, + "learning_rate": 1.2091793390948066e-06, + "loss": 0.4884, + "num_input_tokens_seen": 24100416, + "step": 7650 + }, + { + "epoch": 0.49004545163561875, + "grad_norm": 18.71347427368164, + "learning_rate": 1.2080865596704191e-06, + "loss": 0.2873, + "num_input_tokens_seen": 24117120, + "step": 7655 + }, + { + "epoch": 0.4903655335765956, + "grad_norm": 30.77579116821289, + "learning_rate": 1.2069935204475187e-06, + "loss": 0.4317, + "num_input_tokens_seen": 24132224, + "step": 7660 + }, + { + "epoch": 0.4906856155175725, + "grad_norm": 24.197696685791016, + "learning_rate": 1.2059002227907776e-06, + "loss": 0.4037, + "num_input_tokens_seen": 24147712, + "step": 7665 + }, + { + "epoch": 0.4910056974585494, + "grad_norm": 39.67091751098633, + "learning_rate": 1.2048066680651908e-06, + "loss": 0.408, + "num_input_tokens_seen": 24164288, + "step": 7670 + }, + { + "epoch": 0.49132577939952626, + "grad_norm": 39.406349182128906, + "learning_rate": 1.2037128576360743e-06, + "loss": 0.5751, + "num_input_tokens_seen": 24193728, + "step": 7675 + }, + { + "epoch": 0.49164586134050314, + "grad_norm": 39.66476821899414, + "learning_rate": 1.2026187928690627e-06, + "loss": 0.406, + "num_input_tokens_seen": 24208832, + "step": 7680 + }, + { + "epoch": 0.4919659432814801, + "grad_norm": 36.91902160644531, + "learning_rate": 1.2015244751301098e-06, + "loss": 0.5004, + "num_input_tokens_seen": 24223424, + "step": 7685 + }, + { + "epoch": 0.49228602522245696, + "grad_norm": 47.22456741333008, + "learning_rate": 1.2004299057854832e-06, + "loss": 0.444, + "num_input_tokens_seen": 24238976, + "step": 7690 + }, + { + "epoch": 0.49260610716343384, + "grad_norm": 23.27979850769043, + "learning_rate": 1.1993350862017661e-06, + "loss": 0.3837, + "num_input_tokens_seen": 24253632, + "step": 7695 + }, + { + "epoch": 0.4929261891044107, + "grad_norm": 37.785526275634766, + "learning_rate": 1.1982400177458534e-06, + "loss": 0.4074, + "num_input_tokens_seen": 24270720, + "step": 7700 + }, + { + "epoch": 0.4932462710453876, + "grad_norm": 37.083473205566406, + "learning_rate": 1.197144701784951e-06, + "loss": 0.4385, + "num_input_tokens_seen": 24285312, + "step": 7705 + }, + { + "epoch": 0.49356635298636453, + "grad_norm": 32.1000862121582, + "learning_rate": 1.1960491396865735e-06, + "loss": 0.409, + "num_input_tokens_seen": 24300352, + "step": 7710 + }, + { + "epoch": 0.4938864349273414, + "grad_norm": 26.52760887145996, + "learning_rate": 1.1949533328185435e-06, + "loss": 0.3518, + "num_input_tokens_seen": 24317056, + "step": 7715 + }, + { + "epoch": 0.4942065168683183, + "grad_norm": 27.518896102905273, + "learning_rate": 1.1938572825489883e-06, + "loss": 0.3705, + "num_input_tokens_seen": 24333184, + "step": 7720 + }, + { + "epoch": 0.49452659880929517, + "grad_norm": 32.64544677734375, + "learning_rate": 1.1927609902463394e-06, + "loss": 0.4313, + "num_input_tokens_seen": 24348672, + "step": 7725 + }, + { + "epoch": 0.49484668075027205, + "grad_norm": 44.3544807434082, + "learning_rate": 1.1916644572793314e-06, + "loss": 0.4342, + "num_input_tokens_seen": 24363648, + "step": 7730 + }, + { + "epoch": 0.495166762691249, + "grad_norm": 80.76370239257812, + "learning_rate": 1.190567685016998e-06, + "loss": 0.4951, + "num_input_tokens_seen": 24380992, + "step": 7735 + }, + { + "epoch": 0.49548684463222586, + "grad_norm": 30.18761444091797, + "learning_rate": 1.189470674828672e-06, + "loss": 0.4152, + "num_input_tokens_seen": 24395776, + "step": 7740 + }, + { + "epoch": 0.49580692657320274, + "grad_norm": 26.904821395874023, + "learning_rate": 1.188373428083984e-06, + "loss": 0.3851, + "num_input_tokens_seen": 24411584, + "step": 7745 + }, + { + "epoch": 0.4961270085141796, + "grad_norm": 43.27383041381836, + "learning_rate": 1.1872759461528596e-06, + "loss": 0.5355, + "num_input_tokens_seen": 24426560, + "step": 7750 + }, + { + "epoch": 0.4964470904551565, + "grad_norm": 12.632915496826172, + "learning_rate": 1.1861782304055174e-06, + "loss": 0.4046, + "num_input_tokens_seen": 24441856, + "step": 7755 + }, + { + "epoch": 0.4967671723961334, + "grad_norm": 19.117013931274414, + "learning_rate": 1.1850802822124686e-06, + "loss": 0.3269, + "num_input_tokens_seen": 24457472, + "step": 7760 + }, + { + "epoch": 0.4970872543371103, + "grad_norm": 61.428348541259766, + "learning_rate": 1.1839821029445143e-06, + "loss": 0.5104, + "num_input_tokens_seen": 24471936, + "step": 7765 + }, + { + "epoch": 0.4974073362780872, + "grad_norm": 39.07609939575195, + "learning_rate": 1.1828836939727442e-06, + "loss": 0.3332, + "num_input_tokens_seen": 24487616, + "step": 7770 + }, + { + "epoch": 0.4977274182190641, + "grad_norm": 38.0487060546875, + "learning_rate": 1.181785056668535e-06, + "loss": 0.4292, + "num_input_tokens_seen": 24503936, + "step": 7775 + }, + { + "epoch": 0.49804750016004096, + "grad_norm": 31.99479103088379, + "learning_rate": 1.180686192403548e-06, + "loss": 0.429, + "num_input_tokens_seen": 24518464, + "step": 7780 + }, + { + "epoch": 0.49836758210101784, + "grad_norm": 50.00809097290039, + "learning_rate": 1.1795871025497285e-06, + "loss": 0.3479, + "num_input_tokens_seen": 24533184, + "step": 7785 + }, + { + "epoch": 0.49868766404199477, + "grad_norm": 40.826236724853516, + "learning_rate": 1.1784877884793029e-06, + "loss": 0.4288, + "num_input_tokens_seen": 24548992, + "step": 7790 + }, + { + "epoch": 0.49900774598297165, + "grad_norm": 35.559532165527344, + "learning_rate": 1.1773882515647776e-06, + "loss": 0.3681, + "num_input_tokens_seen": 24566592, + "step": 7795 + }, + { + "epoch": 0.49932782792394853, + "grad_norm": 28.459714889526367, + "learning_rate": 1.1762884931789376e-06, + "loss": 0.4776, + "num_input_tokens_seen": 24583552, + "step": 7800 + }, + { + "epoch": 0.4996479098649254, + "grad_norm": 15.405625343322754, + "learning_rate": 1.1751885146948436e-06, + "loss": 0.4538, + "num_input_tokens_seen": 24599552, + "step": 7805 + }, + { + "epoch": 0.4999679918059023, + "grad_norm": 35.40610122680664, + "learning_rate": 1.1740883174858327e-06, + "loss": 0.3799, + "num_input_tokens_seen": 24614912, + "step": 7810 + }, + { + "epoch": 0.5002880737468792, + "grad_norm": 35.4653434753418, + "learning_rate": 1.1729879029255127e-06, + "loss": 0.3643, + "num_input_tokens_seen": 24629696, + "step": 7815 + }, + { + "epoch": 0.5006081556878561, + "grad_norm": 33.1492919921875, + "learning_rate": 1.171887272387765e-06, + "loss": 0.3997, + "num_input_tokens_seen": 24646208, + "step": 7820 + }, + { + "epoch": 0.5006081556878561, + "eval_loss": 0.4178144633769989, + "eval_runtime": 50.7766, + "eval_samples_per_second": 273.473, + "eval_steps_per_second": 34.189, + "num_input_tokens_seen": 24646208, + "step": 7820 + }, + { + "epoch": 0.500928237628833, + "grad_norm": 91.9991226196289, + "learning_rate": 1.1707864272467397e-06, + "loss": 0.4907, + "num_input_tokens_seen": 24661120, + "step": 7825 + }, + { + "epoch": 0.5012483195698099, + "grad_norm": 40.02210235595703, + "learning_rate": 1.169685368876855e-06, + "loss": 0.4269, + "num_input_tokens_seen": 24678336, + "step": 7830 + }, + { + "epoch": 0.5015684015107867, + "grad_norm": 62.80644989013672, + "learning_rate": 1.1685840986527946e-06, + "loss": 0.5471, + "num_input_tokens_seen": 24694336, + "step": 7835 + }, + { + "epoch": 0.5018884834517636, + "grad_norm": 35.32887649536133, + "learning_rate": 1.1674826179495076e-06, + "loss": 0.3986, + "num_input_tokens_seen": 24708608, + "step": 7840 + }, + { + "epoch": 0.5022085653927405, + "grad_norm": 35.998695373535156, + "learning_rate": 1.1663809281422056e-06, + "loss": 0.4302, + "num_input_tokens_seen": 24724672, + "step": 7845 + }, + { + "epoch": 0.5025286473337174, + "grad_norm": 43.341339111328125, + "learning_rate": 1.1652790306063615e-06, + "loss": 0.4556, + "num_input_tokens_seen": 24740608, + "step": 7850 + }, + { + "epoch": 0.5028487292746944, + "grad_norm": 35.64778137207031, + "learning_rate": 1.164176926717707e-06, + "loss": 0.4065, + "num_input_tokens_seen": 24758528, + "step": 7855 + }, + { + "epoch": 0.5031688112156713, + "grad_norm": 25.157997131347656, + "learning_rate": 1.1630746178522315e-06, + "loss": 0.3737, + "num_input_tokens_seen": 24772992, + "step": 7860 + }, + { + "epoch": 0.5034888931566481, + "grad_norm": 29.157825469970703, + "learning_rate": 1.1619721053861816e-06, + "loss": 0.4417, + "num_input_tokens_seen": 24788160, + "step": 7865 + }, + { + "epoch": 0.503808975097625, + "grad_norm": 22.016447067260742, + "learning_rate": 1.1608693906960558e-06, + "loss": 0.4104, + "num_input_tokens_seen": 24804224, + "step": 7870 + }, + { + "epoch": 0.5041290570386019, + "grad_norm": 32.85700988769531, + "learning_rate": 1.1597664751586069e-06, + "loss": 0.4523, + "num_input_tokens_seen": 24820928, + "step": 7875 + }, + { + "epoch": 0.5044491389795788, + "grad_norm": 49.433475494384766, + "learning_rate": 1.1586633601508382e-06, + "loss": 0.3953, + "num_input_tokens_seen": 24835776, + "step": 7880 + }, + { + "epoch": 0.5047692209205557, + "grad_norm": 48.25388717651367, + "learning_rate": 1.1575600470500014e-06, + "loss": 0.3764, + "num_input_tokens_seen": 24851648, + "step": 7885 + }, + { + "epoch": 0.5050893028615325, + "grad_norm": 61.41056823730469, + "learning_rate": 1.1564565372335957e-06, + "loss": 0.4222, + "num_input_tokens_seen": 24866880, + "step": 7890 + }, + { + "epoch": 0.5054093848025094, + "grad_norm": 42.029537200927734, + "learning_rate": 1.1553528320793663e-06, + "loss": 0.3276, + "num_input_tokens_seen": 24881856, + "step": 7895 + }, + { + "epoch": 0.5057294667434863, + "grad_norm": 24.842376708984375, + "learning_rate": 1.1542489329653022e-06, + "loss": 0.4327, + "num_input_tokens_seen": 24898560, + "step": 7900 + }, + { + "epoch": 0.5060495486844632, + "grad_norm": 28.775615692138672, + "learning_rate": 1.1531448412696343e-06, + "loss": 0.3841, + "num_input_tokens_seen": 24913216, + "step": 7905 + }, + { + "epoch": 0.5063696306254402, + "grad_norm": 22.78350067138672, + "learning_rate": 1.1520405583708337e-06, + "loss": 0.5014, + "num_input_tokens_seen": 24928832, + "step": 7910 + }, + { + "epoch": 0.506689712566417, + "grad_norm": 35.34454345703125, + "learning_rate": 1.1509360856476109e-06, + "loss": 0.4926, + "num_input_tokens_seen": 24944512, + "step": 7915 + }, + { + "epoch": 0.5070097945073939, + "grad_norm": 40.0589485168457, + "learning_rate": 1.149831424478913e-06, + "loss": 0.4731, + "num_input_tokens_seen": 24959744, + "step": 7920 + }, + { + "epoch": 0.5073298764483708, + "grad_norm": 44.30174255371094, + "learning_rate": 1.1487265762439224e-06, + "loss": 0.3939, + "num_input_tokens_seen": 24975488, + "step": 7925 + }, + { + "epoch": 0.5076499583893477, + "grad_norm": 41.936622619628906, + "learning_rate": 1.1476215423220547e-06, + "loss": 0.3612, + "num_input_tokens_seen": 24990272, + "step": 7930 + }, + { + "epoch": 0.5079700403303246, + "grad_norm": 39.607147216796875, + "learning_rate": 1.146516324092959e-06, + "loss": 0.3724, + "num_input_tokens_seen": 25006272, + "step": 7935 + }, + { + "epoch": 0.5082901222713014, + "grad_norm": 25.23112678527832, + "learning_rate": 1.1454109229365117e-06, + "loss": 0.2965, + "num_input_tokens_seen": 25022464, + "step": 7940 + }, + { + "epoch": 0.5086102042122783, + "grad_norm": 28.533090591430664, + "learning_rate": 1.14430534023282e-06, + "loss": 0.3151, + "num_input_tokens_seen": 25037376, + "step": 7945 + }, + { + "epoch": 0.5089302861532552, + "grad_norm": 44.65055465698242, + "learning_rate": 1.1431995773622167e-06, + "loss": 0.4737, + "num_input_tokens_seen": 25053440, + "step": 7950 + }, + { + "epoch": 0.5092503680942321, + "grad_norm": 22.451980590820312, + "learning_rate": 1.1420936357052597e-06, + "loss": 0.4343, + "num_input_tokens_seen": 25069120, + "step": 7955 + }, + { + "epoch": 0.5095704500352091, + "grad_norm": 21.964155197143555, + "learning_rate": 1.1409875166427303e-06, + "loss": 0.3024, + "num_input_tokens_seen": 25084224, + "step": 7960 + }, + { + "epoch": 0.509890531976186, + "grad_norm": 36.40359115600586, + "learning_rate": 1.1398812215556308e-06, + "loss": 0.5023, + "num_input_tokens_seen": 25099520, + "step": 7965 + }, + { + "epoch": 0.5102106139171628, + "grad_norm": 28.904006958007812, + "learning_rate": 1.1387747518251837e-06, + "loss": 0.372, + "num_input_tokens_seen": 25115200, + "step": 7970 + }, + { + "epoch": 0.5105306958581397, + "grad_norm": 21.367204666137695, + "learning_rate": 1.13766810883283e-06, + "loss": 0.3171, + "num_input_tokens_seen": 25131520, + "step": 7975 + }, + { + "epoch": 0.5108507777991166, + "grad_norm": 39.231876373291016, + "learning_rate": 1.1365612939602255e-06, + "loss": 0.5088, + "num_input_tokens_seen": 25147776, + "step": 7980 + }, + { + "epoch": 0.5111708597400935, + "grad_norm": 23.78633689880371, + "learning_rate": 1.1354543085892423e-06, + "loss": 0.3884, + "num_input_tokens_seen": 25162816, + "step": 7985 + }, + { + "epoch": 0.5114909416810703, + "grad_norm": 36.806182861328125, + "learning_rate": 1.1343471541019646e-06, + "loss": 0.3417, + "num_input_tokens_seen": 25178752, + "step": 7990 + }, + { + "epoch": 0.5118110236220472, + "grad_norm": 61.8624267578125, + "learning_rate": 1.1332398318806872e-06, + "loss": 0.3672, + "num_input_tokens_seen": 25194048, + "step": 7995 + }, + { + "epoch": 0.5121311055630241, + "grad_norm": 33.29353713989258, + "learning_rate": 1.1321323433079158e-06, + "loss": 0.3787, + "num_input_tokens_seen": 25209216, + "step": 8000 + }, + { + "epoch": 0.512451187504001, + "grad_norm": 31.924596786499023, + "learning_rate": 1.1310246897663623e-06, + "loss": 0.3897, + "num_input_tokens_seen": 25224640, + "step": 8005 + }, + { + "epoch": 0.5127712694449779, + "grad_norm": 25.257596969604492, + "learning_rate": 1.1299168726389447e-06, + "loss": 0.4115, + "num_input_tokens_seen": 25239808, + "step": 8010 + }, + { + "epoch": 0.5130913513859549, + "grad_norm": 50.187477111816406, + "learning_rate": 1.1288088933087868e-06, + "loss": 0.346, + "num_input_tokens_seen": 25257344, + "step": 8015 + }, + { + "epoch": 0.5134114333269317, + "grad_norm": 27.19639778137207, + "learning_rate": 1.1277007531592127e-06, + "loss": 0.3318, + "num_input_tokens_seen": 25272064, + "step": 8020 + }, + { + "epoch": 0.5137315152679086, + "grad_norm": 29.604515075683594, + "learning_rate": 1.1265924535737492e-06, + "loss": 0.3698, + "num_input_tokens_seen": 25287936, + "step": 8025 + }, + { + "epoch": 0.5140515972088855, + "grad_norm": 39.24460983276367, + "learning_rate": 1.125483995936121e-06, + "loss": 0.3019, + "num_input_tokens_seen": 25303232, + "step": 8030 + }, + { + "epoch": 0.5143716791498624, + "grad_norm": 20.35449981689453, + "learning_rate": 1.1243753816302507e-06, + "loss": 0.3742, + "num_input_tokens_seen": 25318656, + "step": 8035 + }, + { + "epoch": 0.5146917610908393, + "grad_norm": 43.355865478515625, + "learning_rate": 1.1232666120402558e-06, + "loss": 0.4047, + "num_input_tokens_seen": 25333760, + "step": 8040 + }, + { + "epoch": 0.5150118430318161, + "grad_norm": 34.452640533447266, + "learning_rate": 1.1221576885504487e-06, + "loss": 0.3819, + "num_input_tokens_seen": 25349824, + "step": 8045 + }, + { + "epoch": 0.515331924972793, + "grad_norm": 20.325450897216797, + "learning_rate": 1.121048612545333e-06, + "loss": 0.3978, + "num_input_tokens_seen": 25365376, + "step": 8050 + }, + { + "epoch": 0.5156520069137699, + "grad_norm": 47.98875427246094, + "learning_rate": 1.1199393854096034e-06, + "loss": 0.459, + "num_input_tokens_seen": 25380928, + "step": 8055 + }, + { + "epoch": 0.5159720888547468, + "grad_norm": 73.50482940673828, + "learning_rate": 1.118830008528143e-06, + "loss": 0.3448, + "num_input_tokens_seen": 25396352, + "step": 8060 + }, + { + "epoch": 0.5162921707957238, + "grad_norm": 22.99143409729004, + "learning_rate": 1.1177204832860212e-06, + "loss": 0.3084, + "num_input_tokens_seen": 25411456, + "step": 8065 + }, + { + "epoch": 0.5166122527367006, + "grad_norm": 20.2192325592041, + "learning_rate": 1.1166108110684947e-06, + "loss": 0.4402, + "num_input_tokens_seen": 25428544, + "step": 8070 + }, + { + "epoch": 0.5169323346776775, + "grad_norm": 38.74795150756836, + "learning_rate": 1.1155009932610003e-06, + "loss": 0.4209, + "num_input_tokens_seen": 25443968, + "step": 8075 + }, + { + "epoch": 0.5172524166186544, + "grad_norm": 47.510467529296875, + "learning_rate": 1.1143910312491605e-06, + "loss": 0.3319, + "num_input_tokens_seen": 25458880, + "step": 8080 + }, + { + "epoch": 0.5175724985596313, + "grad_norm": 60.593082427978516, + "learning_rate": 1.1132809264187748e-06, + "loss": 0.3206, + "num_input_tokens_seen": 25474304, + "step": 8085 + }, + { + "epoch": 0.5178925805006082, + "grad_norm": 58.513587951660156, + "learning_rate": 1.1121706801558226e-06, + "loss": 0.4119, + "num_input_tokens_seen": 25489472, + "step": 8090 + }, + { + "epoch": 0.518212662441585, + "grad_norm": 42.54377365112305, + "learning_rate": 1.111060293846459e-06, + "loss": 0.3921, + "num_input_tokens_seen": 25504896, + "step": 8095 + }, + { + "epoch": 0.5185327443825619, + "grad_norm": 77.95099639892578, + "learning_rate": 1.1099497688770148e-06, + "loss": 0.4749, + "num_input_tokens_seen": 25519360, + "step": 8100 + }, + { + "epoch": 0.5188528263235388, + "grad_norm": 36.677886962890625, + "learning_rate": 1.1088391066339928e-06, + "loss": 0.4449, + "num_input_tokens_seen": 25535680, + "step": 8105 + }, + { + "epoch": 0.5191729082645157, + "grad_norm": 44.013736724853516, + "learning_rate": 1.1077283085040684e-06, + "loss": 0.5377, + "num_input_tokens_seen": 25550592, + "step": 8110 + }, + { + "epoch": 0.5194929902054926, + "grad_norm": 40.93517303466797, + "learning_rate": 1.1066173758740863e-06, + "loss": 0.3997, + "num_input_tokens_seen": 25565696, + "step": 8115 + }, + { + "epoch": 0.5198130721464695, + "grad_norm": 19.403085708618164, + "learning_rate": 1.105506310131058e-06, + "loss": 0.3523, + "num_input_tokens_seen": 25581568, + "step": 8120 + }, + { + "epoch": 0.5201331540874464, + "grad_norm": 53.75294876098633, + "learning_rate": 1.1043951126621634e-06, + "loss": 0.4599, + "num_input_tokens_seen": 25597760, + "step": 8125 + }, + { + "epoch": 0.5204532360284233, + "grad_norm": 29.208173751831055, + "learning_rate": 1.1032837848547445e-06, + "loss": 0.4081, + "num_input_tokens_seen": 25615424, + "step": 8130 + }, + { + "epoch": 0.5207733179694002, + "grad_norm": 33.89809799194336, + "learning_rate": 1.1021723280963074e-06, + "loss": 0.4117, + "num_input_tokens_seen": 25630720, + "step": 8135 + }, + { + "epoch": 0.5210933999103771, + "grad_norm": 44.60910415649414, + "learning_rate": 1.1010607437745194e-06, + "loss": 0.5029, + "num_input_tokens_seen": 25649280, + "step": 8140 + }, + { + "epoch": 0.5214134818513539, + "grad_norm": 49.8292236328125, + "learning_rate": 1.0999490332772057e-06, + "loss": 0.5131, + "num_input_tokens_seen": 25664576, + "step": 8145 + }, + { + "epoch": 0.5217335637923308, + "grad_norm": 27.133338928222656, + "learning_rate": 1.0988371979923507e-06, + "loss": 0.426, + "num_input_tokens_seen": 25680384, + "step": 8150 + }, + { + "epoch": 0.5220536457333077, + "grad_norm": 26.779043197631836, + "learning_rate": 1.097725239308094e-06, + "loss": 0.4235, + "num_input_tokens_seen": 25696128, + "step": 8155 + }, + { + "epoch": 0.5223737276742846, + "grad_norm": 15.962846755981445, + "learning_rate": 1.0966131586127278e-06, + "loss": 0.2819, + "num_input_tokens_seen": 25712768, + "step": 8160 + }, + { + "epoch": 0.5226938096152615, + "grad_norm": 26.21531105041504, + "learning_rate": 1.0955009572946992e-06, + "loss": 0.4086, + "num_input_tokens_seen": 25727616, + "step": 8165 + }, + { + "epoch": 0.5230138915562383, + "grad_norm": 23.129108428955078, + "learning_rate": 1.094388636742604e-06, + "loss": 0.4159, + "num_input_tokens_seen": 25744384, + "step": 8170 + }, + { + "epoch": 0.5233339734972153, + "grad_norm": 37.188663482666016, + "learning_rate": 1.0932761983451878e-06, + "loss": 0.3516, + "num_input_tokens_seen": 25760640, + "step": 8175 + }, + { + "epoch": 0.5236540554381922, + "grad_norm": 32.39508819580078, + "learning_rate": 1.0921636434913425e-06, + "loss": 0.3157, + "num_input_tokens_seen": 25776640, + "step": 8180 + }, + { + "epoch": 0.5239741373791691, + "grad_norm": 26.821107864379883, + "learning_rate": 1.091050973570106e-06, + "loss": 0.2979, + "num_input_tokens_seen": 25791744, + "step": 8185 + }, + { + "epoch": 0.524294219320146, + "grad_norm": 60.61221694946289, + "learning_rate": 1.08993818997066e-06, + "loss": 0.5589, + "num_input_tokens_seen": 25808256, + "step": 8190 + }, + { + "epoch": 0.5246143012611229, + "grad_norm": 35.13313674926758, + "learning_rate": 1.0888252940823283e-06, + "loss": 0.4481, + "num_input_tokens_seen": 25824128, + "step": 8195 + }, + { + "epoch": 0.5249343832020997, + "grad_norm": 40.965362548828125, + "learning_rate": 1.0877122872945737e-06, + "loss": 0.4767, + "num_input_tokens_seen": 25840576, + "step": 8200 + }, + { + "epoch": 0.5252544651430766, + "grad_norm": 35.6567497253418, + "learning_rate": 1.0865991709969983e-06, + "loss": 0.3206, + "num_input_tokens_seen": 25856256, + "step": 8205 + }, + { + "epoch": 0.5255745470840535, + "grad_norm": 21.82451820373535, + "learning_rate": 1.0854859465793416e-06, + "loss": 0.4424, + "num_input_tokens_seen": 25871424, + "step": 8210 + }, + { + "epoch": 0.5258946290250304, + "grad_norm": 32.83222961425781, + "learning_rate": 1.0843726154314767e-06, + "loss": 0.4916, + "num_input_tokens_seen": 25886272, + "step": 8215 + }, + { + "epoch": 0.5262147109660072, + "grad_norm": 31.14963722229004, + "learning_rate": 1.083259178943411e-06, + "loss": 0.4302, + "num_input_tokens_seen": 25901952, + "step": 8220 + }, + { + "epoch": 0.5265347929069842, + "grad_norm": 20.824260711669922, + "learning_rate": 1.0821456385052822e-06, + "loss": 0.3779, + "num_input_tokens_seen": 25917888, + "step": 8225 + }, + { + "epoch": 0.5268548748479611, + "grad_norm": 45.031227111816406, + "learning_rate": 1.0810319955073598e-06, + "loss": 0.4074, + "num_input_tokens_seen": 25933824, + "step": 8230 + }, + { + "epoch": 0.527174956788938, + "grad_norm": 38.65109634399414, + "learning_rate": 1.0799182513400393e-06, + "loss": 0.3842, + "num_input_tokens_seen": 25951360, + "step": 8235 + }, + { + "epoch": 0.5274950387299149, + "grad_norm": 42.26575469970703, + "learning_rate": 1.0788044073938438e-06, + "loss": 0.3524, + "num_input_tokens_seen": 25967232, + "step": 8240 + }, + { + "epoch": 0.5278151206708918, + "grad_norm": 38.96324157714844, + "learning_rate": 1.0776904650594205e-06, + "loss": 0.4361, + "num_input_tokens_seen": 25982592, + "step": 8245 + }, + { + "epoch": 0.5281352026118686, + "grad_norm": 59.213871002197266, + "learning_rate": 1.0765764257275394e-06, + "loss": 0.4055, + "num_input_tokens_seen": 25997824, + "step": 8250 + }, + { + "epoch": 0.5284552845528455, + "grad_norm": 35.566043853759766, + "learning_rate": 1.0754622907890914e-06, + "loss": 0.4559, + "num_input_tokens_seen": 26013632, + "step": 8255 + }, + { + "epoch": 0.5287753664938224, + "grad_norm": 28.799236297607422, + "learning_rate": 1.0743480616350873e-06, + "loss": 0.3412, + "num_input_tokens_seen": 26028800, + "step": 8260 + }, + { + "epoch": 0.5290954484347993, + "grad_norm": 31.209489822387695, + "learning_rate": 1.0732337396566558e-06, + "loss": 0.3488, + "num_input_tokens_seen": 26044672, + "step": 8265 + }, + { + "epoch": 0.5294155303757762, + "grad_norm": 22.62862777709961, + "learning_rate": 1.07211932624504e-06, + "loss": 0.3944, + "num_input_tokens_seen": 26060544, + "step": 8270 + }, + { + "epoch": 0.529735612316753, + "grad_norm": 17.837751388549805, + "learning_rate": 1.0710048227915988e-06, + "loss": 0.3714, + "num_input_tokens_seen": 26076160, + "step": 8275 + }, + { + "epoch": 0.53005569425773, + "grad_norm": 28.29045295715332, + "learning_rate": 1.0698902306878024e-06, + "loss": 0.4306, + "num_input_tokens_seen": 26092352, + "step": 8280 + }, + { + "epoch": 0.5303757761987069, + "grad_norm": 26.03973960876465, + "learning_rate": 1.0687755513252325e-06, + "loss": 0.3033, + "num_input_tokens_seen": 26107776, + "step": 8285 + }, + { + "epoch": 0.5306958581396838, + "grad_norm": 11.766892433166504, + "learning_rate": 1.0676607860955794e-06, + "loss": 0.3065, + "num_input_tokens_seen": 26123712, + "step": 8290 + }, + { + "epoch": 0.5310159400806607, + "grad_norm": 44.154823303222656, + "learning_rate": 1.0665459363906404e-06, + "loss": 0.3837, + "num_input_tokens_seen": 26139200, + "step": 8295 + }, + { + "epoch": 0.5313360220216375, + "grad_norm": 23.409717559814453, + "learning_rate": 1.0654310036023185e-06, + "loss": 0.4238, + "num_input_tokens_seen": 26153600, + "step": 8300 + }, + { + "epoch": 0.5316561039626144, + "grad_norm": 21.443828582763672, + "learning_rate": 1.0643159891226203e-06, + "loss": 0.4224, + "num_input_tokens_seen": 26169600, + "step": 8305 + }, + { + "epoch": 0.5319761859035913, + "grad_norm": 32.9205322265625, + "learning_rate": 1.0632008943436545e-06, + "loss": 0.3419, + "num_input_tokens_seen": 26185536, + "step": 8310 + }, + { + "epoch": 0.5322962678445682, + "grad_norm": 19.891855239868164, + "learning_rate": 1.0620857206576299e-06, + "loss": 0.4642, + "num_input_tokens_seen": 26201536, + "step": 8315 + }, + { + "epoch": 0.5326163497855451, + "grad_norm": 14.737898826599121, + "learning_rate": 1.0609704694568546e-06, + "loss": 0.2997, + "num_input_tokens_seen": 26216576, + "step": 8320 + }, + { + "epoch": 0.5329364317265219, + "grad_norm": 24.686262130737305, + "learning_rate": 1.0598551421337318e-06, + "loss": 0.2991, + "num_input_tokens_seen": 26232640, + "step": 8325 + }, + { + "epoch": 0.5332565136674989, + "grad_norm": 21.631492614746094, + "learning_rate": 1.0587397400807617e-06, + "loss": 0.539, + "num_input_tokens_seen": 26248448, + "step": 8330 + }, + { + "epoch": 0.5335765956084758, + "grad_norm": 35.107906341552734, + "learning_rate": 1.057624264690536e-06, + "loss": 0.5144, + "num_input_tokens_seen": 26263872, + "step": 8335 + }, + { + "epoch": 0.5338966775494527, + "grad_norm": 37.214752197265625, + "learning_rate": 1.0565087173557394e-06, + "loss": 0.4616, + "num_input_tokens_seen": 26279872, + "step": 8340 + }, + { + "epoch": 0.5342167594904296, + "grad_norm": 25.41779136657715, + "learning_rate": 1.055393099469146e-06, + "loss": 0.3469, + "num_input_tokens_seen": 26295680, + "step": 8345 + }, + { + "epoch": 0.5345368414314065, + "grad_norm": 37.366329193115234, + "learning_rate": 1.054277412423617e-06, + "loss": 0.4155, + "num_input_tokens_seen": 26311040, + "step": 8350 + }, + { + "epoch": 0.5348569233723833, + "grad_norm": 27.91765594482422, + "learning_rate": 1.0531616576121017e-06, + "loss": 0.47, + "num_input_tokens_seen": 26326144, + "step": 8355 + }, + { + "epoch": 0.5351770053133602, + "grad_norm": 24.44135856628418, + "learning_rate": 1.0520458364276325e-06, + "loss": 0.336, + "num_input_tokens_seen": 26341952, + "step": 8360 + }, + { + "epoch": 0.5354970872543371, + "grad_norm": 37.1027717590332, + "learning_rate": 1.0509299502633256e-06, + "loss": 0.3636, + "num_input_tokens_seen": 26356672, + "step": 8365 + }, + { + "epoch": 0.535817169195314, + "grad_norm": 23.37914276123047, + "learning_rate": 1.0498140005123777e-06, + "loss": 0.4452, + "num_input_tokens_seen": 26373056, + "step": 8370 + }, + { + "epoch": 0.5361372511362908, + "grad_norm": 13.970296859741211, + "learning_rate": 1.0486979885680653e-06, + "loss": 0.4254, + "num_input_tokens_seen": 26388032, + "step": 8375 + }, + { + "epoch": 0.5364573330772677, + "grad_norm": 60.650535583496094, + "learning_rate": 1.0475819158237424e-06, + "loss": 0.4324, + "num_input_tokens_seen": 26402880, + "step": 8380 + }, + { + "epoch": 0.5367774150182447, + "grad_norm": 20.941083908081055, + "learning_rate": 1.0464657836728389e-06, + "loss": 0.481, + "num_input_tokens_seen": 26419328, + "step": 8385 + }, + { + "epoch": 0.5370974969592216, + "grad_norm": 34.531517028808594, + "learning_rate": 1.045349593508859e-06, + "loss": 0.4169, + "num_input_tokens_seen": 26434112, + "step": 8390 + }, + { + "epoch": 0.5374175789001985, + "grad_norm": 22.334697723388672, + "learning_rate": 1.0442333467253788e-06, + "loss": 0.2911, + "num_input_tokens_seen": 26450688, + "step": 8395 + }, + { + "epoch": 0.5377376608411754, + "grad_norm": 37.3916130065918, + "learning_rate": 1.0431170447160463e-06, + "loss": 0.3651, + "num_input_tokens_seen": 26466368, + "step": 8400 + }, + { + "epoch": 0.5380577427821522, + "grad_norm": 22.045780181884766, + "learning_rate": 1.0420006888745767e-06, + "loss": 0.3663, + "num_input_tokens_seen": 26482624, + "step": 8405 + }, + { + "epoch": 0.5383778247231291, + "grad_norm": 23.31511688232422, + "learning_rate": 1.0408842805947543e-06, + "loss": 0.3745, + "num_input_tokens_seen": 26499200, + "step": 8410 + }, + { + "epoch": 0.538697906664106, + "grad_norm": 35.43497848510742, + "learning_rate": 1.0397678212704276e-06, + "loss": 0.5144, + "num_input_tokens_seen": 26514048, + "step": 8415 + }, + { + "epoch": 0.5390179886050829, + "grad_norm": 33.43364715576172, + "learning_rate": 1.038651312295509e-06, + "loss": 0.4061, + "num_input_tokens_seen": 26529216, + "step": 8420 + }, + { + "epoch": 0.5393380705460598, + "grad_norm": 26.025461196899414, + "learning_rate": 1.037534755063973e-06, + "loss": 0.4173, + "num_input_tokens_seen": 26545152, + "step": 8425 + }, + { + "epoch": 0.5396581524870366, + "grad_norm": 46.11745071411133, + "learning_rate": 1.0364181509698548e-06, + "loss": 0.4124, + "num_input_tokens_seen": 26560512, + "step": 8430 + }, + { + "epoch": 0.5399782344280136, + "grad_norm": 32.11628723144531, + "learning_rate": 1.0353015014072476e-06, + "loss": 0.3606, + "num_input_tokens_seen": 26575488, + "step": 8435 + }, + { + "epoch": 0.5402983163689905, + "grad_norm": 50.48931884765625, + "learning_rate": 1.0341848077703013e-06, + "loss": 0.4008, + "num_input_tokens_seen": 26591040, + "step": 8440 + }, + { + "epoch": 0.5406183983099674, + "grad_norm": 26.283470153808594, + "learning_rate": 1.033068071453221e-06, + "loss": 0.3228, + "num_input_tokens_seen": 26606976, + "step": 8445 + }, + { + "epoch": 0.5409384802509443, + "grad_norm": 35.38628005981445, + "learning_rate": 1.0319512938502653e-06, + "loss": 0.372, + "num_input_tokens_seen": 26623296, + "step": 8450 + }, + { + "epoch": 0.5412585621919211, + "grad_norm": 33.70118713378906, + "learning_rate": 1.0308344763557444e-06, + "loss": 0.3241, + "num_input_tokens_seen": 26638336, + "step": 8455 + }, + { + "epoch": 0.541578644132898, + "grad_norm": 18.15200424194336, + "learning_rate": 1.0297176203640175e-06, + "loss": 0.2886, + "num_input_tokens_seen": 26654400, + "step": 8460 + }, + { + "epoch": 0.5418987260738749, + "grad_norm": 54.90450668334961, + "learning_rate": 1.0286007272694924e-06, + "loss": 0.3553, + "num_input_tokens_seen": 26669568, + "step": 8465 + }, + { + "epoch": 0.5422188080148518, + "grad_norm": 28.259803771972656, + "learning_rate": 1.0274837984666239e-06, + "loss": 0.4816, + "num_input_tokens_seen": 26686016, + "step": 8470 + }, + { + "epoch": 0.5425388899558287, + "grad_norm": 31.482337951660156, + "learning_rate": 1.02636683534991e-06, + "loss": 0.4212, + "num_input_tokens_seen": 26701504, + "step": 8475 + }, + { + "epoch": 0.5428589718968055, + "grad_norm": 61.22187042236328, + "learning_rate": 1.0252498393138928e-06, + "loss": 0.5995, + "num_input_tokens_seen": 26717120, + "step": 8480 + }, + { + "epoch": 0.5431790538377824, + "grad_norm": 66.0864486694336, + "learning_rate": 1.0241328117531546e-06, + "loss": 0.415, + "num_input_tokens_seen": 26732736, + "step": 8485 + }, + { + "epoch": 0.5434991357787594, + "grad_norm": 33.275177001953125, + "learning_rate": 1.0230157540623174e-06, + "loss": 0.4128, + "num_input_tokens_seen": 26747392, + "step": 8490 + }, + { + "epoch": 0.5438192177197363, + "grad_norm": 24.452760696411133, + "learning_rate": 1.0218986676360415e-06, + "loss": 0.4605, + "num_input_tokens_seen": 26762112, + "step": 8495 + }, + { + "epoch": 0.5441392996607132, + "grad_norm": 22.125686645507812, + "learning_rate": 1.0207815538690216e-06, + "loss": 0.3673, + "num_input_tokens_seen": 26777856, + "step": 8500 + }, + { + "epoch": 0.54445938160169, + "grad_norm": 49.08604049682617, + "learning_rate": 1.0196644141559877e-06, + "loss": 0.3133, + "num_input_tokens_seen": 26794048, + "step": 8505 + }, + { + "epoch": 0.5447794635426669, + "grad_norm": 40.573177337646484, + "learning_rate": 1.0185472498917021e-06, + "loss": 0.3397, + "num_input_tokens_seen": 26809792, + "step": 8510 + }, + { + "epoch": 0.5450995454836438, + "grad_norm": 53.308963775634766, + "learning_rate": 1.017430062470957e-06, + "loss": 0.4261, + "num_input_tokens_seen": 26825024, + "step": 8515 + }, + { + "epoch": 0.5454196274246207, + "grad_norm": 29.25503921508789, + "learning_rate": 1.016312853288574e-06, + "loss": 0.3494, + "num_input_tokens_seen": 26841536, + "step": 8520 + }, + { + "epoch": 0.5457397093655976, + "grad_norm": 23.03032684326172, + "learning_rate": 1.0151956237394027e-06, + "loss": 0.3875, + "num_input_tokens_seen": 26857600, + "step": 8525 + }, + { + "epoch": 0.5460597913065744, + "grad_norm": 30.113536834716797, + "learning_rate": 1.0140783752183164e-06, + "loss": 0.3999, + "num_input_tokens_seen": 26874176, + "step": 8530 + }, + { + "epoch": 0.5463798732475513, + "grad_norm": 29.10158348083496, + "learning_rate": 1.0129611091202138e-06, + "loss": 0.4338, + "num_input_tokens_seen": 26890176, + "step": 8535 + }, + { + "epoch": 0.5466999551885282, + "grad_norm": 29.037277221679688, + "learning_rate": 1.0118438268400135e-06, + "loss": 0.2926, + "num_input_tokens_seen": 26905728, + "step": 8540 + }, + { + "epoch": 0.5470200371295052, + "grad_norm": 48.003170013427734, + "learning_rate": 1.0107265297726568e-06, + "loss": 0.4599, + "num_input_tokens_seen": 26921280, + "step": 8545 + }, + { + "epoch": 0.5473401190704821, + "grad_norm": 32.988037109375, + "learning_rate": 1.009609219313102e-06, + "loss": 0.4048, + "num_input_tokens_seen": 26936704, + "step": 8550 + }, + { + "epoch": 0.547660201011459, + "grad_norm": 18.555313110351562, + "learning_rate": 1.0084918968563236e-06, + "loss": 0.3919, + "num_input_tokens_seen": 26952448, + "step": 8555 + }, + { + "epoch": 0.5479802829524358, + "grad_norm": 31.703615188598633, + "learning_rate": 1.0073745637973124e-06, + "loss": 0.3917, + "num_input_tokens_seen": 26967680, + "step": 8560 + }, + { + "epoch": 0.5483003648934127, + "grad_norm": 17.730825424194336, + "learning_rate": 1.0062572215310718e-06, + "loss": 0.3606, + "num_input_tokens_seen": 26982400, + "step": 8565 + }, + { + "epoch": 0.5486204468343896, + "grad_norm": 45.23028564453125, + "learning_rate": 1.0051398714526165e-06, + "loss": 0.3227, + "num_input_tokens_seen": 26998400, + "step": 8570 + }, + { + "epoch": 0.5489405287753665, + "grad_norm": 54.13836669921875, + "learning_rate": 1.0040225149569712e-06, + "loss": 0.3731, + "num_input_tokens_seen": 27015936, + "step": 8575 + }, + { + "epoch": 0.5492606107163434, + "grad_norm": 40.3460693359375, + "learning_rate": 1.0029051534391693e-06, + "loss": 0.3339, + "num_input_tokens_seen": 27030528, + "step": 8580 + }, + { + "epoch": 0.5495806926573202, + "grad_norm": 20.486562728881836, + "learning_rate": 1.001787788294249e-06, + "loss": 0.3793, + "num_input_tokens_seen": 27046080, + "step": 8585 + }, + { + "epoch": 0.5499007745982971, + "grad_norm": 27.3046932220459, + "learning_rate": 1.0006704209172537e-06, + "loss": 0.4226, + "num_input_tokens_seen": 27061504, + "step": 8590 + }, + { + "epoch": 0.5502208565392741, + "grad_norm": 53.627471923828125, + "learning_rate": 9.995530527032301e-07, + "loss": 0.4382, + "num_input_tokens_seen": 27077056, + "step": 8595 + }, + { + "epoch": 0.550540938480251, + "grad_norm": 27.333585739135742, + "learning_rate": 9.984356850472257e-07, + "loss": 0.3435, + "num_input_tokens_seen": 27095168, + "step": 8600 + }, + { + "epoch": 0.5506689712566417, + "eval_loss": 0.40140706300735474, + "eval_runtime": 50.6758, + "eval_samples_per_second": 274.017, + "eval_steps_per_second": 34.257, + "num_input_tokens_seen": 27101056, + "step": 8602 + }, + { + "epoch": 0.5508610204212279, + "grad_norm": 15.570174217224121, + "learning_rate": 9.97318319344287e-07, + "loss": 0.3753, + "num_input_tokens_seen": 27110144, + "step": 8605 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 27.186506271362305, + "learning_rate": 9.962009569894577e-07, + "loss": 0.5273, + "num_input_tokens_seen": 27124864, + "step": 8610 + }, + { + "epoch": 0.5515011843031816, + "grad_norm": 32.424312591552734, + "learning_rate": 9.95083599377778e-07, + "loss": 0.3813, + "num_input_tokens_seen": 27140160, + "step": 8615 + }, + { + "epoch": 0.5518212662441585, + "grad_norm": 29.457183837890625, + "learning_rate": 9.939662479042828e-07, + "loss": 0.3966, + "num_input_tokens_seen": 27155712, + "step": 8620 + }, + { + "epoch": 0.5521413481851354, + "grad_norm": 62.78722381591797, + "learning_rate": 9.92848903963998e-07, + "loss": 0.4682, + "num_input_tokens_seen": 27171520, + "step": 8625 + }, + { + "epoch": 0.5524614301261123, + "grad_norm": 38.034725189208984, + "learning_rate": 9.9173156895194e-07, + "loss": 0.441, + "num_input_tokens_seen": 27186752, + "step": 8630 + }, + { + "epoch": 0.5527815120670891, + "grad_norm": 42.317752838134766, + "learning_rate": 9.906142442631154e-07, + "loss": 0.3889, + "num_input_tokens_seen": 27201664, + "step": 8635 + }, + { + "epoch": 0.553101594008066, + "grad_norm": 18.538700103759766, + "learning_rate": 9.894969312925171e-07, + "loss": 0.3914, + "num_input_tokens_seen": 27218880, + "step": 8640 + }, + { + "epoch": 0.5534216759490429, + "grad_norm": 47.84024429321289, + "learning_rate": 9.883796314351234e-07, + "loss": 0.3477, + "num_input_tokens_seen": 27235648, + "step": 8645 + }, + { + "epoch": 0.5537417578900199, + "grad_norm": 15.358366012573242, + "learning_rate": 9.872623460858966e-07, + "loss": 0.3945, + "num_input_tokens_seen": 27250880, + "step": 8650 + }, + { + "epoch": 0.5540618398309968, + "grad_norm": 13.712646484375, + "learning_rate": 9.861450766397799e-07, + "loss": 0.3152, + "num_input_tokens_seen": 27266880, + "step": 8655 + }, + { + "epoch": 0.5543819217719737, + "grad_norm": 25.727859497070312, + "learning_rate": 9.850278244916976e-07, + "loss": 0.4157, + "num_input_tokens_seen": 27282816, + "step": 8660 + }, + { + "epoch": 0.5547020037129505, + "grad_norm": 19.773151397705078, + "learning_rate": 9.839105910365524e-07, + "loss": 0.4323, + "num_input_tokens_seen": 27298496, + "step": 8665 + }, + { + "epoch": 0.5550220856539274, + "grad_norm": 41.33201217651367, + "learning_rate": 9.827933776692235e-07, + "loss": 0.3436, + "num_input_tokens_seen": 27313856, + "step": 8670 + }, + { + "epoch": 0.5553421675949043, + "grad_norm": 30.78877067565918, + "learning_rate": 9.81676185784564e-07, + "loss": 0.3362, + "num_input_tokens_seen": 27328448, + "step": 8675 + }, + { + "epoch": 0.5556622495358812, + "grad_norm": 17.378814697265625, + "learning_rate": 9.805590167774021e-07, + "loss": 0.4001, + "num_input_tokens_seen": 27343872, + "step": 8680 + }, + { + "epoch": 0.555982331476858, + "grad_norm": 80.3104476928711, + "learning_rate": 9.79441872042536e-07, + "loss": 0.5593, + "num_input_tokens_seen": 27358720, + "step": 8685 + }, + { + "epoch": 0.5563024134178349, + "grad_norm": 31.166763305664062, + "learning_rate": 9.783247529747338e-07, + "loss": 0.3818, + "num_input_tokens_seen": 27373312, + "step": 8690 + }, + { + "epoch": 0.5566224953588118, + "grad_norm": 24.34861946105957, + "learning_rate": 9.772076609687323e-07, + "loss": 0.358, + "num_input_tokens_seen": 27388544, + "step": 8695 + }, + { + "epoch": 0.5569425772997888, + "grad_norm": 37.79168701171875, + "learning_rate": 9.760905974192334e-07, + "loss": 0.3191, + "num_input_tokens_seen": 27405120, + "step": 8700 + }, + { + "epoch": 0.5572626592407657, + "grad_norm": 25.076248168945312, + "learning_rate": 9.749735637209044e-07, + "loss": 0.4284, + "num_input_tokens_seen": 27420544, + "step": 8705 + }, + { + "epoch": 0.5575827411817426, + "grad_norm": 18.509187698364258, + "learning_rate": 9.738565612683754e-07, + "loss": 0.3233, + "num_input_tokens_seen": 27435456, + "step": 8710 + }, + { + "epoch": 0.5579028231227194, + "grad_norm": 29.372270584106445, + "learning_rate": 9.727395914562363e-07, + "loss": 0.3406, + "num_input_tokens_seen": 27452032, + "step": 8715 + }, + { + "epoch": 0.5582229050636963, + "grad_norm": 26.19953155517578, + "learning_rate": 9.716226556790372e-07, + "loss": 0.4084, + "num_input_tokens_seen": 27467520, + "step": 8720 + }, + { + "epoch": 0.5585429870046732, + "grad_norm": 29.907907485961914, + "learning_rate": 9.705057553312855e-07, + "loss": 0.3149, + "num_input_tokens_seen": 27482816, + "step": 8725 + }, + { + "epoch": 0.5588630689456501, + "grad_norm": 26.37492561340332, + "learning_rate": 9.693888918074452e-07, + "loss": 0.3853, + "num_input_tokens_seen": 27497600, + "step": 8730 + }, + { + "epoch": 0.559183150886627, + "grad_norm": 41.72505187988281, + "learning_rate": 9.682720665019325e-07, + "loss": 0.4952, + "num_input_tokens_seen": 27513344, + "step": 8735 + }, + { + "epoch": 0.5595032328276038, + "grad_norm": 21.078269958496094, + "learning_rate": 9.671552808091172e-07, + "loss": 0.4144, + "num_input_tokens_seen": 27530304, + "step": 8740 + }, + { + "epoch": 0.5598233147685807, + "grad_norm": 24.30893898010254, + "learning_rate": 9.660385361233195e-07, + "loss": 0.3504, + "num_input_tokens_seen": 27545664, + "step": 8745 + }, + { + "epoch": 0.5601433967095576, + "grad_norm": 21.70425796508789, + "learning_rate": 9.649218338388084e-07, + "loss": 0.3053, + "num_input_tokens_seen": 27560704, + "step": 8750 + }, + { + "epoch": 0.5604634786505346, + "grad_norm": 23.361452102661133, + "learning_rate": 9.638051753497994e-07, + "loss": 0.4472, + "num_input_tokens_seen": 27577472, + "step": 8755 + }, + { + "epoch": 0.5607835605915115, + "grad_norm": 21.00771713256836, + "learning_rate": 9.62688562050454e-07, + "loss": 0.3676, + "num_input_tokens_seen": 27592960, + "step": 8760 + }, + { + "epoch": 0.5611036425324883, + "grad_norm": 16.86574363708496, + "learning_rate": 9.615719953348772e-07, + "loss": 0.4074, + "num_input_tokens_seen": 27610304, + "step": 8765 + }, + { + "epoch": 0.5614237244734652, + "grad_norm": 36.6751594543457, + "learning_rate": 9.604554765971148e-07, + "loss": 0.568, + "num_input_tokens_seen": 27628288, + "step": 8770 + }, + { + "epoch": 0.5617438064144421, + "grad_norm": 21.949472427368164, + "learning_rate": 9.593390072311549e-07, + "loss": 0.4119, + "num_input_tokens_seen": 27643904, + "step": 8775 + }, + { + "epoch": 0.562063888355419, + "grad_norm": 21.52281951904297, + "learning_rate": 9.582225886309216e-07, + "loss": 0.3703, + "num_input_tokens_seen": 27660224, + "step": 8780 + }, + { + "epoch": 0.5623839702963959, + "grad_norm": 17.920726776123047, + "learning_rate": 9.571062221902767e-07, + "loss": 0.3098, + "num_input_tokens_seen": 27675136, + "step": 8785 + }, + { + "epoch": 0.5627040522373727, + "grad_norm": 56.251609802246094, + "learning_rate": 9.559899093030175e-07, + "loss": 0.3557, + "num_input_tokens_seen": 27690176, + "step": 8790 + }, + { + "epoch": 0.5630241341783496, + "grad_norm": 34.147911071777344, + "learning_rate": 9.54873651362873e-07, + "loss": 0.3065, + "num_input_tokens_seen": 27704512, + "step": 8795 + }, + { + "epoch": 0.5633442161193265, + "grad_norm": 47.091190338134766, + "learning_rate": 9.537574497635043e-07, + "loss": 0.4565, + "num_input_tokens_seen": 27720448, + "step": 8800 + }, + { + "epoch": 0.5636642980603035, + "grad_norm": 30.534454345703125, + "learning_rate": 9.52641305898503e-07, + "loss": 0.5121, + "num_input_tokens_seen": 27735808, + "step": 8805 + }, + { + "epoch": 0.5639843800012804, + "grad_norm": 26.536653518676758, + "learning_rate": 9.515252211613873e-07, + "loss": 0.3203, + "num_input_tokens_seen": 27750464, + "step": 8810 + }, + { + "epoch": 0.5643044619422573, + "grad_norm": 37.218082427978516, + "learning_rate": 9.504091969456021e-07, + "loss": 0.4539, + "num_input_tokens_seen": 27764352, + "step": 8815 + }, + { + "epoch": 0.5646245438832341, + "grad_norm": 21.591670989990234, + "learning_rate": 9.492932346445165e-07, + "loss": 0.3435, + "num_input_tokens_seen": 27779840, + "step": 8820 + }, + { + "epoch": 0.564944625824211, + "grad_norm": 23.125856399536133, + "learning_rate": 9.48177335651423e-07, + "loss": 0.2767, + "num_input_tokens_seen": 27796352, + "step": 8825 + }, + { + "epoch": 0.5652647077651879, + "grad_norm": 34.4669075012207, + "learning_rate": 9.470615013595346e-07, + "loss": 0.343, + "num_input_tokens_seen": 27810624, + "step": 8830 + }, + { + "epoch": 0.5655847897061648, + "grad_norm": 37.58964538574219, + "learning_rate": 9.459457331619829e-07, + "loss": 0.4395, + "num_input_tokens_seen": 27825152, + "step": 8835 + }, + { + "epoch": 0.5659048716471416, + "grad_norm": 32.50901794433594, + "learning_rate": 9.448300324518182e-07, + "loss": 0.4142, + "num_input_tokens_seen": 27840384, + "step": 8840 + }, + { + "epoch": 0.5662249535881185, + "grad_norm": 36.96337127685547, + "learning_rate": 9.437144006220058e-07, + "loss": 0.3014, + "num_input_tokens_seen": 27856640, + "step": 8845 + }, + { + "epoch": 0.5665450355290954, + "grad_norm": 9.99196720123291, + "learning_rate": 9.425988390654249e-07, + "loss": 0.2097, + "num_input_tokens_seen": 27872768, + "step": 8850 + }, + { + "epoch": 0.5668651174700723, + "grad_norm": 39.93415069580078, + "learning_rate": 9.414833491748677e-07, + "loss": 0.5239, + "num_input_tokens_seen": 27887488, + "step": 8855 + }, + { + "epoch": 0.5671851994110493, + "grad_norm": 42.46604919433594, + "learning_rate": 9.40367932343036e-07, + "loss": 0.2943, + "num_input_tokens_seen": 27902720, + "step": 8860 + }, + { + "epoch": 0.5675052813520262, + "grad_norm": 32.06291961669922, + "learning_rate": 9.392525899625407e-07, + "loss": 0.3817, + "num_input_tokens_seen": 27918080, + "step": 8865 + }, + { + "epoch": 0.567825363293003, + "grad_norm": 50.3513298034668, + "learning_rate": 9.381373234259004e-07, + "loss": 0.3887, + "num_input_tokens_seen": 27933760, + "step": 8870 + }, + { + "epoch": 0.5681454452339799, + "grad_norm": 45.42866134643555, + "learning_rate": 9.370221341255382e-07, + "loss": 0.3858, + "num_input_tokens_seen": 27948992, + "step": 8875 + }, + { + "epoch": 0.5684655271749568, + "grad_norm": 33.09145736694336, + "learning_rate": 9.359070234537807e-07, + "loss": 0.3428, + "num_input_tokens_seen": 27966848, + "step": 8880 + }, + { + "epoch": 0.5687856091159337, + "grad_norm": 28.965185165405273, + "learning_rate": 9.34791992802857e-07, + "loss": 0.3816, + "num_input_tokens_seen": 27981696, + "step": 8885 + }, + { + "epoch": 0.5691056910569106, + "grad_norm": 28.548234939575195, + "learning_rate": 9.336770435648963e-07, + "loss": 0.2578, + "num_input_tokens_seen": 27997376, + "step": 8890 + }, + { + "epoch": 0.5694257729978874, + "grad_norm": 27.79604148864746, + "learning_rate": 9.325621771319246e-07, + "loss": 0.4013, + "num_input_tokens_seen": 28014016, + "step": 8895 + }, + { + "epoch": 0.5697458549388643, + "grad_norm": 21.713794708251953, + "learning_rate": 9.314473948958673e-07, + "loss": 0.4245, + "num_input_tokens_seen": 28030400, + "step": 8900 + }, + { + "epoch": 0.5700659368798412, + "grad_norm": 28.179527282714844, + "learning_rate": 9.303326982485422e-07, + "loss": 0.3464, + "num_input_tokens_seen": 28047104, + "step": 8905 + }, + { + "epoch": 0.5703860188208181, + "grad_norm": 53.25274658203125, + "learning_rate": 9.29218088581661e-07, + "loss": 0.3751, + "num_input_tokens_seen": 28063168, + "step": 8910 + }, + { + "epoch": 0.5707061007617951, + "grad_norm": 29.214618682861328, + "learning_rate": 9.281035672868278e-07, + "loss": 0.3567, + "num_input_tokens_seen": 28079104, + "step": 8915 + }, + { + "epoch": 0.571026182702772, + "grad_norm": 28.718603134155273, + "learning_rate": 9.269891357555348e-07, + "loss": 0.4098, + "num_input_tokens_seen": 28094720, + "step": 8920 + }, + { + "epoch": 0.5713462646437488, + "grad_norm": 38.927711486816406, + "learning_rate": 9.25874795379163e-07, + "loss": 0.2775, + "num_input_tokens_seen": 28110848, + "step": 8925 + }, + { + "epoch": 0.5716663465847257, + "grad_norm": 23.283519744873047, + "learning_rate": 9.247605475489793e-07, + "loss": 0.4246, + "num_input_tokens_seen": 28127040, + "step": 8930 + }, + { + "epoch": 0.5719864285257026, + "grad_norm": 33.84523010253906, + "learning_rate": 9.236463936561358e-07, + "loss": 0.3106, + "num_input_tokens_seen": 28143424, + "step": 8935 + }, + { + "epoch": 0.5723065104666795, + "grad_norm": 48.51530456542969, + "learning_rate": 9.225323350916661e-07, + "loss": 0.5312, + "num_input_tokens_seen": 28158528, + "step": 8940 + }, + { + "epoch": 0.5726265924076563, + "grad_norm": 38.23236846923828, + "learning_rate": 9.214183732464855e-07, + "loss": 0.3963, + "num_input_tokens_seen": 28173888, + "step": 8945 + }, + { + "epoch": 0.5729466743486332, + "grad_norm": 21.70241928100586, + "learning_rate": 9.203045095113886e-07, + "loss": 0.3663, + "num_input_tokens_seen": 28191872, + "step": 8950 + }, + { + "epoch": 0.5732667562896101, + "grad_norm": 45.41887283325195, + "learning_rate": 9.191907452770476e-07, + "loss": 0.4394, + "num_input_tokens_seen": 28206912, + "step": 8955 + }, + { + "epoch": 0.573586838230587, + "grad_norm": 30.060129165649414, + "learning_rate": 9.180770819340095e-07, + "loss": 0.4103, + "num_input_tokens_seen": 28222336, + "step": 8960 + }, + { + "epoch": 0.573906920171564, + "grad_norm": 13.660242080688477, + "learning_rate": 9.169635208726967e-07, + "loss": 0.3816, + "num_input_tokens_seen": 28238144, + "step": 8965 + }, + { + "epoch": 0.5742270021125409, + "grad_norm": 61.72530746459961, + "learning_rate": 9.15850063483403e-07, + "loss": 0.3919, + "num_input_tokens_seen": 28253376, + "step": 8970 + }, + { + "epoch": 0.5745470840535177, + "grad_norm": 27.641267776489258, + "learning_rate": 9.147367111562928e-07, + "loss": 0.3549, + "num_input_tokens_seen": 28269248, + "step": 8975 + }, + { + "epoch": 0.5748671659944946, + "grad_norm": 41.6641845703125, + "learning_rate": 9.136234652814005e-07, + "loss": 0.4151, + "num_input_tokens_seen": 28285440, + "step": 8980 + }, + { + "epoch": 0.5751872479354715, + "grad_norm": 31.049930572509766, + "learning_rate": 9.125103272486255e-07, + "loss": 0.3061, + "num_input_tokens_seen": 28300736, + "step": 8985 + }, + { + "epoch": 0.5755073298764484, + "grad_norm": 31.115554809570312, + "learning_rate": 9.11397298447734e-07, + "loss": 0.3626, + "num_input_tokens_seen": 28315712, + "step": 8990 + }, + { + "epoch": 0.5758274118174252, + "grad_norm": 28.032060623168945, + "learning_rate": 9.10284380268356e-07, + "loss": 0.342, + "num_input_tokens_seen": 28332032, + "step": 8995 + }, + { + "epoch": 0.5761474937584021, + "grad_norm": 27.94725227355957, + "learning_rate": 9.091715740999828e-07, + "loss": 0.4546, + "num_input_tokens_seen": 28347968, + "step": 9000 + }, + { + "epoch": 0.576467575699379, + "grad_norm": 26.713380813598633, + "learning_rate": 9.080588813319654e-07, + "loss": 0.39, + "num_input_tokens_seen": 28362944, + "step": 9005 + }, + { + "epoch": 0.5767876576403559, + "grad_norm": 41.13479995727539, + "learning_rate": 9.069463033535143e-07, + "loss": 0.2894, + "num_input_tokens_seen": 28378624, + "step": 9010 + }, + { + "epoch": 0.5771077395813328, + "grad_norm": 62.73693084716797, + "learning_rate": 9.058338415536962e-07, + "loss": 0.3832, + "num_input_tokens_seen": 28394048, + "step": 9015 + }, + { + "epoch": 0.5774278215223098, + "grad_norm": 40.832916259765625, + "learning_rate": 9.04721497321432e-07, + "loss": 0.3796, + "num_input_tokens_seen": 28409664, + "step": 9020 + }, + { + "epoch": 0.5777479034632866, + "grad_norm": 33.98543930053711, + "learning_rate": 9.036092720454977e-07, + "loss": 0.3794, + "num_input_tokens_seen": 28424768, + "step": 9025 + }, + { + "epoch": 0.5780679854042635, + "grad_norm": 32.285762786865234, + "learning_rate": 9.024971671145189e-07, + "loss": 0.3439, + "num_input_tokens_seen": 28439424, + "step": 9030 + }, + { + "epoch": 0.5783880673452404, + "grad_norm": 46.4213752746582, + "learning_rate": 9.013851839169718e-07, + "loss": 0.443, + "num_input_tokens_seen": 28456064, + "step": 9035 + }, + { + "epoch": 0.5787081492862173, + "grad_norm": 41.62101745605469, + "learning_rate": 9.002733238411801e-07, + "loss": 0.3457, + "num_input_tokens_seen": 28472768, + "step": 9040 + }, + { + "epoch": 0.5790282312271942, + "grad_norm": 31.970539093017578, + "learning_rate": 8.991615882753147e-07, + "loss": 0.3528, + "num_input_tokens_seen": 28488704, + "step": 9045 + }, + { + "epoch": 0.579348313168171, + "grad_norm": 60.6664924621582, + "learning_rate": 8.980499786073904e-07, + "loss": 0.4516, + "num_input_tokens_seen": 28503808, + "step": 9050 + }, + { + "epoch": 0.5796683951091479, + "grad_norm": 67.51182556152344, + "learning_rate": 8.969384962252645e-07, + "loss": 0.4616, + "num_input_tokens_seen": 28520320, + "step": 9055 + }, + { + "epoch": 0.5799884770501248, + "grad_norm": 48.923702239990234, + "learning_rate": 8.958271425166366e-07, + "loss": 0.4395, + "num_input_tokens_seen": 28535680, + "step": 9060 + }, + { + "epoch": 0.5803085589911017, + "grad_norm": 24.09952735900879, + "learning_rate": 8.947159188690442e-07, + "loss": 0.3943, + "num_input_tokens_seen": 28551488, + "step": 9065 + }, + { + "epoch": 0.5806286409320787, + "grad_norm": 63.0959587097168, + "learning_rate": 8.93604826669863e-07, + "loss": 0.4633, + "num_input_tokens_seen": 28567040, + "step": 9070 + }, + { + "epoch": 0.5809487228730555, + "grad_norm": 25.30870246887207, + "learning_rate": 8.924938673063052e-07, + "loss": 0.389, + "num_input_tokens_seen": 28581568, + "step": 9075 + }, + { + "epoch": 0.5812688048140324, + "grad_norm": 15.70492172241211, + "learning_rate": 8.913830421654166e-07, + "loss": 0.3616, + "num_input_tokens_seen": 28596992, + "step": 9080 + }, + { + "epoch": 0.5815888867550093, + "grad_norm": 25.204086303710938, + "learning_rate": 8.902723526340746e-07, + "loss": 0.4752, + "num_input_tokens_seen": 28613952, + "step": 9085 + }, + { + "epoch": 0.5819089686959862, + "grad_norm": 33.69202423095703, + "learning_rate": 8.89161800098989e-07, + "loss": 0.4343, + "num_input_tokens_seen": 28628736, + "step": 9090 + }, + { + "epoch": 0.5822290506369631, + "grad_norm": 56.151214599609375, + "learning_rate": 8.880513859466974e-07, + "loss": 0.3683, + "num_input_tokens_seen": 28644928, + "step": 9095 + }, + { + "epoch": 0.5825491325779399, + "grad_norm": 20.137807846069336, + "learning_rate": 8.869411115635645e-07, + "loss": 0.2861, + "num_input_tokens_seen": 28661184, + "step": 9100 + }, + { + "epoch": 0.5828692145189168, + "grad_norm": 19.313888549804688, + "learning_rate": 8.858309783357816e-07, + "loss": 0.2823, + "num_input_tokens_seen": 28675776, + "step": 9105 + }, + { + "epoch": 0.5831892964598937, + "grad_norm": 55.6663703918457, + "learning_rate": 8.847209876493629e-07, + "loss": 0.4335, + "num_input_tokens_seen": 28692160, + "step": 9110 + }, + { + "epoch": 0.5835093784008706, + "grad_norm": 29.397314071655273, + "learning_rate": 8.836111408901441e-07, + "loss": 0.2627, + "num_input_tokens_seen": 28707328, + "step": 9115 + }, + { + "epoch": 0.5838294603418475, + "grad_norm": 45.07856369018555, + "learning_rate": 8.825014394437828e-07, + "loss": 0.4159, + "num_input_tokens_seen": 28722624, + "step": 9120 + }, + { + "epoch": 0.5841495422828245, + "grad_norm": 19.385255813598633, + "learning_rate": 8.813918846957542e-07, + "loss": 0.4013, + "num_input_tokens_seen": 28737856, + "step": 9125 + }, + { + "epoch": 0.5844696242238013, + "grad_norm": 20.24775505065918, + "learning_rate": 8.802824780313499e-07, + "loss": 0.4447, + "num_input_tokens_seen": 28752448, + "step": 9130 + }, + { + "epoch": 0.5847897061647782, + "grad_norm": 24.05107307434082, + "learning_rate": 8.791732208356771e-07, + "loss": 0.3924, + "num_input_tokens_seen": 28767616, + "step": 9135 + }, + { + "epoch": 0.5851097881057551, + "grad_norm": 16.49118995666504, + "learning_rate": 8.780641144936573e-07, + "loss": 0.4676, + "num_input_tokens_seen": 28782400, + "step": 9140 + }, + { + "epoch": 0.585429870046732, + "grad_norm": 51.63336944580078, + "learning_rate": 8.76955160390022e-07, + "loss": 0.446, + "num_input_tokens_seen": 28798336, + "step": 9145 + }, + { + "epoch": 0.5857499519877089, + "grad_norm": 16.7198543548584, + "learning_rate": 8.758463599093136e-07, + "loss": 0.2893, + "num_input_tokens_seen": 28814336, + "step": 9150 + }, + { + "epoch": 0.5860700339286857, + "grad_norm": 42.81842041015625, + "learning_rate": 8.747377144358825e-07, + "loss": 0.5245, + "num_input_tokens_seen": 28830656, + "step": 9155 + }, + { + "epoch": 0.5863901158696626, + "grad_norm": 45.69813919067383, + "learning_rate": 8.736292253538861e-07, + "loss": 0.4169, + "num_input_tokens_seen": 28846656, + "step": 9160 + }, + { + "epoch": 0.5867101978106395, + "grad_norm": 36.1093635559082, + "learning_rate": 8.725208940472851e-07, + "loss": 0.3115, + "num_input_tokens_seen": 28862848, + "step": 9165 + }, + { + "epoch": 0.5870302797516164, + "grad_norm": 15.554344177246094, + "learning_rate": 8.714127218998448e-07, + "loss": 0.4071, + "num_input_tokens_seen": 28878400, + "step": 9170 + }, + { + "epoch": 0.5873503616925934, + "grad_norm": 65.59827423095703, + "learning_rate": 8.70304710295131e-07, + "loss": 0.5141, + "num_input_tokens_seen": 28893568, + "step": 9175 + }, + { + "epoch": 0.5876704436335702, + "grad_norm": 33.058006286621094, + "learning_rate": 8.691968606165092e-07, + "loss": 0.3766, + "num_input_tokens_seen": 28909824, + "step": 9180 + }, + { + "epoch": 0.5879905255745471, + "grad_norm": 31.04238510131836, + "learning_rate": 8.680891742471429e-07, + "loss": 0.3189, + "num_input_tokens_seen": 28925568, + "step": 9185 + }, + { + "epoch": 0.588310607515524, + "grad_norm": 28.480064392089844, + "learning_rate": 8.669816525699912e-07, + "loss": 0.3236, + "num_input_tokens_seen": 28941056, + "step": 9190 + }, + { + "epoch": 0.5886306894565009, + "grad_norm": 35.62641143798828, + "learning_rate": 8.658742969678079e-07, + "loss": 0.4153, + "num_input_tokens_seen": 28955456, + "step": 9195 + }, + { + "epoch": 0.5889507713974778, + "grad_norm": 35.563079833984375, + "learning_rate": 8.647671088231398e-07, + "loss": 0.2925, + "num_input_tokens_seen": 28971136, + "step": 9200 + }, + { + "epoch": 0.5892708533384546, + "grad_norm": 49.758174896240234, + "learning_rate": 8.636600895183245e-07, + "loss": 0.4144, + "num_input_tokens_seen": 28988480, + "step": 9205 + }, + { + "epoch": 0.5895909352794315, + "grad_norm": 45.750244140625, + "learning_rate": 8.625532404354877e-07, + "loss": 0.3702, + "num_input_tokens_seen": 29004544, + "step": 9210 + }, + { + "epoch": 0.5899110172204084, + "grad_norm": 15.713849067687988, + "learning_rate": 8.614465629565443e-07, + "loss": 0.3944, + "num_input_tokens_seen": 29019328, + "step": 9215 + }, + { + "epoch": 0.5902310991613853, + "grad_norm": 27.874608993530273, + "learning_rate": 8.603400584631939e-07, + "loss": 0.3414, + "num_input_tokens_seen": 29034752, + "step": 9220 + }, + { + "epoch": 0.5905511811023622, + "grad_norm": 35.91742706298828, + "learning_rate": 8.592337283369198e-07, + "loss": 0.4473, + "num_input_tokens_seen": 29050816, + "step": 9225 + }, + { + "epoch": 0.5908712630433391, + "grad_norm": 28.39652442932129, + "learning_rate": 8.581275739589893e-07, + "loss": 0.2833, + "num_input_tokens_seen": 29065920, + "step": 9230 + }, + { + "epoch": 0.591191344984316, + "grad_norm": 38.084529876708984, + "learning_rate": 8.570215967104481e-07, + "loss": 0.509, + "num_input_tokens_seen": 29080960, + "step": 9235 + }, + { + "epoch": 0.5915114269252929, + "grad_norm": 25.937759399414062, + "learning_rate": 8.559157979721225e-07, + "loss": 0.4754, + "num_input_tokens_seen": 29096768, + "step": 9240 + }, + { + "epoch": 0.5918315088662698, + "grad_norm": 34.819908142089844, + "learning_rate": 8.548101791246145e-07, + "loss": 0.5592, + "num_input_tokens_seen": 29112448, + "step": 9245 + }, + { + "epoch": 0.5921515908072467, + "grad_norm": 25.236101150512695, + "learning_rate": 8.537047415483028e-07, + "loss": 0.3436, + "num_input_tokens_seen": 29127808, + "step": 9250 + }, + { + "epoch": 0.5924716727482235, + "grad_norm": 14.861101150512695, + "learning_rate": 8.525994866233388e-07, + "loss": 0.2783, + "num_input_tokens_seen": 29142912, + "step": 9255 + }, + { + "epoch": 0.5927917546892004, + "grad_norm": 46.569793701171875, + "learning_rate": 8.514944157296464e-07, + "loss": 0.3963, + "num_input_tokens_seen": 29159168, + "step": 9260 + }, + { + "epoch": 0.5931118366301773, + "grad_norm": 38.154476165771484, + "learning_rate": 8.503895302469199e-07, + "loss": 0.3875, + "num_input_tokens_seen": 29175488, + "step": 9265 + }, + { + "epoch": 0.5934319185711542, + "grad_norm": 38.24485778808594, + "learning_rate": 8.492848315546214e-07, + "loss": 0.4151, + "num_input_tokens_seen": 29191104, + "step": 9270 + }, + { + "epoch": 0.5937520005121311, + "grad_norm": 18.280685424804688, + "learning_rate": 8.4818032103198e-07, + "loss": 0.4485, + "num_input_tokens_seen": 29206208, + "step": 9275 + }, + { + "epoch": 0.5940720824531079, + "grad_norm": 44.53709030151367, + "learning_rate": 8.470760000579906e-07, + "loss": 0.4186, + "num_input_tokens_seen": 29221312, + "step": 9280 + }, + { + "epoch": 0.5943921643940849, + "grad_norm": 47.62037658691406, + "learning_rate": 8.459718700114108e-07, + "loss": 0.5047, + "num_input_tokens_seen": 29236800, + "step": 9285 + }, + { + "epoch": 0.5947122463350618, + "grad_norm": 32.94901657104492, + "learning_rate": 8.448679322707595e-07, + "loss": 0.4508, + "num_input_tokens_seen": 29252480, + "step": 9290 + }, + { + "epoch": 0.5950323282760387, + "grad_norm": 41.171470642089844, + "learning_rate": 8.437641882143163e-07, + "loss": 0.6011, + "num_input_tokens_seen": 29266944, + "step": 9295 + }, + { + "epoch": 0.5953524102170156, + "grad_norm": 19.1787166595459, + "learning_rate": 8.426606392201185e-07, + "loss": 0.3106, + "num_input_tokens_seen": 29282816, + "step": 9300 + }, + { + "epoch": 0.5956724921579925, + "grad_norm": 25.21051597595215, + "learning_rate": 8.415572866659599e-07, + "loss": 0.3154, + "num_input_tokens_seen": 29297984, + "step": 9305 + }, + { + "epoch": 0.5959925740989693, + "grad_norm": 24.09900665283203, + "learning_rate": 8.404541319293896e-07, + "loss": 0.3652, + "num_input_tokens_seen": 29313664, + "step": 9310 + }, + { + "epoch": 0.5963126560399462, + "grad_norm": 21.775083541870117, + "learning_rate": 8.393511763877086e-07, + "loss": 0.593, + "num_input_tokens_seen": 29329472, + "step": 9315 + }, + { + "epoch": 0.5966327379809231, + "grad_norm": 33.893714904785156, + "learning_rate": 8.3824842141797e-07, + "loss": 0.4438, + "num_input_tokens_seen": 29346048, + "step": 9320 + }, + { + "epoch": 0.5969528199219, + "grad_norm": 30.351116180419922, + "learning_rate": 8.371458683969765e-07, + "loss": 0.3806, + "num_input_tokens_seen": 29361664, + "step": 9325 + }, + { + "epoch": 0.5972729018628768, + "grad_norm": 23.981342315673828, + "learning_rate": 8.360435187012787e-07, + "loss": 0.3848, + "num_input_tokens_seen": 29376896, + "step": 9330 + }, + { + "epoch": 0.5975929838038538, + "grad_norm": 38.46620559692383, + "learning_rate": 8.349413737071725e-07, + "loss": 0.3866, + "num_input_tokens_seen": 29392640, + "step": 9335 + }, + { + "epoch": 0.5979130657448307, + "grad_norm": 31.351964950561523, + "learning_rate": 8.338394347906994e-07, + "loss": 0.4486, + "num_input_tokens_seen": 29407808, + "step": 9340 + }, + { + "epoch": 0.5982331476858076, + "grad_norm": 39.49605178833008, + "learning_rate": 8.327377033276431e-07, + "loss": 0.3114, + "num_input_tokens_seen": 29422528, + "step": 9345 + }, + { + "epoch": 0.5985532296267845, + "grad_norm": 27.02570343017578, + "learning_rate": 8.316361806935279e-07, + "loss": 0.3484, + "num_input_tokens_seen": 29438272, + "step": 9350 + }, + { + "epoch": 0.5988733115677614, + "grad_norm": 31.83344078063965, + "learning_rate": 8.305348682636177e-07, + "loss": 0.4397, + "num_input_tokens_seen": 29453376, + "step": 9355 + }, + { + "epoch": 0.5991933935087382, + "grad_norm": 31.224191665649414, + "learning_rate": 8.294337674129144e-07, + "loss": 0.4149, + "num_input_tokens_seen": 29469248, + "step": 9360 + }, + { + "epoch": 0.5995134754497151, + "grad_norm": 36.2857780456543, + "learning_rate": 8.283328795161554e-07, + "loss": 0.2745, + "num_input_tokens_seen": 29485888, + "step": 9365 + }, + { + "epoch": 0.599833557390692, + "grad_norm": 29.26090431213379, + "learning_rate": 8.272322059478114e-07, + "loss": 0.3205, + "num_input_tokens_seen": 29500864, + "step": 9370 + }, + { + "epoch": 0.6001536393316689, + "grad_norm": 22.406578063964844, + "learning_rate": 8.261317480820871e-07, + "loss": 0.2427, + "num_input_tokens_seen": 29516288, + "step": 9375 + }, + { + "epoch": 0.6004737212726458, + "grad_norm": 34.06975555419922, + "learning_rate": 8.250315072929168e-07, + "loss": 0.4129, + "num_input_tokens_seen": 29530880, + "step": 9380 + }, + { + "epoch": 0.6007297868254273, + "eval_loss": 0.39462828636169434, + "eval_runtime": 50.6774, + "eval_samples_per_second": 274.008, + "eval_steps_per_second": 34.256, + "num_input_tokens_seen": 29544576, + "step": 9384 + }, + { + "epoch": 0.6007938032136226, + "grad_norm": 21.22164535522461, + "learning_rate": 8.239314849539637e-07, + "loss": 0.35, + "num_input_tokens_seen": 29547840, + "step": 9385 + }, + { + "epoch": 0.6011138851545996, + "grad_norm": 33.94794464111328, + "learning_rate": 8.228316824386193e-07, + "loss": 0.4234, + "num_input_tokens_seen": 29564096, + "step": 9390 + }, + { + "epoch": 0.6014339670955765, + "grad_norm": 38.5579948425293, + "learning_rate": 8.217321011199995e-07, + "loss": 0.378, + "num_input_tokens_seen": 29579520, + "step": 9395 + }, + { + "epoch": 0.6017540490365534, + "grad_norm": 48.82097625732422, + "learning_rate": 8.206327423709441e-07, + "loss": 0.433, + "num_input_tokens_seen": 29594048, + "step": 9400 + }, + { + "epoch": 0.6020741309775303, + "grad_norm": 24.50299644470215, + "learning_rate": 8.195336075640163e-07, + "loss": 0.3913, + "num_input_tokens_seen": 29610368, + "step": 9405 + }, + { + "epoch": 0.6023942129185071, + "grad_norm": 33.91872787475586, + "learning_rate": 8.184346980714984e-07, + "loss": 0.4248, + "num_input_tokens_seen": 29625792, + "step": 9410 + }, + { + "epoch": 0.602714294859484, + "grad_norm": 45.42316818237305, + "learning_rate": 8.173360152653914e-07, + "loss": 0.3563, + "num_input_tokens_seen": 29642240, + "step": 9415 + }, + { + "epoch": 0.6030343768004609, + "grad_norm": 29.160640716552734, + "learning_rate": 8.162375605174143e-07, + "loss": 0.3138, + "num_input_tokens_seen": 29658176, + "step": 9420 + }, + { + "epoch": 0.6033544587414378, + "grad_norm": 29.974868774414062, + "learning_rate": 8.151393351990005e-07, + "loss": 0.3068, + "num_input_tokens_seen": 29675392, + "step": 9425 + }, + { + "epoch": 0.6036745406824147, + "grad_norm": 30.860563278198242, + "learning_rate": 8.140413406812971e-07, + "loss": 0.4185, + "num_input_tokens_seen": 29690048, + "step": 9430 + }, + { + "epoch": 0.6039946226233915, + "grad_norm": 44.825531005859375, + "learning_rate": 8.129435783351635e-07, + "loss": 0.3111, + "num_input_tokens_seen": 29705088, + "step": 9435 + }, + { + "epoch": 0.6043147045643685, + "grad_norm": 30.896249771118164, + "learning_rate": 8.118460495311685e-07, + "loss": 0.4421, + "num_input_tokens_seen": 29720576, + "step": 9440 + }, + { + "epoch": 0.6046347865053454, + "grad_norm": 30.919109344482422, + "learning_rate": 8.107487556395901e-07, + "loss": 0.4352, + "num_input_tokens_seen": 29736896, + "step": 9445 + }, + { + "epoch": 0.6049548684463223, + "grad_norm": 31.737159729003906, + "learning_rate": 8.096516980304115e-07, + "loss": 0.3688, + "num_input_tokens_seen": 29752768, + "step": 9450 + }, + { + "epoch": 0.6052749503872992, + "grad_norm": 50.259193420410156, + "learning_rate": 8.085548780733238e-07, + "loss": 0.3448, + "num_input_tokens_seen": 29768640, + "step": 9455 + }, + { + "epoch": 0.605595032328276, + "grad_norm": 30.822101593017578, + "learning_rate": 8.074582971377182e-07, + "loss": 0.3368, + "num_input_tokens_seen": 29786240, + "step": 9460 + }, + { + "epoch": 0.6059151142692529, + "grad_norm": 40.47896194458008, + "learning_rate": 8.063619565926892e-07, + "loss": 0.4407, + "num_input_tokens_seen": 29802176, + "step": 9465 + }, + { + "epoch": 0.6062351962102298, + "grad_norm": 17.590438842773438, + "learning_rate": 8.052658578070313e-07, + "loss": 0.3992, + "num_input_tokens_seen": 29817600, + "step": 9470 + }, + { + "epoch": 0.6065552781512067, + "grad_norm": 14.345329284667969, + "learning_rate": 8.041700021492362e-07, + "loss": 0.3233, + "num_input_tokens_seen": 29832960, + "step": 9475 + }, + { + "epoch": 0.6068753600921836, + "grad_norm": 21.619794845581055, + "learning_rate": 8.030743909874924e-07, + "loss": 0.2929, + "num_input_tokens_seen": 29848448, + "step": 9480 + }, + { + "epoch": 0.6071954420331604, + "grad_norm": 19.34469985961914, + "learning_rate": 8.019790256896839e-07, + "loss": 0.3299, + "num_input_tokens_seen": 29863296, + "step": 9485 + }, + { + "epoch": 0.6075155239741373, + "grad_norm": 49.3680419921875, + "learning_rate": 8.008839076233871e-07, + "loss": 0.3934, + "num_input_tokens_seen": 29880128, + "step": 9490 + }, + { + "epoch": 0.6078356059151143, + "grad_norm": 24.034257888793945, + "learning_rate": 7.997890381558691e-07, + "loss": 0.3564, + "num_input_tokens_seen": 29895296, + "step": 9495 + }, + { + "epoch": 0.6081556878560912, + "grad_norm": 33.2259521484375, + "learning_rate": 7.986944186540878e-07, + "loss": 0.434, + "num_input_tokens_seen": 29911296, + "step": 9500 + }, + { + "epoch": 0.6084757697970681, + "grad_norm": 48.712547302246094, + "learning_rate": 7.976000504846885e-07, + "loss": 0.4603, + "num_input_tokens_seen": 29926912, + "step": 9505 + }, + { + "epoch": 0.608795851738045, + "grad_norm": 96.85162353515625, + "learning_rate": 7.965059350140024e-07, + "loss": 0.4725, + "num_input_tokens_seen": 29942272, + "step": 9510 + }, + { + "epoch": 0.6091159336790218, + "grad_norm": 39.1156120300293, + "learning_rate": 7.954120736080461e-07, + "loss": 0.4093, + "num_input_tokens_seen": 29958016, + "step": 9515 + }, + { + "epoch": 0.6094360156199987, + "grad_norm": 25.699668884277344, + "learning_rate": 7.943184676325178e-07, + "loss": 0.5561, + "num_input_tokens_seen": 29974720, + "step": 9520 + }, + { + "epoch": 0.6097560975609756, + "grad_norm": 27.526456832885742, + "learning_rate": 7.932251184527974e-07, + "loss": 0.4295, + "num_input_tokens_seen": 29991680, + "step": 9525 + }, + { + "epoch": 0.6100761795019525, + "grad_norm": 25.670839309692383, + "learning_rate": 7.921320274339446e-07, + "loss": 0.2678, + "num_input_tokens_seen": 30007168, + "step": 9530 + }, + { + "epoch": 0.6103962614429294, + "grad_norm": 40.132652282714844, + "learning_rate": 7.910391959406966e-07, + "loss": 0.34, + "num_input_tokens_seen": 30022656, + "step": 9535 + }, + { + "epoch": 0.6107163433839062, + "grad_norm": 35.257442474365234, + "learning_rate": 7.899466253374653e-07, + "loss": 0.3896, + "num_input_tokens_seen": 30038144, + "step": 9540 + }, + { + "epoch": 0.6110364253248832, + "grad_norm": 33.6865234375, + "learning_rate": 7.88854316988339e-07, + "loss": 0.3321, + "num_input_tokens_seen": 30055488, + "step": 9545 + }, + { + "epoch": 0.6113565072658601, + "grad_norm": 41.24062728881836, + "learning_rate": 7.877622722570771e-07, + "loss": 0.3085, + "num_input_tokens_seen": 30071040, + "step": 9550 + }, + { + "epoch": 0.611676589206837, + "grad_norm": 26.09576416015625, + "learning_rate": 7.866704925071101e-07, + "loss": 0.4224, + "num_input_tokens_seen": 30088000, + "step": 9555 + }, + { + "epoch": 0.6119966711478139, + "grad_norm": 25.814329147338867, + "learning_rate": 7.855789791015377e-07, + "loss": 0.4359, + "num_input_tokens_seen": 30103040, + "step": 9560 + }, + { + "epoch": 0.6123167530887907, + "grad_norm": 42.739044189453125, + "learning_rate": 7.844877334031277e-07, + "loss": 0.3887, + "num_input_tokens_seen": 30117760, + "step": 9565 + }, + { + "epoch": 0.6126368350297676, + "grad_norm": 33.584739685058594, + "learning_rate": 7.833967567743131e-07, + "loss": 0.4969, + "num_input_tokens_seen": 30133888, + "step": 9570 + }, + { + "epoch": 0.6129569169707445, + "grad_norm": 33.699302673339844, + "learning_rate": 7.823060505771903e-07, + "loss": 0.3596, + "num_input_tokens_seen": 30149312, + "step": 9575 + }, + { + "epoch": 0.6132769989117214, + "grad_norm": 46.7923698425293, + "learning_rate": 7.812156161735199e-07, + "loss": 0.4176, + "num_input_tokens_seen": 30163840, + "step": 9580 + }, + { + "epoch": 0.6135970808526983, + "grad_norm": 67.17172241210938, + "learning_rate": 7.801254549247215e-07, + "loss": 0.5474, + "num_input_tokens_seen": 30180544, + "step": 9585 + }, + { + "epoch": 0.6139171627936751, + "grad_norm": 19.53614044189453, + "learning_rate": 7.790355681918739e-07, + "loss": 0.338, + "num_input_tokens_seen": 30197120, + "step": 9590 + }, + { + "epoch": 0.614237244734652, + "grad_norm": 56.427513122558594, + "learning_rate": 7.779459573357144e-07, + "loss": 0.4222, + "num_input_tokens_seen": 30213376, + "step": 9595 + }, + { + "epoch": 0.614557326675629, + "grad_norm": 21.21477508544922, + "learning_rate": 7.768566237166338e-07, + "loss": 0.4138, + "num_input_tokens_seen": 30229120, + "step": 9600 + }, + { + "epoch": 0.6148774086166059, + "grad_norm": 44.509056091308594, + "learning_rate": 7.757675686946786e-07, + "loss": 0.5188, + "num_input_tokens_seen": 30244544, + "step": 9605 + }, + { + "epoch": 0.6151974905575828, + "grad_norm": 29.196828842163086, + "learning_rate": 7.746787936295468e-07, + "loss": 0.4258, + "num_input_tokens_seen": 30260864, + "step": 9610 + }, + { + "epoch": 0.6155175724985597, + "grad_norm": 43.83945846557617, + "learning_rate": 7.735902998805868e-07, + "loss": 0.3681, + "num_input_tokens_seen": 30275456, + "step": 9615 + }, + { + "epoch": 0.6158376544395365, + "grad_norm": 48.068565368652344, + "learning_rate": 7.725020888067955e-07, + "loss": 0.4284, + "num_input_tokens_seen": 30291008, + "step": 9620 + }, + { + "epoch": 0.6161577363805134, + "grad_norm": 19.066804885864258, + "learning_rate": 7.714141617668176e-07, + "loss": 0.4779, + "num_input_tokens_seen": 30306816, + "step": 9625 + }, + { + "epoch": 0.6164778183214903, + "grad_norm": 25.80859375, + "learning_rate": 7.703265201189426e-07, + "loss": 0.3342, + "num_input_tokens_seen": 30322240, + "step": 9630 + }, + { + "epoch": 0.6167979002624672, + "grad_norm": 18.206134796142578, + "learning_rate": 7.692391652211036e-07, + "loss": 0.3333, + "num_input_tokens_seen": 30338048, + "step": 9635 + }, + { + "epoch": 0.617117982203444, + "grad_norm": 39.0733757019043, + "learning_rate": 7.681520984308769e-07, + "loss": 0.3256, + "num_input_tokens_seen": 30353984, + "step": 9640 + }, + { + "epoch": 0.6174380641444209, + "grad_norm": 41.74904251098633, + "learning_rate": 7.670653211054772e-07, + "loss": 0.496, + "num_input_tokens_seen": 30370048, + "step": 9645 + }, + { + "epoch": 0.6177581460853978, + "grad_norm": 36.59706497192383, + "learning_rate": 7.659788346017591e-07, + "loss": 0.4137, + "num_input_tokens_seen": 30385344, + "step": 9650 + }, + { + "epoch": 0.6180782280263748, + "grad_norm": 45.44746017456055, + "learning_rate": 7.648926402762133e-07, + "loss": 0.3994, + "num_input_tokens_seen": 30400576, + "step": 9655 + }, + { + "epoch": 0.6183983099673517, + "grad_norm": 39.379695892333984, + "learning_rate": 7.638067394849671e-07, + "loss": 0.3861, + "num_input_tokens_seen": 30415424, + "step": 9660 + }, + { + "epoch": 0.6187183919083286, + "grad_norm": 45.68638229370117, + "learning_rate": 7.627211335837797e-07, + "loss": 0.3971, + "num_input_tokens_seen": 30430592, + "step": 9665 + }, + { + "epoch": 0.6190384738493054, + "grad_norm": 22.4736385345459, + "learning_rate": 7.616358239280427e-07, + "loss": 0.4285, + "num_input_tokens_seen": 30445952, + "step": 9670 + }, + { + "epoch": 0.6193585557902823, + "grad_norm": 30.363630294799805, + "learning_rate": 7.605508118727787e-07, + "loss": 0.3194, + "num_input_tokens_seen": 30461568, + "step": 9675 + }, + { + "epoch": 0.6196786377312592, + "grad_norm": 26.44789695739746, + "learning_rate": 7.594660987726373e-07, + "loss": 0.3642, + "num_input_tokens_seen": 30476672, + "step": 9680 + }, + { + "epoch": 0.6199987196722361, + "grad_norm": 48.39008712768555, + "learning_rate": 7.583816859818956e-07, + "loss": 0.3969, + "num_input_tokens_seen": 30492672, + "step": 9685 + }, + { + "epoch": 0.620318801613213, + "grad_norm": 23.859933853149414, + "learning_rate": 7.57297574854456e-07, + "loss": 0.3783, + "num_input_tokens_seen": 30507712, + "step": 9690 + }, + { + "epoch": 0.6206388835541898, + "grad_norm": 48.97274398803711, + "learning_rate": 7.56213766743844e-07, + "loss": 0.4477, + "num_input_tokens_seen": 30524032, + "step": 9695 + }, + { + "epoch": 0.6209589654951667, + "grad_norm": 15.872191429138184, + "learning_rate": 7.551302630032064e-07, + "loss": 0.3281, + "num_input_tokens_seen": 30539776, + "step": 9700 + }, + { + "epoch": 0.6212790474361437, + "grad_norm": 20.59368324279785, + "learning_rate": 7.540470649853106e-07, + "loss": 0.3758, + "num_input_tokens_seen": 30554752, + "step": 9705 + }, + { + "epoch": 0.6215991293771206, + "grad_norm": 24.169780731201172, + "learning_rate": 7.529641740425419e-07, + "loss": 0.3955, + "num_input_tokens_seen": 30571968, + "step": 9710 + }, + { + "epoch": 0.6219192113180975, + "grad_norm": 31.49615478515625, + "learning_rate": 7.518815915269023e-07, + "loss": 0.449, + "num_input_tokens_seen": 30587264, + "step": 9715 + }, + { + "epoch": 0.6222392932590743, + "grad_norm": 19.680313110351562, + "learning_rate": 7.507993187900092e-07, + "loss": 0.3823, + "num_input_tokens_seen": 30603200, + "step": 9720 + }, + { + "epoch": 0.6225593752000512, + "grad_norm": 26.631305694580078, + "learning_rate": 7.497173571830926e-07, + "loss": 0.4186, + "num_input_tokens_seen": 30617856, + "step": 9725 + }, + { + "epoch": 0.6228794571410281, + "grad_norm": 45.39612579345703, + "learning_rate": 7.486357080569938e-07, + "loss": 0.4631, + "num_input_tokens_seen": 30632448, + "step": 9730 + }, + { + "epoch": 0.623199539082005, + "grad_norm": 23.03763771057129, + "learning_rate": 7.47554372762165e-07, + "loss": 0.3768, + "num_input_tokens_seen": 30647680, + "step": 9735 + }, + { + "epoch": 0.6235196210229819, + "grad_norm": 55.08168029785156, + "learning_rate": 7.464733526486662e-07, + "loss": 0.4872, + "num_input_tokens_seen": 30663616, + "step": 9740 + }, + { + "epoch": 0.6238397029639587, + "grad_norm": 44.099178314208984, + "learning_rate": 7.453926490661628e-07, + "loss": 0.3515, + "num_input_tokens_seen": 30682496, + "step": 9745 + }, + { + "epoch": 0.6241597849049356, + "grad_norm": 45.976837158203125, + "learning_rate": 7.443122633639267e-07, + "loss": 0.3687, + "num_input_tokens_seen": 30697664, + "step": 9750 + }, + { + "epoch": 0.6244798668459125, + "grad_norm": 68.98104858398438, + "learning_rate": 7.432321968908319e-07, + "loss": 0.3856, + "num_input_tokens_seen": 30713408, + "step": 9755 + }, + { + "epoch": 0.6247999487868895, + "grad_norm": 24.77080535888672, + "learning_rate": 7.421524509953543e-07, + "loss": 0.3178, + "num_input_tokens_seen": 30730496, + "step": 9760 + }, + { + "epoch": 0.6251200307278664, + "grad_norm": 30.445371627807617, + "learning_rate": 7.410730270255687e-07, + "loss": 0.4143, + "num_input_tokens_seen": 30745664, + "step": 9765 + }, + { + "epoch": 0.6254401126688433, + "grad_norm": 35.7066764831543, + "learning_rate": 7.399939263291493e-07, + "loss": 0.3747, + "num_input_tokens_seen": 30760960, + "step": 9770 + }, + { + "epoch": 0.6257601946098201, + "grad_norm": 36.02008819580078, + "learning_rate": 7.389151502533657e-07, + "loss": 0.479, + "num_input_tokens_seen": 30775872, + "step": 9775 + }, + { + "epoch": 0.626080276550797, + "grad_norm": 19.047998428344727, + "learning_rate": 7.378367001450819e-07, + "loss": 0.3696, + "num_input_tokens_seen": 30791424, + "step": 9780 + }, + { + "epoch": 0.6264003584917739, + "grad_norm": 57.05532455444336, + "learning_rate": 7.367585773507567e-07, + "loss": 0.426, + "num_input_tokens_seen": 30807680, + "step": 9785 + }, + { + "epoch": 0.6267204404327508, + "grad_norm": 42.29533386230469, + "learning_rate": 7.356807832164385e-07, + "loss": 0.4515, + "num_input_tokens_seen": 30823680, + "step": 9790 + }, + { + "epoch": 0.6270405223737276, + "grad_norm": 19.884836196899414, + "learning_rate": 7.346033190877654e-07, + "loss": 0.4401, + "num_input_tokens_seen": 30839360, + "step": 9795 + }, + { + "epoch": 0.6273606043147045, + "grad_norm": 32.67311096191406, + "learning_rate": 7.335261863099651e-07, + "loss": 0.3541, + "num_input_tokens_seen": 30854784, + "step": 9800 + }, + { + "epoch": 0.6276806862556814, + "grad_norm": 33.140811920166016, + "learning_rate": 7.324493862278498e-07, + "loss": 0.4232, + "num_input_tokens_seen": 30870592, + "step": 9805 + }, + { + "epoch": 0.6280007681966584, + "grad_norm": 41.48550796508789, + "learning_rate": 7.313729201858167e-07, + "loss": 0.4636, + "num_input_tokens_seen": 30885952, + "step": 9810 + }, + { + "epoch": 0.6283208501376353, + "grad_norm": 23.54149627685547, + "learning_rate": 7.302967895278473e-07, + "loss": 0.3329, + "num_input_tokens_seen": 30902080, + "step": 9815 + }, + { + "epoch": 0.6286409320786122, + "grad_norm": 30.32986831665039, + "learning_rate": 7.292209955975028e-07, + "loss": 0.4042, + "num_input_tokens_seen": 30919232, + "step": 9820 + }, + { + "epoch": 0.628961014019589, + "grad_norm": 37.84484100341797, + "learning_rate": 7.281455397379244e-07, + "loss": 0.4078, + "num_input_tokens_seen": 30936448, + "step": 9825 + }, + { + "epoch": 0.6292810959605659, + "grad_norm": 40.27985763549805, + "learning_rate": 7.270704232918316e-07, + "loss": 0.3225, + "num_input_tokens_seen": 30952256, + "step": 9830 + }, + { + "epoch": 0.6296011779015428, + "grad_norm": 56.62047576904297, + "learning_rate": 7.2599564760152e-07, + "loss": 0.4216, + "num_input_tokens_seen": 30967360, + "step": 9835 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 28.868928909301758, + "learning_rate": 7.249212140088592e-07, + "loss": 0.3852, + "num_input_tokens_seen": 30982016, + "step": 9840 + }, + { + "epoch": 0.6302413417834966, + "grad_norm": 20.260948181152344, + "learning_rate": 7.23847123855293e-07, + "loss": 0.3347, + "num_input_tokens_seen": 30998080, + "step": 9845 + }, + { + "epoch": 0.6305614237244734, + "grad_norm": 20.72256088256836, + "learning_rate": 7.227733784818349e-07, + "loss": 0.2805, + "num_input_tokens_seen": 31013184, + "step": 9850 + }, + { + "epoch": 0.6308815056654503, + "grad_norm": 11.925490379333496, + "learning_rate": 7.216999792290683e-07, + "loss": 0.3804, + "num_input_tokens_seen": 31028800, + "step": 9855 + }, + { + "epoch": 0.6312015876064272, + "grad_norm": 35.1019287109375, + "learning_rate": 7.206269274371457e-07, + "loss": 0.49, + "num_input_tokens_seen": 31044736, + "step": 9860 + }, + { + "epoch": 0.6315216695474042, + "grad_norm": 17.91670036315918, + "learning_rate": 7.195542244457845e-07, + "loss": 0.3496, + "num_input_tokens_seen": 31059968, + "step": 9865 + }, + { + "epoch": 0.6318417514883811, + "grad_norm": 22.282245635986328, + "learning_rate": 7.184818715942666e-07, + "loss": 0.3266, + "num_input_tokens_seen": 31074880, + "step": 9870 + }, + { + "epoch": 0.6321618334293579, + "grad_norm": 35.720767974853516, + "learning_rate": 7.174098702214374e-07, + "loss": 0.355, + "num_input_tokens_seen": 31090432, + "step": 9875 + }, + { + "epoch": 0.6324819153703348, + "grad_norm": 28.854347229003906, + "learning_rate": 7.163382216657033e-07, + "loss": 0.37, + "num_input_tokens_seen": 31107264, + "step": 9880 + }, + { + "epoch": 0.6328019973113117, + "grad_norm": 52.169334411621094, + "learning_rate": 7.152669272650302e-07, + "loss": 0.3444, + "num_input_tokens_seen": 31124096, + "step": 9885 + }, + { + "epoch": 0.6331220792522886, + "grad_norm": 51.4202766418457, + "learning_rate": 7.141959883569411e-07, + "loss": 0.3869, + "num_input_tokens_seen": 31138752, + "step": 9890 + }, + { + "epoch": 0.6334421611932655, + "grad_norm": 29.026763916015625, + "learning_rate": 7.131254062785165e-07, + "loss": 0.4701, + "num_input_tokens_seen": 31154048, + "step": 9895 + }, + { + "epoch": 0.6337622431342423, + "grad_norm": 26.36556625366211, + "learning_rate": 7.120551823663907e-07, + "loss": 0.5118, + "num_input_tokens_seen": 31170304, + "step": 9900 + }, + { + "epoch": 0.6340823250752192, + "grad_norm": 15.353890419006348, + "learning_rate": 7.109853179567499e-07, + "loss": 0.2817, + "num_input_tokens_seen": 31186368, + "step": 9905 + }, + { + "epoch": 0.6344024070161961, + "grad_norm": 23.679868698120117, + "learning_rate": 7.099158143853337e-07, + "loss": 0.4235, + "num_input_tokens_seen": 31201664, + "step": 9910 + }, + { + "epoch": 0.634722488957173, + "grad_norm": 39.42850112915039, + "learning_rate": 7.088466729874289e-07, + "loss": 0.3891, + "num_input_tokens_seen": 31217216, + "step": 9915 + }, + { + "epoch": 0.63504257089815, + "grad_norm": 30.177127838134766, + "learning_rate": 7.077778950978713e-07, + "loss": 0.3784, + "num_input_tokens_seen": 31233728, + "step": 9920 + }, + { + "epoch": 0.6353626528391269, + "grad_norm": 19.685361862182617, + "learning_rate": 7.06709482051043e-07, + "loss": 0.4682, + "num_input_tokens_seen": 31249664, + "step": 9925 + }, + { + "epoch": 0.6356827347801037, + "grad_norm": 21.19015121459961, + "learning_rate": 7.056414351808698e-07, + "loss": 0.3033, + "num_input_tokens_seen": 31265408, + "step": 9930 + }, + { + "epoch": 0.6360028167210806, + "grad_norm": 26.959909439086914, + "learning_rate": 7.045737558208206e-07, + "loss": 0.3517, + "num_input_tokens_seen": 31281088, + "step": 9935 + }, + { + "epoch": 0.6363228986620575, + "grad_norm": 30.385330200195312, + "learning_rate": 7.035064453039064e-07, + "loss": 0.4014, + "num_input_tokens_seen": 31296512, + "step": 9940 + }, + { + "epoch": 0.6366429806030344, + "grad_norm": 14.387809753417969, + "learning_rate": 7.024395049626766e-07, + "loss": 0.3772, + "num_input_tokens_seen": 31312000, + "step": 9945 + }, + { + "epoch": 0.6369630625440112, + "grad_norm": 43.21665954589844, + "learning_rate": 7.013729361292182e-07, + "loss": 0.3408, + "num_input_tokens_seen": 31327488, + "step": 9950 + }, + { + "epoch": 0.6372831444849881, + "grad_norm": 40.43202209472656, + "learning_rate": 7.003067401351554e-07, + "loss": 0.3065, + "num_input_tokens_seen": 31343936, + "step": 9955 + }, + { + "epoch": 0.637603226425965, + "grad_norm": 74.08061218261719, + "learning_rate": 6.992409183116465e-07, + "loss": 0.406, + "num_input_tokens_seen": 31359232, + "step": 9960 + }, + { + "epoch": 0.6379233083669419, + "grad_norm": 19.582399368286133, + "learning_rate": 6.981754719893826e-07, + "loss": 0.3724, + "num_input_tokens_seen": 31375616, + "step": 9965 + }, + { + "epoch": 0.6382433903079189, + "grad_norm": 47.04770278930664, + "learning_rate": 6.971104024985852e-07, + "loss": 0.4679, + "num_input_tokens_seen": 31391680, + "step": 9970 + }, + { + "epoch": 0.6385634722488958, + "grad_norm": 29.400909423828125, + "learning_rate": 6.960457111690068e-07, + "loss": 0.3809, + "num_input_tokens_seen": 31407424, + "step": 9975 + }, + { + "epoch": 0.6388835541898726, + "grad_norm": 20.47035789489746, + "learning_rate": 6.94981399329927e-07, + "loss": 0.3787, + "num_input_tokens_seen": 31422912, + "step": 9980 + }, + { + "epoch": 0.6392036361308495, + "grad_norm": 73.91484832763672, + "learning_rate": 6.939174683101509e-07, + "loss": 0.3921, + "num_input_tokens_seen": 31438912, + "step": 9985 + }, + { + "epoch": 0.6395237180718264, + "grad_norm": 23.82988739013672, + "learning_rate": 6.9285391943801e-07, + "loss": 0.2898, + "num_input_tokens_seen": 31455168, + "step": 9990 + }, + { + "epoch": 0.6398438000128033, + "grad_norm": 32.45968246459961, + "learning_rate": 6.917907540413569e-07, + "loss": 0.3133, + "num_input_tokens_seen": 31470592, + "step": 9995 + }, + { + "epoch": 0.6401638819537802, + "grad_norm": 32.134952545166016, + "learning_rate": 6.907279734475659e-07, + "loss": 0.3477, + "num_input_tokens_seen": 31485632, + "step": 10000 + }, + { + "epoch": 0.640483963894757, + "grad_norm": 35.19672393798828, + "learning_rate": 6.896655789835317e-07, + "loss": 0.3725, + "num_input_tokens_seen": 31500352, + "step": 10005 + }, + { + "epoch": 0.6408040458357339, + "grad_norm": 39.76215744018555, + "learning_rate": 6.886035719756656e-07, + "loss": 0.3702, + "num_input_tokens_seen": 31516928, + "step": 10010 + }, + { + "epoch": 0.6411241277767108, + "grad_norm": 20.91424560546875, + "learning_rate": 6.875419537498959e-07, + "loss": 0.279, + "num_input_tokens_seen": 31532608, + "step": 10015 + }, + { + "epoch": 0.6414442097176877, + "grad_norm": 57.11235809326172, + "learning_rate": 6.864807256316658e-07, + "loss": 0.6005, + "num_input_tokens_seen": 31548608, + "step": 10020 + }, + { + "epoch": 0.6417642916586647, + "grad_norm": 28.377958297729492, + "learning_rate": 6.854198889459311e-07, + "loss": 0.4117, + "num_input_tokens_seen": 31564224, + "step": 10025 + }, + { + "epoch": 0.6420843735996415, + "grad_norm": 8.59209156036377, + "learning_rate": 6.84359445017158e-07, + "loss": 0.2567, + "num_input_tokens_seen": 31579200, + "step": 10030 + }, + { + "epoch": 0.6424044555406184, + "grad_norm": 46.38290023803711, + "learning_rate": 6.832993951693244e-07, + "loss": 0.4257, + "num_input_tokens_seen": 31594816, + "step": 10035 + }, + { + "epoch": 0.6427245374815953, + "grad_norm": 16.47113800048828, + "learning_rate": 6.822397407259144e-07, + "loss": 0.3547, + "num_input_tokens_seen": 31610432, + "step": 10040 + }, + { + "epoch": 0.6430446194225722, + "grad_norm": 40.31021499633789, + "learning_rate": 6.811804830099186e-07, + "loss": 0.3794, + "num_input_tokens_seen": 31627520, + "step": 10045 + }, + { + "epoch": 0.6433647013635491, + "grad_norm": 48.28114318847656, + "learning_rate": 6.801216233438336e-07, + "loss": 0.3557, + "num_input_tokens_seen": 31644352, + "step": 10050 + }, + { + "epoch": 0.6436847833045259, + "grad_norm": 32.33661651611328, + "learning_rate": 6.790631630496575e-07, + "loss": 0.3919, + "num_input_tokens_seen": 31660160, + "step": 10055 + }, + { + "epoch": 0.6440048652455028, + "grad_norm": 47.6024169921875, + "learning_rate": 6.780051034488903e-07, + "loss": 0.45, + "num_input_tokens_seen": 31676352, + "step": 10060 + }, + { + "epoch": 0.6443249471864797, + "grad_norm": 95.15774536132812, + "learning_rate": 6.769474458625323e-07, + "loss": 0.3409, + "num_input_tokens_seen": 31692160, + "step": 10065 + }, + { + "epoch": 0.6446450291274566, + "grad_norm": 19.220699310302734, + "learning_rate": 6.758901916110813e-07, + "loss": 0.316, + "num_input_tokens_seen": 31707712, + "step": 10070 + }, + { + "epoch": 0.6449651110684336, + "grad_norm": 16.066856384277344, + "learning_rate": 6.748333420145315e-07, + "loss": 0.3278, + "num_input_tokens_seen": 31723776, + "step": 10075 + }, + { + "epoch": 0.6452851930094105, + "grad_norm": 22.89158058166504, + "learning_rate": 6.737768983923718e-07, + "loss": 0.4116, + "num_input_tokens_seen": 31740672, + "step": 10080 + }, + { + "epoch": 0.6456052749503873, + "grad_norm": 35.07290267944336, + "learning_rate": 6.727208620635849e-07, + "loss": 0.2941, + "num_input_tokens_seen": 31755648, + "step": 10085 + }, + { + "epoch": 0.6459253568913642, + "grad_norm": 32.60226058959961, + "learning_rate": 6.716652343466446e-07, + "loss": 0.4488, + "num_input_tokens_seen": 31770624, + "step": 10090 + }, + { + "epoch": 0.6462454388323411, + "grad_norm": 39.100215911865234, + "learning_rate": 6.706100165595139e-07, + "loss": 0.3044, + "num_input_tokens_seen": 31786816, + "step": 10095 + }, + { + "epoch": 0.646565520773318, + "grad_norm": 34.54078674316406, + "learning_rate": 6.695552100196452e-07, + "loss": 0.3924, + "num_input_tokens_seen": 31801792, + "step": 10100 + }, + { + "epoch": 0.6468856027142948, + "grad_norm": 69.25830841064453, + "learning_rate": 6.685008160439769e-07, + "loss": 0.5025, + "num_input_tokens_seen": 31818944, + "step": 10105 + }, + { + "epoch": 0.6472056846552717, + "grad_norm": 33.3784294128418, + "learning_rate": 6.674468359489313e-07, + "loss": 0.406, + "num_input_tokens_seen": 31834176, + "step": 10110 + }, + { + "epoch": 0.6475257665962486, + "grad_norm": 29.71148109436035, + "learning_rate": 6.663932710504163e-07, + "loss": 0.3488, + "num_input_tokens_seen": 31850176, + "step": 10115 + }, + { + "epoch": 0.6478458485372255, + "grad_norm": 48.27974319458008, + "learning_rate": 6.653401226638192e-07, + "loss": 0.3845, + "num_input_tokens_seen": 31865600, + "step": 10120 + }, + { + "epoch": 0.6481659304782024, + "grad_norm": 23.962369918823242, + "learning_rate": 6.64287392104008e-07, + "loss": 0.3985, + "num_input_tokens_seen": 31880512, + "step": 10125 + }, + { + "epoch": 0.6484860124191794, + "grad_norm": 24.32285499572754, + "learning_rate": 6.632350806853299e-07, + "loss": 0.4502, + "num_input_tokens_seen": 31896512, + "step": 10130 + }, + { + "epoch": 0.6488060943601562, + "grad_norm": 44.430274963378906, + "learning_rate": 6.621831897216074e-07, + "loss": 0.4127, + "num_input_tokens_seen": 31912768, + "step": 10135 + }, + { + "epoch": 0.6491261763011331, + "grad_norm": 137.93301391601562, + "learning_rate": 6.611317205261387e-07, + "loss": 0.4332, + "num_input_tokens_seen": 31927488, + "step": 10140 + }, + { + "epoch": 0.64944625824211, + "grad_norm": 28.834609985351562, + "learning_rate": 6.60080674411696e-07, + "loss": 0.3464, + "num_input_tokens_seen": 31942784, + "step": 10145 + }, + { + "epoch": 0.6497663401830869, + "grad_norm": 15.092977523803711, + "learning_rate": 6.590300526905225e-07, + "loss": 0.3139, + "num_input_tokens_seen": 31958528, + "step": 10150 + }, + { + "epoch": 0.6500864221240638, + "grad_norm": 38.77704620361328, + "learning_rate": 6.579798566743313e-07, + "loss": 0.4675, + "num_input_tokens_seen": 31974016, + "step": 10155 + }, + { + "epoch": 0.6504065040650406, + "grad_norm": 41.677734375, + "learning_rate": 6.569300876743049e-07, + "loss": 0.3272, + "num_input_tokens_seen": 31990720, + "step": 10160 + }, + { + "epoch": 0.6507265860060175, + "grad_norm": 31.978822708129883, + "learning_rate": 6.558807470010923e-07, + "loss": 0.324, + "num_input_tokens_seen": 32007168, + "step": 10165 + }, + { + "epoch": 0.6507906023942129, + "eval_loss": 0.38159435987472534, + "eval_runtime": 50.6443, + "eval_samples_per_second": 274.187, + "eval_steps_per_second": 34.278, + "num_input_tokens_seen": 32010176, + "step": 10166 + }, + { + "epoch": 0.6510466679469944, + "grad_norm": 30.454833984375, + "learning_rate": 6.548318359648071e-07, + "loss": 0.355, + "num_input_tokens_seen": 32022208, + "step": 10170 + }, + { + "epoch": 0.6513667498879713, + "grad_norm": 41.25565719604492, + "learning_rate": 6.537833558750279e-07, + "loss": 0.4036, + "num_input_tokens_seen": 32037760, + "step": 10175 + }, + { + "epoch": 0.6516868318289483, + "grad_norm": 51.35231018066406, + "learning_rate": 6.527353080407938e-07, + "loss": 0.3108, + "num_input_tokens_seen": 32052800, + "step": 10180 + }, + { + "epoch": 0.6520069137699251, + "grad_norm": 28.18378448486328, + "learning_rate": 6.516876937706048e-07, + "loss": 0.3491, + "num_input_tokens_seen": 32068288, + "step": 10185 + }, + { + "epoch": 0.652326995710902, + "grad_norm": 26.677705764770508, + "learning_rate": 6.506405143724196e-07, + "loss": 0.3769, + "num_input_tokens_seen": 32083200, + "step": 10190 + }, + { + "epoch": 0.6526470776518789, + "grad_norm": 50.78616714477539, + "learning_rate": 6.495937711536546e-07, + "loss": 0.4685, + "num_input_tokens_seen": 32098432, + "step": 10195 + }, + { + "epoch": 0.6529671595928558, + "grad_norm": 38.68675994873047, + "learning_rate": 6.485474654211803e-07, + "loss": 0.4177, + "num_input_tokens_seen": 32114944, + "step": 10200 + }, + { + "epoch": 0.6532872415338327, + "grad_norm": 40.70989227294922, + "learning_rate": 6.475015984813217e-07, + "loss": 0.3062, + "num_input_tokens_seen": 32131520, + "step": 10205 + }, + { + "epoch": 0.6536073234748095, + "grad_norm": 13.664650917053223, + "learning_rate": 6.464561716398564e-07, + "loss": 0.321, + "num_input_tokens_seen": 32147008, + "step": 10210 + }, + { + "epoch": 0.6539274054157864, + "grad_norm": 33.89069366455078, + "learning_rate": 6.454111862020122e-07, + "loss": 0.3851, + "num_input_tokens_seen": 32162560, + "step": 10215 + }, + { + "epoch": 0.6542474873567633, + "grad_norm": 28.84914207458496, + "learning_rate": 6.443666434724649e-07, + "loss": 0.3665, + "num_input_tokens_seen": 32177024, + "step": 10220 + }, + { + "epoch": 0.6545675692977402, + "grad_norm": 25.591217041015625, + "learning_rate": 6.43322544755339e-07, + "loss": 0.542, + "num_input_tokens_seen": 32193024, + "step": 10225 + }, + { + "epoch": 0.6548876512387171, + "grad_norm": 30.0502986907959, + "learning_rate": 6.422788913542038e-07, + "loss": 0.3447, + "num_input_tokens_seen": 32208896, + "step": 10230 + }, + { + "epoch": 0.655207733179694, + "grad_norm": 16.857473373413086, + "learning_rate": 6.412356845720726e-07, + "loss": 0.338, + "num_input_tokens_seen": 32225280, + "step": 10235 + }, + { + "epoch": 0.6555278151206709, + "grad_norm": 17.287302017211914, + "learning_rate": 6.40192925711402e-07, + "loss": 0.3601, + "num_input_tokens_seen": 32240768, + "step": 10240 + }, + { + "epoch": 0.6558478970616478, + "grad_norm": 27.92411994934082, + "learning_rate": 6.39150616074088e-07, + "loss": 0.3259, + "num_input_tokens_seen": 32255872, + "step": 10245 + }, + { + "epoch": 0.6561679790026247, + "grad_norm": 30.83510971069336, + "learning_rate": 6.381087569614668e-07, + "loss": 0.4068, + "num_input_tokens_seen": 32272512, + "step": 10250 + }, + { + "epoch": 0.6564880609436016, + "grad_norm": 14.433576583862305, + "learning_rate": 6.370673496743116e-07, + "loss": 0.3801, + "num_input_tokens_seen": 32286272, + "step": 10255 + }, + { + "epoch": 0.6568081428845784, + "grad_norm": 24.54606056213379, + "learning_rate": 6.360263955128315e-07, + "loss": 0.4224, + "num_input_tokens_seen": 32301952, + "step": 10260 + }, + { + "epoch": 0.6571282248255553, + "grad_norm": 18.52509307861328, + "learning_rate": 6.349858957766701e-07, + "loss": 0.3657, + "num_input_tokens_seen": 32318208, + "step": 10265 + }, + { + "epoch": 0.6574483067665322, + "grad_norm": 23.090232849121094, + "learning_rate": 6.339458517649036e-07, + "loss": 0.3385, + "num_input_tokens_seen": 32333504, + "step": 10270 + }, + { + "epoch": 0.6577683887075091, + "grad_norm": 33.056419372558594, + "learning_rate": 6.329062647760395e-07, + "loss": 0.3685, + "num_input_tokens_seen": 32350208, + "step": 10275 + }, + { + "epoch": 0.658088470648486, + "grad_norm": 35.26163864135742, + "learning_rate": 6.318671361080137e-07, + "loss": 0.3259, + "num_input_tokens_seen": 32365376, + "step": 10280 + }, + { + "epoch": 0.6584085525894628, + "grad_norm": 18.646900177001953, + "learning_rate": 6.308284670581906e-07, + "loss": 0.3411, + "num_input_tokens_seen": 32381248, + "step": 10285 + }, + { + "epoch": 0.6587286345304398, + "grad_norm": 28.427839279174805, + "learning_rate": 6.297902589233612e-07, + "loss": 0.47, + "num_input_tokens_seen": 32395968, + "step": 10290 + }, + { + "epoch": 0.6590487164714167, + "grad_norm": 32.31058883666992, + "learning_rate": 6.287525129997404e-07, + "loss": 0.3728, + "num_input_tokens_seen": 32411456, + "step": 10295 + }, + { + "epoch": 0.6593687984123936, + "grad_norm": 25.885282516479492, + "learning_rate": 6.277152305829656e-07, + "loss": 0.4016, + "num_input_tokens_seen": 32426880, + "step": 10300 + }, + { + "epoch": 0.6596888803533705, + "grad_norm": 29.73259925842285, + "learning_rate": 6.266784129680968e-07, + "loss": 0.326, + "num_input_tokens_seen": 32442368, + "step": 10305 + }, + { + "epoch": 0.6600089622943474, + "grad_norm": 39.80248260498047, + "learning_rate": 6.256420614496129e-07, + "loss": 0.3979, + "num_input_tokens_seen": 32457920, + "step": 10310 + }, + { + "epoch": 0.6603290442353242, + "grad_norm": 36.66291809082031, + "learning_rate": 6.246061773214102e-07, + "loss": 0.4182, + "num_input_tokens_seen": 32473536, + "step": 10315 + }, + { + "epoch": 0.6606491261763011, + "grad_norm": 38.01105499267578, + "learning_rate": 6.235707618768032e-07, + "loss": 0.4073, + "num_input_tokens_seen": 32490240, + "step": 10320 + }, + { + "epoch": 0.660969208117278, + "grad_norm": 57.50590515136719, + "learning_rate": 6.225358164085196e-07, + "loss": 0.344, + "num_input_tokens_seen": 32505728, + "step": 10325 + }, + { + "epoch": 0.6612892900582549, + "grad_norm": 46.767845153808594, + "learning_rate": 6.21501342208701e-07, + "loss": 0.3463, + "num_input_tokens_seen": 32520960, + "step": 10330 + }, + { + "epoch": 0.6616093719992318, + "grad_norm": 22.598268508911133, + "learning_rate": 6.204673405689007e-07, + "loss": 0.3945, + "num_input_tokens_seen": 32535872, + "step": 10335 + }, + { + "epoch": 0.6619294539402087, + "grad_norm": 21.846588134765625, + "learning_rate": 6.194338127800823e-07, + "loss": 0.3129, + "num_input_tokens_seen": 32552448, + "step": 10340 + }, + { + "epoch": 0.6622495358811856, + "grad_norm": 34.799537658691406, + "learning_rate": 6.184007601326165e-07, + "loss": 0.3936, + "num_input_tokens_seen": 32567232, + "step": 10345 + }, + { + "epoch": 0.6625696178221625, + "grad_norm": 30.42659568786621, + "learning_rate": 6.173681839162824e-07, + "loss": 0.37, + "num_input_tokens_seen": 32583360, + "step": 10350 + }, + { + "epoch": 0.6628896997631394, + "grad_norm": 31.33951187133789, + "learning_rate": 6.163360854202635e-07, + "loss": 0.3328, + "num_input_tokens_seen": 32598656, + "step": 10355 + }, + { + "epoch": 0.6632097817041163, + "grad_norm": 19.95844841003418, + "learning_rate": 6.153044659331461e-07, + "loss": 0.3189, + "num_input_tokens_seen": 32614144, + "step": 10360 + }, + { + "epoch": 0.6635298636450931, + "grad_norm": 32.167152404785156, + "learning_rate": 6.142733267429203e-07, + "loss": 0.3708, + "num_input_tokens_seen": 32629120, + "step": 10365 + }, + { + "epoch": 0.66384994558607, + "grad_norm": 25.17389678955078, + "learning_rate": 6.132426691369748e-07, + "loss": 0.4218, + "num_input_tokens_seen": 32645952, + "step": 10370 + }, + { + "epoch": 0.6641700275270469, + "grad_norm": 13.657832145690918, + "learning_rate": 6.122124944020977e-07, + "loss": 0.3955, + "num_input_tokens_seen": 32661696, + "step": 10375 + }, + { + "epoch": 0.6644901094680238, + "grad_norm": 24.945608139038086, + "learning_rate": 6.111828038244749e-07, + "loss": 0.3779, + "num_input_tokens_seen": 32677760, + "step": 10380 + }, + { + "epoch": 0.6648101914090007, + "grad_norm": 15.53358268737793, + "learning_rate": 6.101535986896866e-07, + "loss": 0.3063, + "num_input_tokens_seen": 32693568, + "step": 10385 + }, + { + "epoch": 0.6651302733499775, + "grad_norm": 16.71603775024414, + "learning_rate": 6.091248802827076e-07, + "loss": 0.2929, + "num_input_tokens_seen": 32708736, + "step": 10390 + }, + { + "epoch": 0.6654503552909545, + "grad_norm": 23.295944213867188, + "learning_rate": 6.080966498879048e-07, + "loss": 0.3258, + "num_input_tokens_seen": 32725440, + "step": 10395 + }, + { + "epoch": 0.6657704372319314, + "grad_norm": 39.92107009887695, + "learning_rate": 6.070689087890363e-07, + "loss": 0.293, + "num_input_tokens_seen": 32740608, + "step": 10400 + }, + { + "epoch": 0.6660905191729083, + "grad_norm": 21.400508880615234, + "learning_rate": 6.060416582692487e-07, + "loss": 0.4026, + "num_input_tokens_seen": 32756416, + "step": 10405 + }, + { + "epoch": 0.6664106011138852, + "grad_norm": 31.731203079223633, + "learning_rate": 6.05014899611076e-07, + "loss": 0.3334, + "num_input_tokens_seen": 32771904, + "step": 10410 + }, + { + "epoch": 0.666730683054862, + "grad_norm": 53.159175872802734, + "learning_rate": 6.039886340964391e-07, + "loss": 0.3801, + "num_input_tokens_seen": 32787392, + "step": 10415 + }, + { + "epoch": 0.6670507649958389, + "grad_norm": 21.526613235473633, + "learning_rate": 6.029628630066423e-07, + "loss": 0.3367, + "num_input_tokens_seen": 32803136, + "step": 10420 + }, + { + "epoch": 0.6673708469368158, + "grad_norm": 33.03938293457031, + "learning_rate": 6.019375876223724e-07, + "loss": 0.4266, + "num_input_tokens_seen": 32818624, + "step": 10425 + }, + { + "epoch": 0.6676909288777927, + "grad_norm": 28.526151657104492, + "learning_rate": 6.009128092236982e-07, + "loss": 0.4689, + "num_input_tokens_seen": 32833920, + "step": 10430 + }, + { + "epoch": 0.6680110108187696, + "grad_norm": 19.832090377807617, + "learning_rate": 5.998885290900679e-07, + "loss": 0.3876, + "num_input_tokens_seen": 32848512, + "step": 10435 + }, + { + "epoch": 0.6683310927597464, + "grad_norm": 27.41183853149414, + "learning_rate": 5.988647485003061e-07, + "loss": 0.3414, + "num_input_tokens_seen": 32865088, + "step": 10440 + }, + { + "epoch": 0.6686511747007234, + "grad_norm": 63.17194366455078, + "learning_rate": 5.978414687326164e-07, + "loss": 0.4652, + "num_input_tokens_seen": 32882048, + "step": 10445 + }, + { + "epoch": 0.6689712566417003, + "grad_norm": 34.8876953125, + "learning_rate": 5.968186910645745e-07, + "loss": 0.3775, + "num_input_tokens_seen": 32898624, + "step": 10450 + }, + { + "epoch": 0.6692913385826772, + "grad_norm": 36.981510162353516, + "learning_rate": 5.957964167731305e-07, + "loss": 0.5049, + "num_input_tokens_seen": 32914176, + "step": 10455 + }, + { + "epoch": 0.6696114205236541, + "grad_norm": 41.19829559326172, + "learning_rate": 5.947746471346065e-07, + "loss": 0.4117, + "num_input_tokens_seen": 32931136, + "step": 10460 + }, + { + "epoch": 0.669931502464631, + "grad_norm": 48.431705474853516, + "learning_rate": 5.937533834246932e-07, + "loss": 0.3321, + "num_input_tokens_seen": 32947648, + "step": 10465 + }, + { + "epoch": 0.6702515844056078, + "grad_norm": 25.455169677734375, + "learning_rate": 5.927326269184504e-07, + "loss": 0.3795, + "num_input_tokens_seen": 32964224, + "step": 10470 + }, + { + "epoch": 0.6705716663465847, + "grad_norm": 49.933773040771484, + "learning_rate": 5.917123788903049e-07, + "loss": 0.4602, + "num_input_tokens_seen": 32982080, + "step": 10475 + }, + { + "epoch": 0.6708917482875616, + "grad_norm": 38.94703674316406, + "learning_rate": 5.906926406140484e-07, + "loss": 0.4674, + "num_input_tokens_seen": 32997440, + "step": 10480 + }, + { + "epoch": 0.6712118302285385, + "grad_norm": 37.308963775634766, + "learning_rate": 5.896734133628354e-07, + "loss": 0.424, + "num_input_tokens_seen": 33013056, + "step": 10485 + }, + { + "epoch": 0.6715319121695154, + "grad_norm": 24.161361694335938, + "learning_rate": 5.886546984091838e-07, + "loss": 0.3804, + "num_input_tokens_seen": 33028416, + "step": 10490 + }, + { + "epoch": 0.6718519941104922, + "grad_norm": 31.681415557861328, + "learning_rate": 5.876364970249711e-07, + "loss": 0.3567, + "num_input_tokens_seen": 33042880, + "step": 10495 + }, + { + "epoch": 0.6721720760514692, + "grad_norm": 34.7954216003418, + "learning_rate": 5.866188104814336e-07, + "loss": 0.2744, + "num_input_tokens_seen": 33058240, + "step": 10500 + }, + { + "epoch": 0.6724921579924461, + "grad_norm": 19.30687141418457, + "learning_rate": 5.856016400491646e-07, + "loss": 0.3833, + "num_input_tokens_seen": 33073920, + "step": 10505 + }, + { + "epoch": 0.672812239933423, + "grad_norm": 9.509024620056152, + "learning_rate": 5.845849869981136e-07, + "loss": 0.3158, + "num_input_tokens_seen": 33089344, + "step": 10510 + }, + { + "epoch": 0.6731323218743999, + "grad_norm": 23.290632247924805, + "learning_rate": 5.835688525975842e-07, + "loss": 0.3608, + "num_input_tokens_seen": 33104384, + "step": 10515 + }, + { + "epoch": 0.6734524038153767, + "grad_norm": 24.240638732910156, + "learning_rate": 5.825532381162311e-07, + "loss": 0.3926, + "num_input_tokens_seen": 33120064, + "step": 10520 + }, + { + "epoch": 0.6737724857563536, + "grad_norm": 24.758691787719727, + "learning_rate": 5.815381448220619e-07, + "loss": 0.3889, + "num_input_tokens_seen": 33136128, + "step": 10525 + }, + { + "epoch": 0.6740925676973305, + "grad_norm": 29.978185653686523, + "learning_rate": 5.805235739824327e-07, + "loss": 0.3599, + "num_input_tokens_seen": 33154816, + "step": 10530 + }, + { + "epoch": 0.6744126496383074, + "grad_norm": 44.5728759765625, + "learning_rate": 5.795095268640458e-07, + "loss": 0.5053, + "num_input_tokens_seen": 33169920, + "step": 10535 + }, + { + "epoch": 0.6747327315792843, + "grad_norm": 37.81836700439453, + "learning_rate": 5.784960047329519e-07, + "loss": 0.5436, + "num_input_tokens_seen": 33187712, + "step": 10540 + }, + { + "epoch": 0.6750528135202611, + "grad_norm": 14.268577575683594, + "learning_rate": 5.774830088545452e-07, + "loss": 0.3931, + "num_input_tokens_seen": 33202880, + "step": 10545 + }, + { + "epoch": 0.6753728954612381, + "grad_norm": 17.859638214111328, + "learning_rate": 5.76470540493563e-07, + "loss": 0.3059, + "num_input_tokens_seen": 33218944, + "step": 10550 + }, + { + "epoch": 0.675692977402215, + "grad_norm": 27.35489845275879, + "learning_rate": 5.754586009140836e-07, + "loss": 0.4468, + "num_input_tokens_seen": 33234688, + "step": 10555 + }, + { + "epoch": 0.6760130593431919, + "grad_norm": 52.45825958251953, + "learning_rate": 5.744471913795256e-07, + "loss": 0.3582, + "num_input_tokens_seen": 33249920, + "step": 10560 + }, + { + "epoch": 0.6763331412841688, + "grad_norm": 35.08700180053711, + "learning_rate": 5.734363131526459e-07, + "loss": 0.3455, + "num_input_tokens_seen": 33265792, + "step": 10565 + }, + { + "epoch": 0.6766532232251457, + "grad_norm": 40.150508880615234, + "learning_rate": 5.724259674955377e-07, + "loss": 0.3779, + "num_input_tokens_seen": 33280832, + "step": 10570 + }, + { + "epoch": 0.6769733051661225, + "grad_norm": 30.927886962890625, + "learning_rate": 5.714161556696291e-07, + "loss": 0.3829, + "num_input_tokens_seen": 33296576, + "step": 10575 + }, + { + "epoch": 0.6772933871070994, + "grad_norm": 44.968849182128906, + "learning_rate": 5.704068789356824e-07, + "loss": 0.3425, + "num_input_tokens_seen": 33316672, + "step": 10580 + }, + { + "epoch": 0.6776134690480763, + "grad_norm": 28.765954971313477, + "learning_rate": 5.693981385537912e-07, + "loss": 0.3569, + "num_input_tokens_seen": 33331456, + "step": 10585 + }, + { + "epoch": 0.6779335509890532, + "grad_norm": 26.096681594848633, + "learning_rate": 5.683899357833801e-07, + "loss": 0.3483, + "num_input_tokens_seen": 33346752, + "step": 10590 + }, + { + "epoch": 0.67825363293003, + "grad_norm": 31.515544891357422, + "learning_rate": 5.673822718832015e-07, + "loss": 0.4486, + "num_input_tokens_seen": 33362688, + "step": 10595 + }, + { + "epoch": 0.6785737148710069, + "grad_norm": 40.024139404296875, + "learning_rate": 5.663751481113362e-07, + "loss": 0.3732, + "num_input_tokens_seen": 33377600, + "step": 10600 + }, + { + "epoch": 0.6788937968119839, + "grad_norm": 26.293109893798828, + "learning_rate": 5.653685657251896e-07, + "loss": 0.4346, + "num_input_tokens_seen": 33393280, + "step": 10605 + }, + { + "epoch": 0.6792138787529608, + "grad_norm": 42.274269104003906, + "learning_rate": 5.643625259814922e-07, + "loss": 0.378, + "num_input_tokens_seen": 33410112, + "step": 10610 + }, + { + "epoch": 0.6795339606939377, + "grad_norm": 18.171001434326172, + "learning_rate": 5.633570301362953e-07, + "loss": 0.3557, + "num_input_tokens_seen": 33426624, + "step": 10615 + }, + { + "epoch": 0.6798540426349146, + "grad_norm": 36.791378021240234, + "learning_rate": 5.623520794449739e-07, + "loss": 0.3642, + "num_input_tokens_seen": 33442240, + "step": 10620 + }, + { + "epoch": 0.6801741245758914, + "grad_norm": 39.23925018310547, + "learning_rate": 5.613476751622195e-07, + "loss": 0.4764, + "num_input_tokens_seen": 33458432, + "step": 10625 + }, + { + "epoch": 0.6804942065168683, + "grad_norm": 27.09739112854004, + "learning_rate": 5.603438185420426e-07, + "loss": 0.4373, + "num_input_tokens_seen": 33473856, + "step": 10630 + }, + { + "epoch": 0.6808142884578452, + "grad_norm": 51.08125305175781, + "learning_rate": 5.593405108377714e-07, + "loss": 0.473, + "num_input_tokens_seen": 33489216, + "step": 10635 + }, + { + "epoch": 0.6811343703988221, + "grad_norm": 24.226774215698242, + "learning_rate": 5.583377533020457e-07, + "loss": 0.4676, + "num_input_tokens_seen": 33505280, + "step": 10640 + }, + { + "epoch": 0.681454452339799, + "grad_norm": 38.91953659057617, + "learning_rate": 5.573355471868201e-07, + "loss": 0.2929, + "num_input_tokens_seen": 33520512, + "step": 10645 + }, + { + "epoch": 0.6817745342807758, + "grad_norm": 25.037273406982422, + "learning_rate": 5.563338937433621e-07, + "loss": 0.3535, + "num_input_tokens_seen": 33537344, + "step": 10650 + }, + { + "epoch": 0.6820946162217527, + "grad_norm": 17.132577896118164, + "learning_rate": 5.553327942222472e-07, + "loss": 0.2518, + "num_input_tokens_seen": 33552128, + "step": 10655 + }, + { + "epoch": 0.6824146981627297, + "grad_norm": 27.395191192626953, + "learning_rate": 5.54332249873359e-07, + "loss": 0.3535, + "num_input_tokens_seen": 33566784, + "step": 10660 + }, + { + "epoch": 0.6827347801037066, + "grad_norm": 21.259674072265625, + "learning_rate": 5.533322619458896e-07, + "loss": 0.2955, + "num_input_tokens_seen": 33582080, + "step": 10665 + }, + { + "epoch": 0.6830548620446835, + "grad_norm": 45.052799224853516, + "learning_rate": 5.52332831688336e-07, + "loss": 0.4268, + "num_input_tokens_seen": 33596864, + "step": 10670 + }, + { + "epoch": 0.6833749439856603, + "grad_norm": 72.60736083984375, + "learning_rate": 5.513339603484981e-07, + "loss": 0.3527, + "num_input_tokens_seen": 33613056, + "step": 10675 + }, + { + "epoch": 0.6836950259266372, + "grad_norm": 67.02263641357422, + "learning_rate": 5.503356491734785e-07, + "loss": 0.4979, + "num_input_tokens_seen": 33628160, + "step": 10680 + }, + { + "epoch": 0.6840151078676141, + "grad_norm": 19.969636917114258, + "learning_rate": 5.493378994096806e-07, + "loss": 0.4457, + "num_input_tokens_seen": 33645184, + "step": 10685 + }, + { + "epoch": 0.684335189808591, + "grad_norm": 20.564537048339844, + "learning_rate": 5.483407123028067e-07, + "loss": 0.39, + "num_input_tokens_seen": 33660800, + "step": 10690 + }, + { + "epoch": 0.6846552717495679, + "grad_norm": 39.593467712402344, + "learning_rate": 5.473440890978566e-07, + "loss": 0.4734, + "num_input_tokens_seen": 33676736, + "step": 10695 + }, + { + "epoch": 0.6849753536905447, + "grad_norm": 25.922780990600586, + "learning_rate": 5.463480310391261e-07, + "loss": 0.4094, + "num_input_tokens_seen": 33692928, + "step": 10700 + }, + { + "epoch": 0.6852954356315216, + "grad_norm": 24.099258422851562, + "learning_rate": 5.453525393702052e-07, + "loss": 0.3824, + "num_input_tokens_seen": 33708352, + "step": 10705 + }, + { + "epoch": 0.6856155175724986, + "grad_norm": 37.71764373779297, + "learning_rate": 5.443576153339771e-07, + "loss": 0.3687, + "num_input_tokens_seen": 33723968, + "step": 10710 + }, + { + "epoch": 0.6859355995134755, + "grad_norm": 50.91608428955078, + "learning_rate": 5.433632601726159e-07, + "loss": 0.3238, + "num_input_tokens_seen": 33739200, + "step": 10715 + }, + { + "epoch": 0.6862556814544524, + "grad_norm": 34.224552154541016, + "learning_rate": 5.42369475127586e-07, + "loss": 0.3306, + "num_input_tokens_seen": 33754944, + "step": 10720 + }, + { + "epoch": 0.6865757633954293, + "grad_norm": 64.74101257324219, + "learning_rate": 5.413762614396396e-07, + "loss": 0.4715, + "num_input_tokens_seen": 33769472, + "step": 10725 + }, + { + "epoch": 0.6868958453364061, + "grad_norm": 37.43178939819336, + "learning_rate": 5.403836203488157e-07, + "loss": 0.4267, + "num_input_tokens_seen": 33784896, + "step": 10730 + }, + { + "epoch": 0.687215927277383, + "grad_norm": 20.12882423400879, + "learning_rate": 5.393915530944382e-07, + "loss": 0.3686, + "num_input_tokens_seen": 33800320, + "step": 10735 + }, + { + "epoch": 0.6875360092183599, + "grad_norm": 26.698909759521484, + "learning_rate": 5.384000609151145e-07, + "loss": 0.3743, + "num_input_tokens_seen": 33816896, + "step": 10740 + }, + { + "epoch": 0.6878560911593368, + "grad_norm": 22.242435455322266, + "learning_rate": 5.374091450487353e-07, + "loss": 0.3655, + "num_input_tokens_seen": 33833344, + "step": 10745 + }, + { + "epoch": 0.6881761731003136, + "grad_norm": 31.46050453186035, + "learning_rate": 5.364188067324693e-07, + "loss": 0.3346, + "num_input_tokens_seen": 33849856, + "step": 10750 + }, + { + "epoch": 0.6884962550412905, + "grad_norm": 16.067712783813477, + "learning_rate": 5.354290472027659e-07, + "loss": 0.3566, + "num_input_tokens_seen": 33865344, + "step": 10755 + }, + { + "epoch": 0.6888163369822674, + "grad_norm": 67.90633392333984, + "learning_rate": 5.344398676953525e-07, + "loss": 0.4921, + "num_input_tokens_seen": 33881792, + "step": 10760 + }, + { + "epoch": 0.6891364189232444, + "grad_norm": 33.24453353881836, + "learning_rate": 5.334512694452303e-07, + "loss": 0.4873, + "num_input_tokens_seen": 33898368, + "step": 10765 + }, + { + "epoch": 0.6894565008642213, + "grad_norm": 22.773826599121094, + "learning_rate": 5.324632536866755e-07, + "loss": 0.345, + "num_input_tokens_seen": 33914368, + "step": 10770 + }, + { + "epoch": 0.6897765828051982, + "grad_norm": 40.114479064941406, + "learning_rate": 5.314758216532386e-07, + "loss": 0.349, + "num_input_tokens_seen": 33929728, + "step": 10775 + }, + { + "epoch": 0.690096664746175, + "grad_norm": 21.76487159729004, + "learning_rate": 5.304889745777396e-07, + "loss": 0.3866, + "num_input_tokens_seen": 33944704, + "step": 10780 + }, + { + "epoch": 0.6904167466871519, + "grad_norm": 33.12062454223633, + "learning_rate": 5.295027136922678e-07, + "loss": 0.6472, + "num_input_tokens_seen": 33960128, + "step": 10785 + }, + { + "epoch": 0.6907368286281288, + "grad_norm": 23.689847946166992, + "learning_rate": 5.285170402281827e-07, + "loss": 0.4201, + "num_input_tokens_seen": 33975104, + "step": 10790 + }, + { + "epoch": 0.6910569105691057, + "grad_norm": 36.35055923461914, + "learning_rate": 5.275319554161087e-07, + "loss": 0.459, + "num_input_tokens_seen": 33990720, + "step": 10795 + }, + { + "epoch": 0.6913769925100826, + "grad_norm": 32.871768951416016, + "learning_rate": 5.265474604859356e-07, + "loss": 0.4207, + "num_input_tokens_seen": 34006272, + "step": 10800 + }, + { + "epoch": 0.6916970744510594, + "grad_norm": 27.291601181030273, + "learning_rate": 5.255635566668171e-07, + "loss": 0.3828, + "num_input_tokens_seen": 34022400, + "step": 10805 + }, + { + "epoch": 0.6920171563920363, + "grad_norm": 24.551733016967773, + "learning_rate": 5.245802451871686e-07, + "loss": 0.3811, + "num_input_tokens_seen": 34038720, + "step": 10810 + }, + { + "epoch": 0.6923372383330133, + "grad_norm": 24.757070541381836, + "learning_rate": 5.235975272746663e-07, + "loss": 0.4381, + "num_input_tokens_seen": 34053760, + "step": 10815 + }, + { + "epoch": 0.6926573202739902, + "grad_norm": 23.8084716796875, + "learning_rate": 5.226154041562442e-07, + "loss": 0.3033, + "num_input_tokens_seen": 34069568, + "step": 10820 + }, + { + "epoch": 0.6929774022149671, + "grad_norm": 26.603805541992188, + "learning_rate": 5.216338770580953e-07, + "loss": 0.4078, + "num_input_tokens_seen": 34086912, + "step": 10825 + }, + { + "epoch": 0.6932974841559439, + "grad_norm": 21.865713119506836, + "learning_rate": 5.206529472056678e-07, + "loss": 0.359, + "num_input_tokens_seen": 34101696, + "step": 10830 + }, + { + "epoch": 0.6936175660969208, + "grad_norm": 14.517900466918945, + "learning_rate": 5.196726158236637e-07, + "loss": 0.3084, + "num_input_tokens_seen": 34115904, + "step": 10835 + }, + { + "epoch": 0.6939376480378977, + "grad_norm": 25.503976821899414, + "learning_rate": 5.186928841360384e-07, + "loss": 0.3404, + "num_input_tokens_seen": 34131328, + "step": 10840 + }, + { + "epoch": 0.6942577299788746, + "grad_norm": 30.737321853637695, + "learning_rate": 5.177137533659985e-07, + "loss": 0.4466, + "num_input_tokens_seen": 34148544, + "step": 10845 + }, + { + "epoch": 0.6945778119198515, + "grad_norm": 22.222187042236328, + "learning_rate": 5.167352247360002e-07, + "loss": 0.4562, + "num_input_tokens_seen": 34163520, + "step": 10850 + }, + { + "epoch": 0.6948978938608283, + "grad_norm": 30.551633834838867, + "learning_rate": 5.157572994677479e-07, + "loss": 0.398, + "num_input_tokens_seen": 34178368, + "step": 10855 + }, + { + "epoch": 0.6952179758018052, + "grad_norm": 34.05165100097656, + "learning_rate": 5.147799787821929e-07, + "loss": 0.4086, + "num_input_tokens_seen": 34193920, + "step": 10860 + }, + { + "epoch": 0.6955380577427821, + "grad_norm": 39.741065979003906, + "learning_rate": 5.138032638995315e-07, + "loss": 0.4939, + "num_input_tokens_seen": 34210176, + "step": 10865 + }, + { + "epoch": 0.6958581396837591, + "grad_norm": 55.47737503051758, + "learning_rate": 5.128271560392037e-07, + "loss": 0.3602, + "num_input_tokens_seen": 34227328, + "step": 10870 + }, + { + "epoch": 0.696178221624736, + "grad_norm": 31.166147232055664, + "learning_rate": 5.118516564198916e-07, + "loss": 0.3959, + "num_input_tokens_seen": 34241984, + "step": 10875 + }, + { + "epoch": 0.6964983035657129, + "grad_norm": 22.45415496826172, + "learning_rate": 5.108767662595175e-07, + "loss": 0.3339, + "num_input_tokens_seen": 34256896, + "step": 10880 + }, + { + "epoch": 0.6968183855066897, + "grad_norm": 20.498132705688477, + "learning_rate": 5.099024867752446e-07, + "loss": 0.3904, + "num_input_tokens_seen": 34273792, + "step": 10885 + }, + { + "epoch": 0.6971384674476666, + "grad_norm": 37.48936462402344, + "learning_rate": 5.089288191834709e-07, + "loss": 0.3381, + "num_input_tokens_seen": 34290752, + "step": 10890 + }, + { + "epoch": 0.6974585493886435, + "grad_norm": 34.213294982910156, + "learning_rate": 5.079557646998318e-07, + "loss": 0.3422, + "num_input_tokens_seen": 34308416, + "step": 10895 + }, + { + "epoch": 0.6977786313296204, + "grad_norm": 14.548563003540039, + "learning_rate": 5.069833245391981e-07, + "loss": 0.3981, + "num_input_tokens_seen": 34323776, + "step": 10900 + }, + { + "epoch": 0.6980987132705972, + "grad_norm": 24.348325729370117, + "learning_rate": 5.060114999156728e-07, + "loss": 0.2941, + "num_input_tokens_seen": 34338944, + "step": 10905 + }, + { + "epoch": 0.6984187952115741, + "grad_norm": 34.29429626464844, + "learning_rate": 5.050402920425895e-07, + "loss": 0.3407, + "num_input_tokens_seen": 34354432, + "step": 10910 + }, + { + "epoch": 0.698738877152551, + "grad_norm": 18.496606826782227, + "learning_rate": 5.040697021325128e-07, + "loss": 0.2503, + "num_input_tokens_seen": 34370432, + "step": 10915 + }, + { + "epoch": 0.699058959093528, + "grad_norm": 33.679046630859375, + "learning_rate": 5.030997313972361e-07, + "loss": 0.4438, + "num_input_tokens_seen": 34386496, + "step": 10920 + }, + { + "epoch": 0.6993790410345049, + "grad_norm": 22.789405822753906, + "learning_rate": 5.021303810477795e-07, + "loss": 0.3692, + "num_input_tokens_seen": 34402560, + "step": 10925 + }, + { + "epoch": 0.6996991229754818, + "grad_norm": 17.632328033447266, + "learning_rate": 5.011616522943869e-07, + "loss": 0.2937, + "num_input_tokens_seen": 34418496, + "step": 10930 + }, + { + "epoch": 0.7000192049164586, + "grad_norm": 48.676658630371094, + "learning_rate": 5.001935463465289e-07, + "loss": 0.2772, + "num_input_tokens_seen": 34434752, + "step": 10935 + }, + { + "epoch": 0.7003392868574355, + "grad_norm": 25.778039932250977, + "learning_rate": 4.99226064412897e-07, + "loss": 0.3775, + "num_input_tokens_seen": 34450176, + "step": 10940 + }, + { + "epoch": 0.7006593687984124, + "grad_norm": 19.5657958984375, + "learning_rate": 4.982592077014026e-07, + "loss": 0.4286, + "num_input_tokens_seen": 34465600, + "step": 10945 + }, + { + "epoch": 0.7008514179629985, + "eval_loss": 0.3744131922721863, + "eval_runtime": 50.689, + "eval_samples_per_second": 273.945, + "eval_steps_per_second": 34.248, + "num_input_tokens_seen": 34475136, + "step": 10948 + }, + { + "epoch": 0.7009794507393893, + "grad_norm": 34.119754791259766, + "learning_rate": 4.97292977419179e-07, + "loss": 0.2973, + "num_input_tokens_seen": 34481600, + "step": 10950 + }, + { + "epoch": 0.7012995326803662, + "grad_norm": 21.208982467651367, + "learning_rate": 4.963273747725755e-07, + "loss": 0.2881, + "num_input_tokens_seen": 34498752, + "step": 10955 + }, + { + "epoch": 0.701619614621343, + "grad_norm": 26.19671630859375, + "learning_rate": 4.953624009671582e-07, + "loss": 0.413, + "num_input_tokens_seen": 34514240, + "step": 10960 + }, + { + "epoch": 0.7019396965623199, + "grad_norm": 44.88737487792969, + "learning_rate": 4.943980572077086e-07, + "loss": 0.4164, + "num_input_tokens_seen": 34528704, + "step": 10965 + }, + { + "epoch": 0.7022597785032968, + "grad_norm": 38.300411224365234, + "learning_rate": 4.934343446982209e-07, + "loss": 0.3207, + "num_input_tokens_seen": 34544704, + "step": 10970 + }, + { + "epoch": 0.7025798604442738, + "grad_norm": 13.832147598266602, + "learning_rate": 4.924712646419016e-07, + "loss": 0.3836, + "num_input_tokens_seen": 34560000, + "step": 10975 + }, + { + "epoch": 0.7028999423852507, + "grad_norm": 70.15164947509766, + "learning_rate": 4.915088182411674e-07, + "loss": 0.3222, + "num_input_tokens_seen": 34575296, + "step": 10980 + }, + { + "epoch": 0.7032200243262275, + "grad_norm": 35.194576263427734, + "learning_rate": 4.905470066976439e-07, + "loss": 0.3897, + "num_input_tokens_seen": 34590528, + "step": 10985 + }, + { + "epoch": 0.7035401062672044, + "grad_norm": 37.24507141113281, + "learning_rate": 4.895858312121644e-07, + "loss": 0.4156, + "num_input_tokens_seen": 34605312, + "step": 10990 + }, + { + "epoch": 0.7038601882081813, + "grad_norm": 25.404296875, + "learning_rate": 4.886252929847674e-07, + "loss": 0.4342, + "num_input_tokens_seen": 34620736, + "step": 10995 + }, + { + "epoch": 0.7041802701491582, + "grad_norm": 41.86030578613281, + "learning_rate": 4.876653932146963e-07, + "loss": 0.4627, + "num_input_tokens_seen": 34636736, + "step": 11000 + }, + { + "epoch": 0.7045003520901351, + "grad_norm": 31.331430435180664, + "learning_rate": 4.86706133100397e-07, + "loss": 0.3895, + "num_input_tokens_seen": 34651776, + "step": 11005 + }, + { + "epoch": 0.7048204340311119, + "grad_norm": 46.46551513671875, + "learning_rate": 4.857475138395178e-07, + "loss": 0.2889, + "num_input_tokens_seen": 34666176, + "step": 11010 + }, + { + "epoch": 0.7051405159720888, + "grad_norm": 15.934460639953613, + "learning_rate": 4.847895366289054e-07, + "loss": 0.2493, + "num_input_tokens_seen": 34682112, + "step": 11015 + }, + { + "epoch": 0.7054605979130657, + "grad_norm": 33.465232849121094, + "learning_rate": 4.838322026646057e-07, + "loss": 0.3825, + "num_input_tokens_seen": 34697024, + "step": 11020 + }, + { + "epoch": 0.7057806798540426, + "grad_norm": 22.186031341552734, + "learning_rate": 4.82875513141861e-07, + "loss": 0.371, + "num_input_tokens_seen": 34712704, + "step": 11025 + }, + { + "epoch": 0.7061007617950196, + "grad_norm": 28.25750160217285, + "learning_rate": 4.819194692551106e-07, + "loss": 0.375, + "num_input_tokens_seen": 34728256, + "step": 11030 + }, + { + "epoch": 0.7064208437359965, + "grad_norm": 17.980060577392578, + "learning_rate": 4.809640721979855e-07, + "loss": 0.435, + "num_input_tokens_seen": 34744512, + "step": 11035 + }, + { + "epoch": 0.7067409256769733, + "grad_norm": 46.3629264831543, + "learning_rate": 4.8000932316331e-07, + "loss": 0.4181, + "num_input_tokens_seen": 34758912, + "step": 11040 + }, + { + "epoch": 0.7070610076179502, + "grad_norm": 29.38511085510254, + "learning_rate": 4.790552233431002e-07, + "loss": 0.3914, + "num_input_tokens_seen": 34774848, + "step": 11045 + }, + { + "epoch": 0.7073810895589271, + "grad_norm": 34.6492805480957, + "learning_rate": 4.781017739285611e-07, + "loss": 0.416, + "num_input_tokens_seen": 34790016, + "step": 11050 + }, + { + "epoch": 0.707701171499904, + "grad_norm": 14.354450225830078, + "learning_rate": 4.771489761100842e-07, + "loss": 0.3528, + "num_input_tokens_seen": 34804992, + "step": 11055 + }, + { + "epoch": 0.7080212534408808, + "grad_norm": 39.80025863647461, + "learning_rate": 4.761968310772501e-07, + "loss": 0.2746, + "num_input_tokens_seen": 34820288, + "step": 11060 + }, + { + "epoch": 0.7083413353818577, + "grad_norm": 40.38865661621094, + "learning_rate": 4.7524534001882267e-07, + "loss": 0.2814, + "num_input_tokens_seen": 34836096, + "step": 11065 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 29.79903221130371, + "learning_rate": 4.7429450412274897e-07, + "loss": 0.3875, + "num_input_tokens_seen": 34851584, + "step": 11070 + }, + { + "epoch": 0.7089814992638115, + "grad_norm": 23.854610443115234, + "learning_rate": 4.733443245761596e-07, + "loss": 0.3542, + "num_input_tokens_seen": 34868032, + "step": 11075 + }, + { + "epoch": 0.7093015812047885, + "grad_norm": 26.83568572998047, + "learning_rate": 4.723948025653646e-07, + "loss": 0.3826, + "num_input_tokens_seen": 34884032, + "step": 11080 + }, + { + "epoch": 0.7096216631457654, + "grad_norm": 31.357860565185547, + "learning_rate": 4.714459392758534e-07, + "loss": 0.3252, + "num_input_tokens_seen": 34899456, + "step": 11085 + }, + { + "epoch": 0.7099417450867422, + "grad_norm": 50.6710205078125, + "learning_rate": 4.70497735892293e-07, + "loss": 0.3772, + "num_input_tokens_seen": 34915456, + "step": 11090 + }, + { + "epoch": 0.7102618270277191, + "grad_norm": 17.10684585571289, + "learning_rate": 4.695501935985263e-07, + "loss": 0.3408, + "num_input_tokens_seen": 34931328, + "step": 11095 + }, + { + "epoch": 0.710581908968696, + "grad_norm": 37.1428337097168, + "learning_rate": 4.686033135775711e-07, + "loss": 0.4064, + "num_input_tokens_seen": 34946816, + "step": 11100 + }, + { + "epoch": 0.7109019909096729, + "grad_norm": 25.48462677001953, + "learning_rate": 4.6765709701161817e-07, + "loss": 0.3274, + "num_input_tokens_seen": 34964544, + "step": 11105 + }, + { + "epoch": 0.7112220728506498, + "grad_norm": 88.31197357177734, + "learning_rate": 4.6671154508203003e-07, + "loss": 0.3861, + "num_input_tokens_seen": 34982208, + "step": 11110 + }, + { + "epoch": 0.7115421547916266, + "grad_norm": 39.50213623046875, + "learning_rate": 4.657666589693393e-07, + "loss": 0.3523, + "num_input_tokens_seen": 35000576, + "step": 11115 + }, + { + "epoch": 0.7118622367326035, + "grad_norm": 26.08376693725586, + "learning_rate": 4.6482243985324753e-07, + "loss": 0.3167, + "num_input_tokens_seen": 35014912, + "step": 11120 + }, + { + "epoch": 0.7121823186735804, + "grad_norm": 29.463659286499023, + "learning_rate": 4.638788889126232e-07, + "loss": 0.2867, + "num_input_tokens_seen": 35029632, + "step": 11125 + }, + { + "epoch": 0.7125024006145573, + "grad_norm": 28.548364639282227, + "learning_rate": 4.6293600732550085e-07, + "loss": 0.3423, + "num_input_tokens_seen": 35044992, + "step": 11130 + }, + { + "epoch": 0.7128224825555343, + "grad_norm": 18.932619094848633, + "learning_rate": 4.619937962690792e-07, + "loss": 0.4721, + "num_input_tokens_seen": 35060544, + "step": 11135 + }, + { + "epoch": 0.7131425644965111, + "grad_norm": 56.20754623413086, + "learning_rate": 4.610522569197197e-07, + "loss": 0.5205, + "num_input_tokens_seen": 35075648, + "step": 11140 + }, + { + "epoch": 0.713462646437488, + "grad_norm": 21.114213943481445, + "learning_rate": 4.6011139045294554e-07, + "loss": 0.3271, + "num_input_tokens_seen": 35090880, + "step": 11145 + }, + { + "epoch": 0.7137827283784649, + "grad_norm": 89.78885650634766, + "learning_rate": 4.59171198043439e-07, + "loss": 0.3935, + "num_input_tokens_seen": 35106432, + "step": 11150 + }, + { + "epoch": 0.7141028103194418, + "grad_norm": 29.25298309326172, + "learning_rate": 4.582316808650424e-07, + "loss": 0.4446, + "num_input_tokens_seen": 35121664, + "step": 11155 + }, + { + "epoch": 0.7144228922604187, + "grad_norm": 42.61500549316406, + "learning_rate": 4.572928400907529e-07, + "loss": 0.4704, + "num_input_tokens_seen": 35137152, + "step": 11160 + }, + { + "epoch": 0.7147429742013955, + "grad_norm": 41.548580169677734, + "learning_rate": 4.5635467689272434e-07, + "loss": 0.3787, + "num_input_tokens_seen": 35153088, + "step": 11165 + }, + { + "epoch": 0.7150630561423724, + "grad_norm": 23.75127410888672, + "learning_rate": 4.554171924422655e-07, + "loss": 0.3674, + "num_input_tokens_seen": 35168192, + "step": 11170 + }, + { + "epoch": 0.7153831380833493, + "grad_norm": 23.422161102294922, + "learning_rate": 4.544803879098356e-07, + "loss": 0.3288, + "num_input_tokens_seen": 35184192, + "step": 11175 + }, + { + "epoch": 0.7157032200243262, + "grad_norm": 24.220752716064453, + "learning_rate": 4.535442644650462e-07, + "loss": 0.3703, + "num_input_tokens_seen": 35200256, + "step": 11180 + }, + { + "epoch": 0.7160233019653032, + "grad_norm": 23.171953201293945, + "learning_rate": 4.5260882327665906e-07, + "loss": 0.4906, + "num_input_tokens_seen": 35214720, + "step": 11185 + }, + { + "epoch": 0.71634338390628, + "grad_norm": 38.95206069946289, + "learning_rate": 4.5167406551258347e-07, + "loss": 0.5148, + "num_input_tokens_seen": 35230720, + "step": 11190 + }, + { + "epoch": 0.7166634658472569, + "grad_norm": 30.46370506286621, + "learning_rate": 4.5073999233987445e-07, + "loss": 0.3863, + "num_input_tokens_seen": 35246400, + "step": 11195 + }, + { + "epoch": 0.7169835477882338, + "grad_norm": 31.349842071533203, + "learning_rate": 4.4980660492473434e-07, + "loss": 0.47, + "num_input_tokens_seen": 35262784, + "step": 11200 + }, + { + "epoch": 0.7173036297292107, + "grad_norm": 15.219905853271484, + "learning_rate": 4.4887390443250804e-07, + "loss": 0.2775, + "num_input_tokens_seen": 35277632, + "step": 11205 + }, + { + "epoch": 0.7176237116701876, + "grad_norm": 18.419071197509766, + "learning_rate": 4.4794189202768295e-07, + "loss": 0.2913, + "num_input_tokens_seen": 35292544, + "step": 11210 + }, + { + "epoch": 0.7179437936111644, + "grad_norm": 33.719818115234375, + "learning_rate": 4.4701056887388757e-07, + "loss": 0.368, + "num_input_tokens_seen": 35308352, + "step": 11215 + }, + { + "epoch": 0.7182638755521413, + "grad_norm": 32.227081298828125, + "learning_rate": 4.460799361338897e-07, + "loss": 0.3343, + "num_input_tokens_seen": 35323904, + "step": 11220 + }, + { + "epoch": 0.7185839574931182, + "grad_norm": 19.966176986694336, + "learning_rate": 4.451499949695954e-07, + "loss": 0.4156, + "num_input_tokens_seen": 35340224, + "step": 11225 + }, + { + "epoch": 0.7189040394340951, + "grad_norm": 17.299413681030273, + "learning_rate": 4.44220746542047e-07, + "loss": 0.375, + "num_input_tokens_seen": 35355776, + "step": 11230 + }, + { + "epoch": 0.719224121375072, + "grad_norm": 28.009838104248047, + "learning_rate": 4.432921920114221e-07, + "loss": 0.4772, + "num_input_tokens_seen": 35371072, + "step": 11235 + }, + { + "epoch": 0.719544203316049, + "grad_norm": 36.488346099853516, + "learning_rate": 4.4236433253703185e-07, + "loss": 0.3169, + "num_input_tokens_seen": 35387520, + "step": 11240 + }, + { + "epoch": 0.7198642852570258, + "grad_norm": 38.739227294921875, + "learning_rate": 4.4143716927732e-07, + "loss": 0.3928, + "num_input_tokens_seen": 35403840, + "step": 11245 + }, + { + "epoch": 0.7201843671980027, + "grad_norm": 30.09905433654785, + "learning_rate": 4.405107033898604e-07, + "loss": 0.3873, + "num_input_tokens_seen": 35420032, + "step": 11250 + }, + { + "epoch": 0.7205044491389796, + "grad_norm": 35.15446472167969, + "learning_rate": 4.395849360313568e-07, + "loss": 0.2845, + "num_input_tokens_seen": 35436032, + "step": 11255 + }, + { + "epoch": 0.7208245310799565, + "grad_norm": 37.864742279052734, + "learning_rate": 4.386598683576406e-07, + "loss": 0.3583, + "num_input_tokens_seen": 35451136, + "step": 11260 + }, + { + "epoch": 0.7211446130209334, + "grad_norm": 17.579322814941406, + "learning_rate": 4.377355015236696e-07, + "loss": 0.4711, + "num_input_tokens_seen": 35466816, + "step": 11265 + }, + { + "epoch": 0.7214646949619102, + "grad_norm": 34.376991271972656, + "learning_rate": 4.368118366835266e-07, + "loss": 0.3555, + "num_input_tokens_seen": 35483456, + "step": 11270 + }, + { + "epoch": 0.7217847769028871, + "grad_norm": 37.30057144165039, + "learning_rate": 4.358888749904177e-07, + "loss": 0.4612, + "num_input_tokens_seen": 35499584, + "step": 11275 + }, + { + "epoch": 0.722104858843864, + "grad_norm": 25.58052635192871, + "learning_rate": 4.349666175966725e-07, + "loss": 0.3546, + "num_input_tokens_seen": 35515328, + "step": 11280 + }, + { + "epoch": 0.7224249407848409, + "grad_norm": 18.5773983001709, + "learning_rate": 4.340450656537392e-07, + "loss": 0.4744, + "num_input_tokens_seen": 35530048, + "step": 11285 + }, + { + "epoch": 0.7227450227258178, + "grad_norm": 32.79454040527344, + "learning_rate": 4.331242203121861e-07, + "loss": 0.2965, + "num_input_tokens_seen": 35545792, + "step": 11290 + }, + { + "epoch": 0.7230651046667947, + "grad_norm": 44.36042785644531, + "learning_rate": 4.322040827217004e-07, + "loss": 0.3871, + "num_input_tokens_seen": 35561344, + "step": 11295 + }, + { + "epoch": 0.7233851866077716, + "grad_norm": 43.496337890625, + "learning_rate": 4.312846540310838e-07, + "loss": 0.405, + "num_input_tokens_seen": 35577024, + "step": 11300 + }, + { + "epoch": 0.7237052685487485, + "grad_norm": 28.110532760620117, + "learning_rate": 4.3036593538825373e-07, + "loss": 0.3728, + "num_input_tokens_seen": 35592192, + "step": 11305 + }, + { + "epoch": 0.7240253504897254, + "grad_norm": 15.80125904083252, + "learning_rate": 4.2944792794024196e-07, + "loss": 0.3287, + "num_input_tokens_seen": 35607872, + "step": 11310 + }, + { + "epoch": 0.7243454324307023, + "grad_norm": 23.26807975769043, + "learning_rate": 4.285306328331915e-07, + "loss": 0.3117, + "num_input_tokens_seen": 35623872, + "step": 11315 + }, + { + "epoch": 0.7246655143716791, + "grad_norm": 27.19857406616211, + "learning_rate": 4.2761405121235506e-07, + "loss": 0.3067, + "num_input_tokens_seen": 35638720, + "step": 11320 + }, + { + "epoch": 0.724985596312656, + "grad_norm": 23.31117057800293, + "learning_rate": 4.266981842220965e-07, + "loss": 0.5403, + "num_input_tokens_seen": 35655680, + "step": 11325 + }, + { + "epoch": 0.7253056782536329, + "grad_norm": 27.773475646972656, + "learning_rate": 4.257830330058864e-07, + "loss": 0.2708, + "num_input_tokens_seen": 35671168, + "step": 11330 + }, + { + "epoch": 0.7256257601946098, + "grad_norm": 32.48497772216797, + "learning_rate": 4.248685987063019e-07, + "loss": 0.4088, + "num_input_tokens_seen": 35686848, + "step": 11335 + }, + { + "epoch": 0.7259458421355867, + "grad_norm": 25.382577896118164, + "learning_rate": 4.2395488246502396e-07, + "loss": 0.3478, + "num_input_tokens_seen": 35702720, + "step": 11340 + }, + { + "epoch": 0.7262659240765637, + "grad_norm": 34.661277770996094, + "learning_rate": 4.2304188542283913e-07, + "loss": 0.4566, + "num_input_tokens_seen": 35720640, + "step": 11345 + }, + { + "epoch": 0.7265860060175405, + "grad_norm": 61.42772674560547, + "learning_rate": 4.221296087196347e-07, + "loss": 0.3923, + "num_input_tokens_seen": 35735424, + "step": 11350 + }, + { + "epoch": 0.7269060879585174, + "grad_norm": 23.816184997558594, + "learning_rate": 4.2121805349439867e-07, + "loss": 0.4596, + "num_input_tokens_seen": 35751168, + "step": 11355 + }, + { + "epoch": 0.7272261698994943, + "grad_norm": 32.27251052856445, + "learning_rate": 4.203072208852184e-07, + "loss": 0.3787, + "num_input_tokens_seen": 35767168, + "step": 11360 + }, + { + "epoch": 0.7275462518404712, + "grad_norm": 42.87307357788086, + "learning_rate": 4.193971120292793e-07, + "loss": 0.439, + "num_input_tokens_seen": 35782464, + "step": 11365 + }, + { + "epoch": 0.727866333781448, + "grad_norm": 20.0147705078125, + "learning_rate": 4.184877280628629e-07, + "loss": 0.406, + "num_input_tokens_seen": 35798592, + "step": 11370 + }, + { + "epoch": 0.7281864157224249, + "grad_norm": 36.623085021972656, + "learning_rate": 4.1757907012134565e-07, + "loss": 0.396, + "num_input_tokens_seen": 35814720, + "step": 11375 + }, + { + "epoch": 0.7285064976634018, + "grad_norm": 34.45808792114258, + "learning_rate": 4.166711393391978e-07, + "loss": 0.2826, + "num_input_tokens_seen": 35830016, + "step": 11380 + }, + { + "epoch": 0.7288265796043787, + "grad_norm": 18.86490821838379, + "learning_rate": 4.1576393684998146e-07, + "loss": 0.345, + "num_input_tokens_seen": 35845632, + "step": 11385 + }, + { + "epoch": 0.7291466615453556, + "grad_norm": 23.06288719177246, + "learning_rate": 4.1485746378634966e-07, + "loss": 0.3556, + "num_input_tokens_seen": 35861184, + "step": 11390 + }, + { + "epoch": 0.7294667434863324, + "grad_norm": 37.96226501464844, + "learning_rate": 4.1395172128004473e-07, + "loss": 0.4311, + "num_input_tokens_seen": 35876864, + "step": 11395 + }, + { + "epoch": 0.7297868254273094, + "grad_norm": 25.186145782470703, + "learning_rate": 4.130467104618963e-07, + "loss": 0.3318, + "num_input_tokens_seen": 35893568, + "step": 11400 + }, + { + "epoch": 0.7301069073682863, + "grad_norm": 30.771581649780273, + "learning_rate": 4.1214243246182223e-07, + "loss": 0.3364, + "num_input_tokens_seen": 35909696, + "step": 11405 + }, + { + "epoch": 0.7304269893092632, + "grad_norm": 35.04588317871094, + "learning_rate": 4.1123888840882306e-07, + "loss": 0.5046, + "num_input_tokens_seen": 35925120, + "step": 11410 + }, + { + "epoch": 0.7307470712502401, + "grad_norm": 28.61571502685547, + "learning_rate": 4.1033607943098415e-07, + "loss": 0.3223, + "num_input_tokens_seen": 35940800, + "step": 11415 + }, + { + "epoch": 0.731067153191217, + "grad_norm": 12.746711730957031, + "learning_rate": 4.0943400665547423e-07, + "loss": 0.3444, + "num_input_tokens_seen": 35955968, + "step": 11420 + }, + { + "epoch": 0.7313872351321938, + "grad_norm": 43.476295471191406, + "learning_rate": 4.0853267120854064e-07, + "loss": 0.3449, + "num_input_tokens_seen": 35972096, + "step": 11425 + }, + { + "epoch": 0.7317073170731707, + "grad_norm": 27.513898849487305, + "learning_rate": 4.076320742155117e-07, + "loss": 0.3315, + "num_input_tokens_seen": 35986624, + "step": 11430 + }, + { + "epoch": 0.7320273990141476, + "grad_norm": 13.824392318725586, + "learning_rate": 4.067322168007928e-07, + "loss": 0.3493, + "num_input_tokens_seen": 36003008, + "step": 11435 + }, + { + "epoch": 0.7323474809551245, + "grad_norm": 27.101316452026367, + "learning_rate": 4.0583310008786775e-07, + "loss": 0.3597, + "num_input_tokens_seen": 36017152, + "step": 11440 + }, + { + "epoch": 0.7326675628961014, + "grad_norm": 38.84494400024414, + "learning_rate": 4.049347251992932e-07, + "loss": 0.271, + "num_input_tokens_seen": 36031936, + "step": 11445 + }, + { + "epoch": 0.7329876448370783, + "grad_norm": 29.943532943725586, + "learning_rate": 4.0403709325670064e-07, + "loss": 0.353, + "num_input_tokens_seen": 36048064, + "step": 11450 + }, + { + "epoch": 0.7333077267780552, + "grad_norm": 56.9892692565918, + "learning_rate": 4.03140205380795e-07, + "loss": 0.4653, + "num_input_tokens_seen": 36064256, + "step": 11455 + }, + { + "epoch": 0.7336278087190321, + "grad_norm": 67.21539306640625, + "learning_rate": 4.0224406269135115e-07, + "loss": 0.6715, + "num_input_tokens_seen": 36079424, + "step": 11460 + }, + { + "epoch": 0.733947890660009, + "grad_norm": 48.787593841552734, + "learning_rate": 4.0134866630721266e-07, + "loss": 0.3111, + "num_input_tokens_seen": 36095424, + "step": 11465 + }, + { + "epoch": 0.7342679726009859, + "grad_norm": 21.646329879760742, + "learning_rate": 4.0045401734629367e-07, + "loss": 0.3618, + "num_input_tokens_seen": 36111360, + "step": 11470 + }, + { + "epoch": 0.7345880545419627, + "grad_norm": 26.18387794494629, + "learning_rate": 3.9956011692557377e-07, + "loss": 0.3825, + "num_input_tokens_seen": 36127232, + "step": 11475 + }, + { + "epoch": 0.7349081364829396, + "grad_norm": 51.78586196899414, + "learning_rate": 3.986669661610972e-07, + "loss": 0.3532, + "num_input_tokens_seen": 36143168, + "step": 11480 + }, + { + "epoch": 0.7352282184239165, + "grad_norm": 33.300621032714844, + "learning_rate": 3.9777456616797414e-07, + "loss": 0.3323, + "num_input_tokens_seen": 36158272, + "step": 11485 + }, + { + "epoch": 0.7355483003648934, + "grad_norm": 51.799171447753906, + "learning_rate": 3.968829180603761e-07, + "loss": 0.3731, + "num_input_tokens_seen": 36173056, + "step": 11490 + }, + { + "epoch": 0.7358683823058703, + "grad_norm": 45.12591552734375, + "learning_rate": 3.9599202295153624e-07, + "loss": 0.3927, + "num_input_tokens_seen": 36187904, + "step": 11495 + }, + { + "epoch": 0.7361884642468471, + "grad_norm": 74.60730743408203, + "learning_rate": 3.951018819537476e-07, + "loss": 0.3596, + "num_input_tokens_seen": 36205632, + "step": 11500 + }, + { + "epoch": 0.7365085461878241, + "grad_norm": 36.89905548095703, + "learning_rate": 3.942124961783616e-07, + "loss": 0.3478, + "num_input_tokens_seen": 36220160, + "step": 11505 + }, + { + "epoch": 0.736828628128801, + "grad_norm": 28.41156005859375, + "learning_rate": 3.933238667357869e-07, + "loss": 0.3164, + "num_input_tokens_seen": 36236416, + "step": 11510 + }, + { + "epoch": 0.7371487100697779, + "grad_norm": 41.953426361083984, + "learning_rate": 3.924359947354876e-07, + "loss": 0.3449, + "num_input_tokens_seen": 36251584, + "step": 11515 + }, + { + "epoch": 0.7374687920107548, + "grad_norm": 15.202105522155762, + "learning_rate": 3.915488812859826e-07, + "loss": 0.3289, + "num_input_tokens_seen": 36265856, + "step": 11520 + }, + { + "epoch": 0.7377888739517316, + "grad_norm": 62.20964050292969, + "learning_rate": 3.90662527494843e-07, + "loss": 0.3927, + "num_input_tokens_seen": 36283904, + "step": 11525 + }, + { + "epoch": 0.7381089558927085, + "grad_norm": 34.758705139160156, + "learning_rate": 3.8977693446869285e-07, + "loss": 0.3627, + "num_input_tokens_seen": 36298432, + "step": 11530 + }, + { + "epoch": 0.7384290378336854, + "grad_norm": 26.30589485168457, + "learning_rate": 3.8889210331320445e-07, + "loss": 0.3247, + "num_input_tokens_seen": 36313728, + "step": 11535 + }, + { + "epoch": 0.7387491197746623, + "grad_norm": 21.73337173461914, + "learning_rate": 3.8800803513310033e-07, + "loss": 0.3595, + "num_input_tokens_seen": 36329088, + "step": 11540 + }, + { + "epoch": 0.7390692017156392, + "grad_norm": 38.45037841796875, + "learning_rate": 3.8712473103214993e-07, + "loss": 0.4255, + "num_input_tokens_seen": 36345024, + "step": 11545 + }, + { + "epoch": 0.739389283656616, + "grad_norm": 21.429397583007812, + "learning_rate": 3.862421921131688e-07, + "loss": 0.3089, + "num_input_tokens_seen": 36361792, + "step": 11550 + }, + { + "epoch": 0.739709365597593, + "grad_norm": 35.84434127807617, + "learning_rate": 3.85360419478017e-07, + "loss": 0.2832, + "num_input_tokens_seen": 36377152, + "step": 11555 + }, + { + "epoch": 0.7400294475385699, + "grad_norm": 23.184368133544922, + "learning_rate": 3.8447941422759786e-07, + "loss": 0.3552, + "num_input_tokens_seen": 36394048, + "step": 11560 + }, + { + "epoch": 0.7403495294795468, + "grad_norm": 34.779685974121094, + "learning_rate": 3.835991774618579e-07, + "loss": 0.3684, + "num_input_tokens_seen": 36409152, + "step": 11565 + }, + { + "epoch": 0.7406696114205237, + "grad_norm": 88.90668487548828, + "learning_rate": 3.827197102797818e-07, + "loss": 0.3859, + "num_input_tokens_seen": 36427072, + "step": 11570 + }, + { + "epoch": 0.7409896933615006, + "grad_norm": 66.85957336425781, + "learning_rate": 3.818410137793947e-07, + "loss": 0.4771, + "num_input_tokens_seen": 36444288, + "step": 11575 + }, + { + "epoch": 0.7413097753024774, + "grad_norm": 20.060245513916016, + "learning_rate": 3.809630890577602e-07, + "loss": 0.4402, + "num_input_tokens_seen": 36460096, + "step": 11580 + }, + { + "epoch": 0.7416298572434543, + "grad_norm": 126.41194152832031, + "learning_rate": 3.800859372109777e-07, + "loss": 0.3388, + "num_input_tokens_seen": 36475264, + "step": 11585 + }, + { + "epoch": 0.7419499391844312, + "grad_norm": 17.512025833129883, + "learning_rate": 3.7920955933418055e-07, + "loss": 0.325, + "num_input_tokens_seen": 36491264, + "step": 11590 + }, + { + "epoch": 0.7422700211254081, + "grad_norm": 46.947872161865234, + "learning_rate": 3.7833395652153775e-07, + "loss": 0.3245, + "num_input_tokens_seen": 36506368, + "step": 11595 + }, + { + "epoch": 0.742590103066385, + "grad_norm": 33.25145721435547, + "learning_rate": 3.774591298662497e-07, + "loss": 0.3117, + "num_input_tokens_seen": 36522432, + "step": 11600 + }, + { + "epoch": 0.7429101850073618, + "grad_norm": 64.40025329589844, + "learning_rate": 3.765850804605468e-07, + "loss": 0.4221, + "num_input_tokens_seen": 36539008, + "step": 11605 + }, + { + "epoch": 0.7432302669483388, + "grad_norm": 28.040822982788086, + "learning_rate": 3.7571180939569104e-07, + "loss": 0.2818, + "num_input_tokens_seen": 36554240, + "step": 11610 + }, + { + "epoch": 0.7435503488893157, + "grad_norm": 36.580570220947266, + "learning_rate": 3.748393177619711e-07, + "loss": 0.3181, + "num_input_tokens_seen": 36569920, + "step": 11615 + }, + { + "epoch": 0.7438704308302926, + "grad_norm": 33.55031204223633, + "learning_rate": 3.739676066487032e-07, + "loss": 0.3139, + "num_input_tokens_seen": 36585792, + "step": 11620 + }, + { + "epoch": 0.7441905127712695, + "grad_norm": 23.285324096679688, + "learning_rate": 3.730966771442289e-07, + "loss": 0.2923, + "num_input_tokens_seen": 36601280, + "step": 11625 + }, + { + "epoch": 0.7445105947122463, + "grad_norm": 33.82448196411133, + "learning_rate": 3.722265303359137e-07, + "loss": 0.5229, + "num_input_tokens_seen": 36617152, + "step": 11630 + }, + { + "epoch": 0.7448306766532232, + "grad_norm": 60.476661682128906, + "learning_rate": 3.713571673101463e-07, + "loss": 0.4046, + "num_input_tokens_seen": 36632512, + "step": 11635 + }, + { + "epoch": 0.7451507585942001, + "grad_norm": 15.05838394165039, + "learning_rate": 3.704885891523366e-07, + "loss": 0.344, + "num_input_tokens_seen": 36647744, + "step": 11640 + }, + { + "epoch": 0.745470840535177, + "grad_norm": 34.84885025024414, + "learning_rate": 3.696207969469146e-07, + "loss": 0.3938, + "num_input_tokens_seen": 36663360, + "step": 11645 + }, + { + "epoch": 0.7457909224761539, + "grad_norm": 29.558528900146484, + "learning_rate": 3.6875379177732913e-07, + "loss": 0.373, + "num_input_tokens_seen": 36678656, + "step": 11650 + }, + { + "epoch": 0.7461110044171307, + "grad_norm": 73.68892669677734, + "learning_rate": 3.6788757472604634e-07, + "loss": 0.5096, + "num_input_tokens_seen": 36693952, + "step": 11655 + }, + { + "epoch": 0.7464310863581076, + "grad_norm": 35.93594741821289, + "learning_rate": 3.6702214687454825e-07, + "loss": 0.3264, + "num_input_tokens_seen": 36709888, + "step": 11660 + }, + { + "epoch": 0.7467511682990846, + "grad_norm": 28.113248825073242, + "learning_rate": 3.6615750930333177e-07, + "loss": 0.3066, + "num_input_tokens_seen": 36725504, + "step": 11665 + }, + { + "epoch": 0.7470712502400615, + "grad_norm": 11.288246154785156, + "learning_rate": 3.65293663091907e-07, + "loss": 0.3025, + "num_input_tokens_seen": 36741376, + "step": 11670 + }, + { + "epoch": 0.7473913321810384, + "grad_norm": 31.954673767089844, + "learning_rate": 3.6443060931879623e-07, + "loss": 0.435, + "num_input_tokens_seen": 36756864, + "step": 11675 + }, + { + "epoch": 0.7477114141220152, + "grad_norm": 29.685522079467773, + "learning_rate": 3.635683490615321e-07, + "loss": 0.4612, + "num_input_tokens_seen": 36772608, + "step": 11680 + }, + { + "epoch": 0.7480314960629921, + "grad_norm": 76.63870239257812, + "learning_rate": 3.6270688339665634e-07, + "loss": 0.3057, + "num_input_tokens_seen": 36788352, + "step": 11685 + }, + { + "epoch": 0.748351578003969, + "grad_norm": 41.03104782104492, + "learning_rate": 3.6184621339972e-07, + "loss": 0.3581, + "num_input_tokens_seen": 36804096, + "step": 11690 + }, + { + "epoch": 0.7486716599449459, + "grad_norm": 41.0787353515625, + "learning_rate": 3.609863401452786e-07, + "loss": 0.3592, + "num_input_tokens_seen": 36819776, + "step": 11695 + }, + { + "epoch": 0.7489917418859228, + "grad_norm": 36.56906509399414, + "learning_rate": 3.6012726470689416e-07, + "loss": 0.4102, + "num_input_tokens_seen": 36835072, + "step": 11700 + }, + { + "epoch": 0.7493118238268996, + "grad_norm": 26.697582244873047, + "learning_rate": 3.592689881571329e-07, + "loss": 0.3346, + "num_input_tokens_seen": 36850816, + "step": 11705 + }, + { + "epoch": 0.7496319057678765, + "grad_norm": 39.08557891845703, + "learning_rate": 3.5841151156756334e-07, + "loss": 0.4205, + "num_input_tokens_seen": 36866368, + "step": 11710 + }, + { + "epoch": 0.7499519877088535, + "grad_norm": 40.700199127197266, + "learning_rate": 3.575548360087539e-07, + "loss": 0.4196, + "num_input_tokens_seen": 36885376, + "step": 11715 + }, + { + "epoch": 0.7502720696498304, + "grad_norm": 18.3825626373291, + "learning_rate": 3.5669896255027533e-07, + "loss": 0.3191, + "num_input_tokens_seen": 36900288, + "step": 11720 + }, + { + "epoch": 0.7505921515908073, + "grad_norm": 17.36146354675293, + "learning_rate": 3.5584389226069543e-07, + "loss": 0.3892, + "num_input_tokens_seen": 36916224, + "step": 11725 + }, + { + "epoch": 0.7509122335317842, + "grad_norm": 20.827890396118164, + "learning_rate": 3.5498962620757866e-07, + "loss": 0.3097, + "num_input_tokens_seen": 36931648, + "step": 11730 + }, + { + "epoch": 0.7509122335317842, + "eval_loss": 0.36731547117233276, + "eval_runtime": 50.5825, + "eval_samples_per_second": 274.522, + "eval_steps_per_second": 34.32, + "num_input_tokens_seen": 36931648, + "step": 11730 + }, + { + "epoch": 0.751232315472761, + "grad_norm": 83.0255126953125, + "learning_rate": 3.5413616545748713e-07, + "loss": 0.4301, + "num_input_tokens_seen": 36945856, + "step": 11735 + }, + { + "epoch": 0.7515523974137379, + "grad_norm": 25.14464569091797, + "learning_rate": 3.532835110759763e-07, + "loss": 0.509, + "num_input_tokens_seen": 36961792, + "step": 11740 + }, + { + "epoch": 0.7518724793547148, + "grad_norm": 25.07977294921875, + "learning_rate": 3.524316641275955e-07, + "loss": 0.3072, + "num_input_tokens_seen": 36977152, + "step": 11745 + }, + { + "epoch": 0.7521925612956917, + "grad_norm": 20.042966842651367, + "learning_rate": 3.5158062567588467e-07, + "loss": 0.4213, + "num_input_tokens_seen": 36991936, + "step": 11750 + }, + { + "epoch": 0.7525126432366686, + "grad_norm": 94.77703857421875, + "learning_rate": 3.5073039678337633e-07, + "loss": 0.4065, + "num_input_tokens_seen": 37006784, + "step": 11755 + }, + { + "epoch": 0.7528327251776454, + "grad_norm": 38.61663055419922, + "learning_rate": 3.498809785115908e-07, + "loss": 0.3394, + "num_input_tokens_seen": 37022208, + "step": 11760 + }, + { + "epoch": 0.7531528071186223, + "grad_norm": 12.624959945678711, + "learning_rate": 3.4903237192103697e-07, + "loss": 0.3495, + "num_input_tokens_seen": 37039488, + "step": 11765 + }, + { + "epoch": 0.7534728890595993, + "grad_norm": 40.64104080200195, + "learning_rate": 3.481845780712099e-07, + "loss": 0.3453, + "num_input_tokens_seen": 37056064, + "step": 11770 + }, + { + "epoch": 0.7537929710005762, + "grad_norm": 36.66989517211914, + "learning_rate": 3.4733759802059037e-07, + "loss": 0.3434, + "num_input_tokens_seen": 37072256, + "step": 11775 + }, + { + "epoch": 0.7541130529415531, + "grad_norm": 63.55211639404297, + "learning_rate": 3.4649143282664273e-07, + "loss": 0.428, + "num_input_tokens_seen": 37087360, + "step": 11780 + }, + { + "epoch": 0.7544331348825299, + "grad_norm": 27.92076873779297, + "learning_rate": 3.456460835458143e-07, + "loss": 0.3164, + "num_input_tokens_seen": 37102144, + "step": 11785 + }, + { + "epoch": 0.7547532168235068, + "grad_norm": 40.95823669433594, + "learning_rate": 3.4480155123353337e-07, + "loss": 0.3131, + "num_input_tokens_seen": 37117568, + "step": 11790 + }, + { + "epoch": 0.7550732987644837, + "grad_norm": 38.527374267578125, + "learning_rate": 3.4395783694420875e-07, + "loss": 0.4608, + "num_input_tokens_seen": 37132800, + "step": 11795 + }, + { + "epoch": 0.7553933807054606, + "grad_norm": 22.126087188720703, + "learning_rate": 3.4311494173122743e-07, + "loss": 0.4036, + "num_input_tokens_seen": 37147776, + "step": 11800 + }, + { + "epoch": 0.7557134626464375, + "grad_norm": 26.484477996826172, + "learning_rate": 3.422728666469541e-07, + "loss": 0.3944, + "num_input_tokens_seen": 37163904, + "step": 11805 + }, + { + "epoch": 0.7560335445874143, + "grad_norm": 43.04818344116211, + "learning_rate": 3.41431612742729e-07, + "loss": 0.4316, + "num_input_tokens_seen": 37180416, + "step": 11810 + }, + { + "epoch": 0.7563536265283912, + "grad_norm": 23.99943733215332, + "learning_rate": 3.4059118106886855e-07, + "loss": 0.4235, + "num_input_tokens_seen": 37196480, + "step": 11815 + }, + { + "epoch": 0.7566737084693682, + "grad_norm": 76.9852066040039, + "learning_rate": 3.3975157267466036e-07, + "loss": 0.5208, + "num_input_tokens_seen": 37211648, + "step": 11820 + }, + { + "epoch": 0.7569937904103451, + "grad_norm": 28.845922470092773, + "learning_rate": 3.389127886083656e-07, + "loss": 0.2942, + "num_input_tokens_seen": 37227072, + "step": 11825 + }, + { + "epoch": 0.757313872351322, + "grad_norm": 23.833080291748047, + "learning_rate": 3.3807482991721667e-07, + "loss": 0.3342, + "num_input_tokens_seen": 37243968, + "step": 11830 + }, + { + "epoch": 0.7576339542922989, + "grad_norm": 19.469358444213867, + "learning_rate": 3.3723769764741474e-07, + "loss": 0.32, + "num_input_tokens_seen": 37259200, + "step": 11835 + }, + { + "epoch": 0.7579540362332757, + "grad_norm": 17.347503662109375, + "learning_rate": 3.3640139284412825e-07, + "loss": 0.2946, + "num_input_tokens_seen": 37275072, + "step": 11840 + }, + { + "epoch": 0.7582741181742526, + "grad_norm": 43.63488006591797, + "learning_rate": 3.355659165514948e-07, + "loss": 0.4, + "num_input_tokens_seen": 37291392, + "step": 11845 + }, + { + "epoch": 0.7585942001152295, + "grad_norm": 18.54771614074707, + "learning_rate": 3.347312698126161e-07, + "loss": 0.2828, + "num_input_tokens_seen": 37307648, + "step": 11850 + }, + { + "epoch": 0.7589142820562064, + "grad_norm": 14.965799331665039, + "learning_rate": 3.338974536695578e-07, + "loss": 0.2188, + "num_input_tokens_seen": 37323136, + "step": 11855 + }, + { + "epoch": 0.7592343639971832, + "grad_norm": 22.427433013916016, + "learning_rate": 3.330644691633492e-07, + "loss": 0.3193, + "num_input_tokens_seen": 37338496, + "step": 11860 + }, + { + "epoch": 0.7595544459381601, + "grad_norm": 11.61136245727539, + "learning_rate": 3.322323173339818e-07, + "loss": 0.2764, + "num_input_tokens_seen": 37356800, + "step": 11865 + }, + { + "epoch": 0.759874527879137, + "grad_norm": 27.630123138427734, + "learning_rate": 3.314009992204071e-07, + "loss": 0.4461, + "num_input_tokens_seen": 37372800, + "step": 11870 + }, + { + "epoch": 0.760194609820114, + "grad_norm": 63.511077880859375, + "learning_rate": 3.3057051586053443e-07, + "loss": 0.3172, + "num_input_tokens_seen": 37388608, + "step": 11875 + }, + { + "epoch": 0.7605146917610909, + "grad_norm": 34.02855682373047, + "learning_rate": 3.297408682912329e-07, + "loss": 0.4503, + "num_input_tokens_seen": 37405184, + "step": 11880 + }, + { + "epoch": 0.7608347737020678, + "grad_norm": 21.150548934936523, + "learning_rate": 3.289120575483271e-07, + "loss": 0.2743, + "num_input_tokens_seen": 37420096, + "step": 11885 + }, + { + "epoch": 0.7611548556430446, + "grad_norm": 33.34170150756836, + "learning_rate": 3.280840846665969e-07, + "loss": 0.4177, + "num_input_tokens_seen": 37434368, + "step": 11890 + }, + { + "epoch": 0.7614749375840215, + "grad_norm": 32.07157897949219, + "learning_rate": 3.272569506797761e-07, + "loss": 0.3019, + "num_input_tokens_seen": 37449344, + "step": 11895 + }, + { + "epoch": 0.7617950195249984, + "grad_norm": 29.10479736328125, + "learning_rate": 3.2643065662055136e-07, + "loss": 0.3364, + "num_input_tokens_seen": 37464448, + "step": 11900 + }, + { + "epoch": 0.7621151014659753, + "grad_norm": 68.0009765625, + "learning_rate": 3.2560520352056033e-07, + "loss": 0.2844, + "num_input_tokens_seen": 37481856, + "step": 11905 + }, + { + "epoch": 0.7624351834069522, + "grad_norm": 19.72753143310547, + "learning_rate": 3.24780592410391e-07, + "loss": 0.3952, + "num_input_tokens_seen": 37497856, + "step": 11910 + }, + { + "epoch": 0.762755265347929, + "grad_norm": 40.24898147583008, + "learning_rate": 3.2395682431957994e-07, + "loss": 0.4545, + "num_input_tokens_seen": 37513600, + "step": 11915 + }, + { + "epoch": 0.7630753472889059, + "grad_norm": 36.124610900878906, + "learning_rate": 3.231339002766115e-07, + "loss": 0.3272, + "num_input_tokens_seen": 37529408, + "step": 11920 + }, + { + "epoch": 0.7633954292298829, + "grad_norm": 30.874311447143555, + "learning_rate": 3.2231182130891564e-07, + "loss": 0.3396, + "num_input_tokens_seen": 37545984, + "step": 11925 + }, + { + "epoch": 0.7637155111708598, + "grad_norm": 70.74797058105469, + "learning_rate": 3.214905884428679e-07, + "loss": 0.3342, + "num_input_tokens_seen": 37561856, + "step": 11930 + }, + { + "epoch": 0.7640355931118367, + "grad_norm": 29.5023136138916, + "learning_rate": 3.206702027037868e-07, + "loss": 0.3292, + "num_input_tokens_seen": 37578624, + "step": 11935 + }, + { + "epoch": 0.7643556750528135, + "grad_norm": 45.15686798095703, + "learning_rate": 3.198506651159344e-07, + "loss": 0.3962, + "num_input_tokens_seen": 37593920, + "step": 11940 + }, + { + "epoch": 0.7646757569937904, + "grad_norm": 24.18119239807129, + "learning_rate": 3.190319767025121e-07, + "loss": 0.3658, + "num_input_tokens_seen": 37609664, + "step": 11945 + }, + { + "epoch": 0.7649958389347673, + "grad_norm": 58.468135833740234, + "learning_rate": 3.1821413848566213e-07, + "loss": 0.4959, + "num_input_tokens_seen": 37626048, + "step": 11950 + }, + { + "epoch": 0.7653159208757442, + "grad_norm": 19.08656883239746, + "learning_rate": 3.1739715148646564e-07, + "loss": 0.3753, + "num_input_tokens_seen": 37641792, + "step": 11955 + }, + { + "epoch": 0.7656360028167211, + "grad_norm": 54.501651763916016, + "learning_rate": 3.1658101672494043e-07, + "loss": 0.4534, + "num_input_tokens_seen": 37656512, + "step": 11960 + }, + { + "epoch": 0.7659560847576979, + "grad_norm": 47.72370910644531, + "learning_rate": 3.157657352200397e-07, + "loss": 0.3377, + "num_input_tokens_seen": 37672000, + "step": 11965 + }, + { + "epoch": 0.7662761666986748, + "grad_norm": 40.14820861816406, + "learning_rate": 3.149513079896521e-07, + "loss": 0.3278, + "num_input_tokens_seen": 37687232, + "step": 11970 + }, + { + "epoch": 0.7665962486396517, + "grad_norm": 19.0128116607666, + "learning_rate": 3.1413773605060034e-07, + "loss": 0.3237, + "num_input_tokens_seen": 37702656, + "step": 11975 + }, + { + "epoch": 0.7669163305806287, + "grad_norm": 59.66189956665039, + "learning_rate": 3.1332502041863783e-07, + "loss": 0.4234, + "num_input_tokens_seen": 37718080, + "step": 11980 + }, + { + "epoch": 0.7672364125216056, + "grad_norm": 28.364665985107422, + "learning_rate": 3.1251316210844946e-07, + "loss": 0.3181, + "num_input_tokens_seen": 37735680, + "step": 11985 + }, + { + "epoch": 0.7675564944625825, + "grad_norm": 51.28329849243164, + "learning_rate": 3.1170216213365055e-07, + "loss": 0.2871, + "num_input_tokens_seen": 37749952, + "step": 11990 + }, + { + "epoch": 0.7678765764035593, + "grad_norm": 34.2696647644043, + "learning_rate": 3.1089202150678397e-07, + "loss": 0.4582, + "num_input_tokens_seen": 37765312, + "step": 11995 + }, + { + "epoch": 0.7681966583445362, + "grad_norm": 50.565311431884766, + "learning_rate": 3.1008274123931886e-07, + "loss": 0.4919, + "num_input_tokens_seen": 37780160, + "step": 12000 + }, + { + "epoch": 0.7685167402855131, + "grad_norm": 28.518428802490234, + "learning_rate": 3.092743223416523e-07, + "loss": 0.2657, + "num_input_tokens_seen": 37796352, + "step": 12005 + }, + { + "epoch": 0.76883682222649, + "grad_norm": 60.307430267333984, + "learning_rate": 3.0846676582310413e-07, + "loss": 0.3551, + "num_input_tokens_seen": 37812864, + "step": 12010 + }, + { + "epoch": 0.7691569041674668, + "grad_norm": 45.83395767211914, + "learning_rate": 3.076600726919185e-07, + "loss": 0.3818, + "num_input_tokens_seen": 37827840, + "step": 12015 + }, + { + "epoch": 0.7694769861084437, + "grad_norm": 32.84312438964844, + "learning_rate": 3.0685424395526106e-07, + "loss": 0.3599, + "num_input_tokens_seen": 37847040, + "step": 12020 + }, + { + "epoch": 0.7697970680494206, + "grad_norm": 38.15679168701172, + "learning_rate": 3.060492806192184e-07, + "loss": 0.2875, + "num_input_tokens_seen": 37862464, + "step": 12025 + }, + { + "epoch": 0.7701171499903975, + "grad_norm": 36.667755126953125, + "learning_rate": 3.052451836887968e-07, + "loss": 0.3826, + "num_input_tokens_seen": 37877760, + "step": 12030 + }, + { + "epoch": 0.7704372319313745, + "grad_norm": 28.926128387451172, + "learning_rate": 3.044419541679207e-07, + "loss": 0.2867, + "num_input_tokens_seen": 37892800, + "step": 12035 + }, + { + "epoch": 0.7707573138723514, + "grad_norm": 59.06888961791992, + "learning_rate": 3.0363959305943153e-07, + "loss": 0.4353, + "num_input_tokens_seen": 37909056, + "step": 12040 + }, + { + "epoch": 0.7710773958133282, + "grad_norm": 42.93339157104492, + "learning_rate": 3.028381013650867e-07, + "loss": 0.3447, + "num_input_tokens_seen": 37925376, + "step": 12045 + }, + { + "epoch": 0.7713974777543051, + "grad_norm": 39.62418746948242, + "learning_rate": 3.0203748008555783e-07, + "loss": 0.3705, + "num_input_tokens_seen": 37941632, + "step": 12050 + }, + { + "epoch": 0.771717559695282, + "grad_norm": 38.263912200927734, + "learning_rate": 3.012377302204301e-07, + "loss": 0.374, + "num_input_tokens_seen": 37957056, + "step": 12055 + }, + { + "epoch": 0.7720376416362589, + "grad_norm": 47.257015228271484, + "learning_rate": 3.0043885276820046e-07, + "loss": 0.3959, + "num_input_tokens_seen": 37973184, + "step": 12060 + }, + { + "epoch": 0.7723577235772358, + "grad_norm": 25.244918823242188, + "learning_rate": 2.99640848726277e-07, + "loss": 0.3027, + "num_input_tokens_seen": 37988288, + "step": 12065 + }, + { + "epoch": 0.7726778055182126, + "grad_norm": 25.476991653442383, + "learning_rate": 2.9884371909097704e-07, + "loss": 0.3723, + "num_input_tokens_seen": 38004224, + "step": 12070 + }, + { + "epoch": 0.7729978874591895, + "grad_norm": 23.3084774017334, + "learning_rate": 2.9804746485752616e-07, + "loss": 0.3721, + "num_input_tokens_seen": 38019456, + "step": 12075 + }, + { + "epoch": 0.7733179694001664, + "grad_norm": 28.834396362304688, + "learning_rate": 2.972520870200573e-07, + "loss": 0.4237, + "num_input_tokens_seen": 38035264, + "step": 12080 + }, + { + "epoch": 0.7736380513411434, + "grad_norm": 32.87154006958008, + "learning_rate": 2.9645758657160904e-07, + "loss": 0.4166, + "num_input_tokens_seen": 38051072, + "step": 12085 + }, + { + "epoch": 0.7739581332821203, + "grad_norm": 16.961706161499023, + "learning_rate": 2.9566396450412444e-07, + "loss": 0.3573, + "num_input_tokens_seen": 38066688, + "step": 12090 + }, + { + "epoch": 0.7742782152230971, + "grad_norm": 22.967132568359375, + "learning_rate": 2.9487122180844957e-07, + "loss": 0.3237, + "num_input_tokens_seen": 38082048, + "step": 12095 + }, + { + "epoch": 0.774598297164074, + "grad_norm": 57.26237487792969, + "learning_rate": 2.9407935947433406e-07, + "loss": 0.3143, + "num_input_tokens_seen": 38097344, + "step": 12100 + }, + { + "epoch": 0.7749183791050509, + "grad_norm": 45.43290328979492, + "learning_rate": 2.932883784904264e-07, + "loss": 0.4448, + "num_input_tokens_seen": 38112320, + "step": 12105 + }, + { + "epoch": 0.7752384610460278, + "grad_norm": 15.891319274902344, + "learning_rate": 2.9249827984427555e-07, + "loss": 0.244, + "num_input_tokens_seen": 38128000, + "step": 12110 + }, + { + "epoch": 0.7755585429870047, + "grad_norm": 30.146347045898438, + "learning_rate": 2.917090645223297e-07, + "loss": 0.3049, + "num_input_tokens_seen": 38143168, + "step": 12115 + }, + { + "epoch": 0.7758786249279815, + "grad_norm": 28.579742431640625, + "learning_rate": 2.909207335099332e-07, + "loss": 0.301, + "num_input_tokens_seen": 38157824, + "step": 12120 + }, + { + "epoch": 0.7761987068689584, + "grad_norm": 32.7314567565918, + "learning_rate": 2.9013328779132595e-07, + "loss": 0.3329, + "num_input_tokens_seen": 38172864, + "step": 12125 + }, + { + "epoch": 0.7765187888099353, + "grad_norm": 102.57218170166016, + "learning_rate": 2.893467283496439e-07, + "loss": 0.4221, + "num_input_tokens_seen": 38187264, + "step": 12130 + }, + { + "epoch": 0.7768388707509122, + "grad_norm": 21.075590133666992, + "learning_rate": 2.885610561669155e-07, + "loss": 0.3534, + "num_input_tokens_seen": 38204288, + "step": 12135 + }, + { + "epoch": 0.7771589526918892, + "grad_norm": 29.551855087280273, + "learning_rate": 2.8777627222406163e-07, + "loss": 0.3447, + "num_input_tokens_seen": 38219264, + "step": 12140 + }, + { + "epoch": 0.777479034632866, + "grad_norm": 41.8278923034668, + "learning_rate": 2.869923775008943e-07, + "loss": 0.3845, + "num_input_tokens_seen": 38234496, + "step": 12145 + }, + { + "epoch": 0.7777991165738429, + "grad_norm": 41.65421676635742, + "learning_rate": 2.862093729761155e-07, + "loss": 0.2729, + "num_input_tokens_seen": 38251072, + "step": 12150 + }, + { + "epoch": 0.7781191985148198, + "grad_norm": 29.3076114654541, + "learning_rate": 2.854272596273152e-07, + "loss": 0.3971, + "num_input_tokens_seen": 38266560, + "step": 12155 + }, + { + "epoch": 0.7784392804557967, + "grad_norm": 44.304229736328125, + "learning_rate": 2.8464603843097134e-07, + "loss": 0.331, + "num_input_tokens_seen": 38282944, + "step": 12160 + }, + { + "epoch": 0.7787593623967736, + "grad_norm": 33.777957916259766, + "learning_rate": 2.8386571036244764e-07, + "loss": 0.3274, + "num_input_tokens_seen": 38299264, + "step": 12165 + }, + { + "epoch": 0.7790794443377504, + "grad_norm": 51.79270553588867, + "learning_rate": 2.830862763959929e-07, + "loss": 0.3866, + "num_input_tokens_seen": 38314368, + "step": 12170 + }, + { + "epoch": 0.7793995262787273, + "grad_norm": 11.458423614501953, + "learning_rate": 2.8230773750473956e-07, + "loss": 0.3108, + "num_input_tokens_seen": 38329664, + "step": 12175 + }, + { + "epoch": 0.7797196082197042, + "grad_norm": 28.3763427734375, + "learning_rate": 2.8153009466070267e-07, + "loss": 0.3067, + "num_input_tokens_seen": 38345408, + "step": 12180 + }, + { + "epoch": 0.7800396901606811, + "grad_norm": 32.849178314208984, + "learning_rate": 2.807533488347783e-07, + "loss": 0.2959, + "num_input_tokens_seen": 38362688, + "step": 12185 + }, + { + "epoch": 0.7803597721016581, + "grad_norm": 24.91496467590332, + "learning_rate": 2.7997750099674277e-07, + "loss": 0.2508, + "num_input_tokens_seen": 38377600, + "step": 12190 + }, + { + "epoch": 0.780679854042635, + "grad_norm": 38.691551208496094, + "learning_rate": 2.792025521152512e-07, + "loss": 0.5263, + "num_input_tokens_seen": 38392640, + "step": 12195 + }, + { + "epoch": 0.7809999359836118, + "grad_norm": 34.44416427612305, + "learning_rate": 2.784285031578365e-07, + "loss": 0.4457, + "num_input_tokens_seen": 38408448, + "step": 12200 + }, + { + "epoch": 0.7813200179245887, + "grad_norm": 26.372634887695312, + "learning_rate": 2.7765535509090786e-07, + "loss": 0.3649, + "num_input_tokens_seen": 38424512, + "step": 12205 + }, + { + "epoch": 0.7816400998655656, + "grad_norm": 31.301618576049805, + "learning_rate": 2.768831088797495e-07, + "loss": 0.4661, + "num_input_tokens_seen": 38439296, + "step": 12210 + }, + { + "epoch": 0.7819601818065425, + "grad_norm": 16.063852310180664, + "learning_rate": 2.761117654885201e-07, + "loss": 0.247, + "num_input_tokens_seen": 38455424, + "step": 12215 + }, + { + "epoch": 0.7822802637475194, + "grad_norm": 25.676212310791016, + "learning_rate": 2.7534132588025063e-07, + "loss": 0.3314, + "num_input_tokens_seen": 38470976, + "step": 12220 + }, + { + "epoch": 0.7826003456884962, + "grad_norm": 28.28862762451172, + "learning_rate": 2.7457179101684483e-07, + "loss": 0.5088, + "num_input_tokens_seen": 38486016, + "step": 12225 + }, + { + "epoch": 0.7829204276294731, + "grad_norm": 23.850549697875977, + "learning_rate": 2.7380316185907506e-07, + "loss": 0.2958, + "num_input_tokens_seen": 38501248, + "step": 12230 + }, + { + "epoch": 0.78324050957045, + "grad_norm": 19.70224380493164, + "learning_rate": 2.730354393665839e-07, + "loss": 0.3508, + "num_input_tokens_seen": 38516992, + "step": 12235 + }, + { + "epoch": 0.7835605915114269, + "grad_norm": 30.95526123046875, + "learning_rate": 2.7226862449788245e-07, + "loss": 0.3871, + "num_input_tokens_seen": 38531456, + "step": 12240 + }, + { + "epoch": 0.7838806734524039, + "grad_norm": 39.920440673828125, + "learning_rate": 2.715027182103482e-07, + "loss": 0.3283, + "num_input_tokens_seen": 38546880, + "step": 12245 + }, + { + "epoch": 0.7842007553933807, + "grad_norm": 22.294261932373047, + "learning_rate": 2.707377214602232e-07, + "loss": 0.3104, + "num_input_tokens_seen": 38562176, + "step": 12250 + }, + { + "epoch": 0.7845208373343576, + "grad_norm": 38.912017822265625, + "learning_rate": 2.699736352026157e-07, + "loss": 0.4304, + "num_input_tokens_seen": 38577472, + "step": 12255 + }, + { + "epoch": 0.7848409192753345, + "grad_norm": 22.714643478393555, + "learning_rate": 2.6921046039149645e-07, + "loss": 0.3265, + "num_input_tokens_seen": 38593088, + "step": 12260 + }, + { + "epoch": 0.7851610012163114, + "grad_norm": 32.15650939941406, + "learning_rate": 2.6844819797969744e-07, + "loss": 0.3378, + "num_input_tokens_seen": 38607936, + "step": 12265 + }, + { + "epoch": 0.7854810831572883, + "grad_norm": 41.70045471191406, + "learning_rate": 2.6768684891891236e-07, + "loss": 0.2504, + "num_input_tokens_seen": 38625024, + "step": 12270 + }, + { + "epoch": 0.7858011650982651, + "grad_norm": 31.758371353149414, + "learning_rate": 2.6692641415969497e-07, + "loss": 0.3268, + "num_input_tokens_seen": 38641792, + "step": 12275 + }, + { + "epoch": 0.786121247039242, + "grad_norm": 50.608848571777344, + "learning_rate": 2.66166894651457e-07, + "loss": 0.4112, + "num_input_tokens_seen": 38656896, + "step": 12280 + }, + { + "epoch": 0.7864413289802189, + "grad_norm": 43.49479675292969, + "learning_rate": 2.654082913424668e-07, + "loss": 0.343, + "num_input_tokens_seen": 38672448, + "step": 12285 + }, + { + "epoch": 0.7867614109211958, + "grad_norm": 28.721969604492188, + "learning_rate": 2.6465060517985003e-07, + "loss": 0.305, + "num_input_tokens_seen": 38688576, + "step": 12290 + }, + { + "epoch": 0.7870814928621728, + "grad_norm": 45.75242233276367, + "learning_rate": 2.638938371095867e-07, + "loss": 0.5196, + "num_input_tokens_seen": 38704064, + "step": 12295 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 20.558774948120117, + "learning_rate": 2.6313798807651065e-07, + "loss": 0.3756, + "num_input_tokens_seen": 38718976, + "step": 12300 + }, + { + "epoch": 0.7877216567441265, + "grad_norm": 19.721187591552734, + "learning_rate": 2.6238305902430813e-07, + "loss": 0.3578, + "num_input_tokens_seen": 38734272, + "step": 12305 + }, + { + "epoch": 0.7880417386851034, + "grad_norm": 14.175429344177246, + "learning_rate": 2.61629050895517e-07, + "loss": 0.3147, + "num_input_tokens_seen": 38749504, + "step": 12310 + }, + { + "epoch": 0.7883618206260803, + "grad_norm": 29.975229263305664, + "learning_rate": 2.608759646315253e-07, + "loss": 0.3237, + "num_input_tokens_seen": 38764352, + "step": 12315 + }, + { + "epoch": 0.7886819025670572, + "grad_norm": 21.73525619506836, + "learning_rate": 2.6012380117257005e-07, + "loss": 0.3771, + "num_input_tokens_seen": 38780096, + "step": 12320 + }, + { + "epoch": 0.789001984508034, + "grad_norm": 28.346630096435547, + "learning_rate": 2.5937256145773613e-07, + "loss": 0.3853, + "num_input_tokens_seen": 38795712, + "step": 12325 + }, + { + "epoch": 0.7893220664490109, + "grad_norm": 38.748958587646484, + "learning_rate": 2.586222464249551e-07, + "loss": 0.3191, + "num_input_tokens_seen": 38811328, + "step": 12330 + }, + { + "epoch": 0.7896421483899878, + "grad_norm": 39.744686126708984, + "learning_rate": 2.5787285701100413e-07, + "loss": 0.2067, + "num_input_tokens_seen": 38826240, + "step": 12335 + }, + { + "epoch": 0.7899622303309647, + "grad_norm": 39.24365234375, + "learning_rate": 2.571243941515048e-07, + "loss": 0.3655, + "num_input_tokens_seen": 38842624, + "step": 12340 + }, + { + "epoch": 0.7902823122719416, + "grad_norm": 24.49032211303711, + "learning_rate": 2.563768587809213e-07, + "loss": 0.278, + "num_input_tokens_seen": 38857472, + "step": 12345 + }, + { + "epoch": 0.7906023942129186, + "grad_norm": 60.89975357055664, + "learning_rate": 2.5563025183256137e-07, + "loss": 0.4174, + "num_input_tokens_seen": 38872256, + "step": 12350 + }, + { + "epoch": 0.7909224761538954, + "grad_norm": 39.74103927612305, + "learning_rate": 2.548845742385717e-07, + "loss": 0.5513, + "num_input_tokens_seen": 38890048, + "step": 12355 + }, + { + "epoch": 0.7912425580948723, + "grad_norm": 38.84343719482422, + "learning_rate": 2.541398269299393e-07, + "loss": 0.2424, + "num_input_tokens_seen": 38905664, + "step": 12360 + }, + { + "epoch": 0.7915626400358492, + "grad_norm": 14.47574234008789, + "learning_rate": 2.5339601083649063e-07, + "loss": 0.3106, + "num_input_tokens_seen": 38926144, + "step": 12365 + }, + { + "epoch": 0.7918827219768261, + "grad_norm": 37.022544860839844, + "learning_rate": 2.526531268868889e-07, + "loss": 0.5144, + "num_input_tokens_seen": 38942720, + "step": 12370 + }, + { + "epoch": 0.792202803917803, + "grad_norm": 26.23043441772461, + "learning_rate": 2.5191117600863266e-07, + "loss": 0.3388, + "num_input_tokens_seen": 38958144, + "step": 12375 + }, + { + "epoch": 0.7925228858587798, + "grad_norm": 19.583799362182617, + "learning_rate": 2.511701591280565e-07, + "loss": 0.2559, + "num_input_tokens_seen": 38973376, + "step": 12380 + }, + { + "epoch": 0.7928429677997567, + "grad_norm": 40.05327224731445, + "learning_rate": 2.504300771703295e-07, + "loss": 0.3501, + "num_input_tokens_seen": 38989504, + "step": 12385 + }, + { + "epoch": 0.7931630497407336, + "grad_norm": 64.94096374511719, + "learning_rate": 2.496909310594517e-07, + "loss": 0.3819, + "num_input_tokens_seen": 39005056, + "step": 12390 + }, + { + "epoch": 0.7934831316817105, + "grad_norm": 41.124534606933594, + "learning_rate": 2.4895272171825587e-07, + "loss": 0.4581, + "num_input_tokens_seen": 39020608, + "step": 12395 + }, + { + "epoch": 0.7938032136226874, + "grad_norm": 34.6364860534668, + "learning_rate": 2.482154500684055e-07, + "loss": 0.4464, + "num_input_tokens_seen": 39035712, + "step": 12400 + }, + { + "epoch": 0.7941232955636643, + "grad_norm": 29.507856369018555, + "learning_rate": 2.4747911703039293e-07, + "loss": 0.3431, + "num_input_tokens_seen": 39050880, + "step": 12405 + }, + { + "epoch": 0.7944433775046412, + "grad_norm": 35.2116813659668, + "learning_rate": 2.467437235235378e-07, + "loss": 0.3737, + "num_input_tokens_seen": 39065792, + "step": 12410 + }, + { + "epoch": 0.7947634594456181, + "grad_norm": 32.397830963134766, + "learning_rate": 2.460092704659883e-07, + "loss": 0.3441, + "num_input_tokens_seen": 39080960, + "step": 12415 + }, + { + "epoch": 0.795083541386595, + "grad_norm": 16.87535285949707, + "learning_rate": 2.452757587747174e-07, + "loss": 0.2641, + "num_input_tokens_seen": 39097216, + "step": 12420 + }, + { + "epoch": 0.7954036233275719, + "grad_norm": 24.034717559814453, + "learning_rate": 2.445431893655232e-07, + "loss": 0.182, + "num_input_tokens_seen": 39113152, + "step": 12425 + }, + { + "epoch": 0.7957237052685487, + "grad_norm": 36.59601593017578, + "learning_rate": 2.438115631530271e-07, + "loss": 0.3652, + "num_input_tokens_seen": 39130176, + "step": 12430 + }, + { + "epoch": 0.7960437872095256, + "grad_norm": 28.420482635498047, + "learning_rate": 2.4308088105067305e-07, + "loss": 0.2338, + "num_input_tokens_seen": 39145792, + "step": 12435 + }, + { + "epoch": 0.7963638691505025, + "grad_norm": 68.1366958618164, + "learning_rate": 2.423511439707262e-07, + "loss": 0.4227, + "num_input_tokens_seen": 39161280, + "step": 12440 + }, + { + "epoch": 0.7966839510914794, + "grad_norm": 24.002521514892578, + "learning_rate": 2.4162235282427177e-07, + "loss": 0.2807, + "num_input_tokens_seen": 39176512, + "step": 12445 + }, + { + "epoch": 0.7970040330324563, + "grad_norm": 42.29568099975586, + "learning_rate": 2.408945085212144e-07, + "loss": 0.353, + "num_input_tokens_seen": 39191808, + "step": 12450 + }, + { + "epoch": 0.7973241149734333, + "grad_norm": 36.45928955078125, + "learning_rate": 2.401676119702759e-07, + "loss": 0.2507, + "num_input_tokens_seen": 39208640, + "step": 12455 + }, + { + "epoch": 0.7976441969144101, + "grad_norm": 20.824121475219727, + "learning_rate": 2.394416640789952e-07, + "loss": 0.3667, + "num_input_tokens_seen": 39223232, + "step": 12460 + }, + { + "epoch": 0.797964278855387, + "grad_norm": 40.27499008178711, + "learning_rate": 2.3871666575372696e-07, + "loss": 0.3149, + "num_input_tokens_seen": 39238656, + "step": 12465 + }, + { + "epoch": 0.7982843607963639, + "grad_norm": 55.65762710571289, + "learning_rate": 2.3799261789963964e-07, + "loss": 0.5348, + "num_input_tokens_seen": 39255872, + "step": 12470 + }, + { + "epoch": 0.7986044427373408, + "grad_norm": 21.946813583374023, + "learning_rate": 2.3726952142071644e-07, + "loss": 0.269, + "num_input_tokens_seen": 39270784, + "step": 12475 + }, + { + "epoch": 0.7989245246783176, + "grad_norm": 42.15665054321289, + "learning_rate": 2.365473772197508e-07, + "loss": 0.3524, + "num_input_tokens_seen": 39286080, + "step": 12480 + }, + { + "epoch": 0.7992446066192945, + "grad_norm": 35.08050537109375, + "learning_rate": 2.3582618619834883e-07, + "loss": 0.3557, + "num_input_tokens_seen": 39301312, + "step": 12485 + }, + { + "epoch": 0.7995646885602714, + "grad_norm": 16.935348510742188, + "learning_rate": 2.3510594925692528e-07, + "loss": 0.2214, + "num_input_tokens_seen": 39316736, + "step": 12490 + }, + { + "epoch": 0.7998847705012483, + "grad_norm": 32.69172668457031, + "learning_rate": 2.343866672947057e-07, + "loss": 0.3518, + "num_input_tokens_seen": 39331264, + "step": 12495 + }, + { + "epoch": 0.8002048524422252, + "grad_norm": 34.507137298583984, + "learning_rate": 2.336683412097209e-07, + "loss": 0.2711, + "num_input_tokens_seen": 39345856, + "step": 12500 + }, + { + "epoch": 0.800524934383202, + "grad_norm": 24.427064895629883, + "learning_rate": 2.329509718988095e-07, + "loss": 0.3662, + "num_input_tokens_seen": 39361280, + "step": 12505 + }, + { + "epoch": 0.800845016324179, + "grad_norm": 34.9528694152832, + "learning_rate": 2.3223456025761645e-07, + "loss": 0.3395, + "num_input_tokens_seen": 39375872, + "step": 12510 + }, + { + "epoch": 0.8009730491005698, + "eval_loss": 0.3655269742012024, + "eval_runtime": 50.6164, + "eval_samples_per_second": 274.338, + "eval_steps_per_second": 34.297, + "num_input_tokens_seen": 39382144, + "step": 12512 + }, + { + "epoch": 0.8011650982651559, + "grad_norm": 20.289682388305664, + "learning_rate": 2.315191071805892e-07, + "loss": 0.3043, + "num_input_tokens_seen": 39392320, + "step": 12515 + }, + { + "epoch": 0.8014851802061328, + "grad_norm": 68.8118667602539, + "learning_rate": 2.3080461356097937e-07, + "loss": 0.3619, + "num_input_tokens_seen": 39407680, + "step": 12520 + }, + { + "epoch": 0.8018052621471097, + "grad_norm": 20.164321899414062, + "learning_rate": 2.30091080290841e-07, + "loss": 0.2951, + "num_input_tokens_seen": 39424512, + "step": 12525 + }, + { + "epoch": 0.8021253440880866, + "grad_norm": 48.468223571777344, + "learning_rate": 2.29378508261029e-07, + "loss": 0.3417, + "num_input_tokens_seen": 39439296, + "step": 12530 + }, + { + "epoch": 0.8024454260290634, + "grad_norm": 51.00064468383789, + "learning_rate": 2.2866689836119702e-07, + "loss": 0.3672, + "num_input_tokens_seen": 39456576, + "step": 12535 + }, + { + "epoch": 0.8027655079700403, + "grad_norm": 64.20645141601562, + "learning_rate": 2.2795625147979913e-07, + "loss": 0.3553, + "num_input_tokens_seen": 39472512, + "step": 12540 + }, + { + "epoch": 0.8030855899110172, + "grad_norm": 22.278350830078125, + "learning_rate": 2.2724656850408597e-07, + "loss": 0.2351, + "num_input_tokens_seen": 39488192, + "step": 12545 + }, + { + "epoch": 0.8034056718519941, + "grad_norm": 44.79075622558594, + "learning_rate": 2.2653785032010532e-07, + "loss": 0.3808, + "num_input_tokens_seen": 39503552, + "step": 12550 + }, + { + "epoch": 0.803725753792971, + "grad_norm": 40.77724838256836, + "learning_rate": 2.258300978126999e-07, + "loss": 0.3368, + "num_input_tokens_seen": 39519744, + "step": 12555 + }, + { + "epoch": 0.804045835733948, + "grad_norm": 23.474609375, + "learning_rate": 2.2512331186550715e-07, + "loss": 0.4903, + "num_input_tokens_seen": 39535232, + "step": 12560 + }, + { + "epoch": 0.8043659176749248, + "grad_norm": 45.52729415893555, + "learning_rate": 2.244174933609575e-07, + "loss": 0.3867, + "num_input_tokens_seen": 39549568, + "step": 12565 + }, + { + "epoch": 0.8046859996159017, + "grad_norm": 27.22245216369629, + "learning_rate": 2.2371264318027383e-07, + "loss": 0.2726, + "num_input_tokens_seen": 39566016, + "step": 12570 + }, + { + "epoch": 0.8050060815568786, + "grad_norm": 28.36591339111328, + "learning_rate": 2.2300876220346975e-07, + "loss": 0.2337, + "num_input_tokens_seen": 39581760, + "step": 12575 + }, + { + "epoch": 0.8053261634978555, + "grad_norm": 38.8742561340332, + "learning_rate": 2.2230585130934897e-07, + "loss": 0.2888, + "num_input_tokens_seen": 39597888, + "step": 12580 + }, + { + "epoch": 0.8056462454388323, + "grad_norm": 25.22014045715332, + "learning_rate": 2.2160391137550394e-07, + "loss": 0.4469, + "num_input_tokens_seen": 39613568, + "step": 12585 + }, + { + "epoch": 0.8059663273798092, + "grad_norm": 59.23908996582031, + "learning_rate": 2.2090294327831494e-07, + "loss": 0.4226, + "num_input_tokens_seen": 39628096, + "step": 12590 + }, + { + "epoch": 0.8062864093207861, + "grad_norm": 41.97724914550781, + "learning_rate": 2.202029478929488e-07, + "loss": 0.2881, + "num_input_tokens_seen": 39642560, + "step": 12595 + }, + { + "epoch": 0.806606491261763, + "grad_norm": 18.373050689697266, + "learning_rate": 2.195039260933581e-07, + "loss": 0.2958, + "num_input_tokens_seen": 39658112, + "step": 12600 + }, + { + "epoch": 0.8069265732027399, + "grad_norm": 31.841543197631836, + "learning_rate": 2.1880587875227973e-07, + "loss": 0.2724, + "num_input_tokens_seen": 39674112, + "step": 12605 + }, + { + "epoch": 0.8072466551437167, + "grad_norm": 28.09347915649414, + "learning_rate": 2.18108806741234e-07, + "loss": 0.3308, + "num_input_tokens_seen": 39690432, + "step": 12610 + }, + { + "epoch": 0.8075667370846937, + "grad_norm": 23.086257934570312, + "learning_rate": 2.1741271093052315e-07, + "loss": 0.3547, + "num_input_tokens_seen": 39705792, + "step": 12615 + }, + { + "epoch": 0.8078868190256706, + "grad_norm": 46.950721740722656, + "learning_rate": 2.167175921892318e-07, + "loss": 0.4658, + "num_input_tokens_seen": 39722048, + "step": 12620 + }, + { + "epoch": 0.8082069009666475, + "grad_norm": 28.440935134887695, + "learning_rate": 2.1602345138522314e-07, + "loss": 0.4219, + "num_input_tokens_seen": 39738304, + "step": 12625 + }, + { + "epoch": 0.8085269829076244, + "grad_norm": 31.971548080444336, + "learning_rate": 2.1533028938514008e-07, + "loss": 0.3551, + "num_input_tokens_seen": 39753728, + "step": 12630 + }, + { + "epoch": 0.8088470648486012, + "grad_norm": 43.10588836669922, + "learning_rate": 2.1463810705440433e-07, + "loss": 0.3441, + "num_input_tokens_seen": 39769600, + "step": 12635 + }, + { + "epoch": 0.8091671467895781, + "grad_norm": 35.64780044555664, + "learning_rate": 2.139469052572127e-07, + "loss": 0.3571, + "num_input_tokens_seen": 39784000, + "step": 12640 + }, + { + "epoch": 0.809487228730555, + "grad_norm": 46.72938537597656, + "learning_rate": 2.1325668485653891e-07, + "loss": 0.3587, + "num_input_tokens_seen": 39800320, + "step": 12645 + }, + { + "epoch": 0.8098073106715319, + "grad_norm": 29.774227142333984, + "learning_rate": 2.1256744671413173e-07, + "loss": 0.4617, + "num_input_tokens_seen": 39815360, + "step": 12650 + }, + { + "epoch": 0.8101273926125088, + "grad_norm": 32.227561950683594, + "learning_rate": 2.1187919169051316e-07, + "loss": 0.3819, + "num_input_tokens_seen": 39829952, + "step": 12655 + }, + { + "epoch": 0.8104474745534856, + "grad_norm": 31.437002182006836, + "learning_rate": 2.111919206449767e-07, + "loss": 0.3505, + "num_input_tokens_seen": 39845376, + "step": 12660 + }, + { + "epoch": 0.8107675564944626, + "grad_norm": 27.419315338134766, + "learning_rate": 2.1050563443558922e-07, + "loss": 0.4955, + "num_input_tokens_seen": 39861696, + "step": 12665 + }, + { + "epoch": 0.8110876384354395, + "grad_norm": 41.58053970336914, + "learning_rate": 2.0982033391918697e-07, + "loss": 0.3, + "num_input_tokens_seen": 39877440, + "step": 12670 + }, + { + "epoch": 0.8114077203764164, + "grad_norm": 58.020626068115234, + "learning_rate": 2.0913601995137543e-07, + "loss": 0.3292, + "num_input_tokens_seen": 39893760, + "step": 12675 + }, + { + "epoch": 0.8117278023173933, + "grad_norm": 15.215536117553711, + "learning_rate": 2.084526933865287e-07, + "loss": 0.2889, + "num_input_tokens_seen": 39909568, + "step": 12680 + }, + { + "epoch": 0.8120478842583702, + "grad_norm": 30.34135627746582, + "learning_rate": 2.0777035507778817e-07, + "loss": 0.4667, + "num_input_tokens_seen": 39923648, + "step": 12685 + }, + { + "epoch": 0.812367966199347, + "grad_norm": 18.02565574645996, + "learning_rate": 2.0708900587706135e-07, + "loss": 0.4268, + "num_input_tokens_seen": 39939008, + "step": 12690 + }, + { + "epoch": 0.8126880481403239, + "grad_norm": 44.681556701660156, + "learning_rate": 2.0640864663502e-07, + "loss": 0.3356, + "num_input_tokens_seen": 39955072, + "step": 12695 + }, + { + "epoch": 0.8130081300813008, + "grad_norm": 30.91301727294922, + "learning_rate": 2.057292782011013e-07, + "loss": 0.4563, + "num_input_tokens_seen": 39970880, + "step": 12700 + }, + { + "epoch": 0.8133282120222777, + "grad_norm": 25.404428482055664, + "learning_rate": 2.0505090142350468e-07, + "loss": 0.3045, + "num_input_tokens_seen": 39986240, + "step": 12705 + }, + { + "epoch": 0.8136482939632546, + "grad_norm": 28.93308448791504, + "learning_rate": 2.0437351714919127e-07, + "loss": 0.3426, + "num_input_tokens_seen": 40001856, + "step": 12710 + }, + { + "epoch": 0.8139683759042314, + "grad_norm": 20.265243530273438, + "learning_rate": 2.0369712622388336e-07, + "loss": 0.3084, + "num_input_tokens_seen": 40018112, + "step": 12715 + }, + { + "epoch": 0.8142884578452084, + "grad_norm": 41.139366149902344, + "learning_rate": 2.0302172949206298e-07, + "loss": 0.2869, + "num_input_tokens_seen": 40033664, + "step": 12720 + }, + { + "epoch": 0.8146085397861853, + "grad_norm": 60.38472366333008, + "learning_rate": 2.0234732779697094e-07, + "loss": 0.3069, + "num_input_tokens_seen": 40048768, + "step": 12725 + }, + { + "epoch": 0.8149286217271622, + "grad_norm": 42.53269577026367, + "learning_rate": 2.016739219806056e-07, + "loss": 0.3267, + "num_input_tokens_seen": 40063232, + "step": 12730 + }, + { + "epoch": 0.8152487036681391, + "grad_norm": 20.74918556213379, + "learning_rate": 2.0100151288372215e-07, + "loss": 0.3839, + "num_input_tokens_seen": 40079296, + "step": 12735 + }, + { + "epoch": 0.8155687856091159, + "grad_norm": 59.69536209106445, + "learning_rate": 2.0033010134583084e-07, + "loss": 0.5609, + "num_input_tokens_seen": 40094976, + "step": 12740 + }, + { + "epoch": 0.8158888675500928, + "grad_norm": 32.35287857055664, + "learning_rate": 1.9965968820519763e-07, + "loss": 0.314, + "num_input_tokens_seen": 40110464, + "step": 12745 + }, + { + "epoch": 0.8162089494910697, + "grad_norm": 47.17245864868164, + "learning_rate": 1.9899027429884042e-07, + "loss": 0.4042, + "num_input_tokens_seen": 40125568, + "step": 12750 + }, + { + "epoch": 0.8165290314320466, + "grad_norm": 38.48648452758789, + "learning_rate": 1.983218604625305e-07, + "loss": 0.4302, + "num_input_tokens_seen": 40141440, + "step": 12755 + }, + { + "epoch": 0.8168491133730235, + "grad_norm": 16.031692504882812, + "learning_rate": 1.9765444753079096e-07, + "loss": 0.3288, + "num_input_tokens_seen": 40156416, + "step": 12760 + }, + { + "epoch": 0.8171691953140003, + "grad_norm": 32.27566909790039, + "learning_rate": 1.9698803633689408e-07, + "loss": 0.3985, + "num_input_tokens_seen": 40172928, + "step": 12765 + }, + { + "epoch": 0.8174892772549772, + "grad_norm": 22.818599700927734, + "learning_rate": 1.963226277128619e-07, + "loss": 0.2404, + "num_input_tokens_seen": 40188096, + "step": 12770 + }, + { + "epoch": 0.8178093591959542, + "grad_norm": 30.646739959716797, + "learning_rate": 1.956582224894655e-07, + "loss": 0.3559, + "num_input_tokens_seen": 40204032, + "step": 12775 + }, + { + "epoch": 0.8181294411369311, + "grad_norm": 46.02298355102539, + "learning_rate": 1.949948214962227e-07, + "loss": 0.369, + "num_input_tokens_seen": 40218944, + "step": 12780 + }, + { + "epoch": 0.818449523077908, + "grad_norm": 50.748172760009766, + "learning_rate": 1.943324255613964e-07, + "loss": 0.358, + "num_input_tokens_seen": 40235456, + "step": 12785 + }, + { + "epoch": 0.8187696050188848, + "grad_norm": 25.575429916381836, + "learning_rate": 1.936710355119967e-07, + "loss": 0.4564, + "num_input_tokens_seen": 40250176, + "step": 12790 + }, + { + "epoch": 0.8190896869598617, + "grad_norm": 34.35418701171875, + "learning_rate": 1.9301065217377655e-07, + "loss": 0.3312, + "num_input_tokens_seen": 40265472, + "step": 12795 + }, + { + "epoch": 0.8194097689008386, + "grad_norm": 24.442747116088867, + "learning_rate": 1.9235127637123249e-07, + "loss": 0.3995, + "num_input_tokens_seen": 40281728, + "step": 12800 + }, + { + "epoch": 0.8197298508418155, + "grad_norm": 51.47005081176758, + "learning_rate": 1.9169290892760225e-07, + "loss": 0.3221, + "num_input_tokens_seen": 40296768, + "step": 12805 + }, + { + "epoch": 0.8200499327827924, + "grad_norm": 47.755516052246094, + "learning_rate": 1.91035550664866e-07, + "loss": 0.3295, + "num_input_tokens_seen": 40311488, + "step": 12810 + }, + { + "epoch": 0.8203700147237692, + "grad_norm": 50.54772186279297, + "learning_rate": 1.903792024037433e-07, + "loss": 0.3238, + "num_input_tokens_seen": 40327232, + "step": 12815 + }, + { + "epoch": 0.8206900966647461, + "grad_norm": 33.594635009765625, + "learning_rate": 1.8972386496369185e-07, + "loss": 0.4338, + "num_input_tokens_seen": 40344064, + "step": 12820 + }, + { + "epoch": 0.8210101786057231, + "grad_norm": 40.6557502746582, + "learning_rate": 1.89069539162909e-07, + "loss": 0.3917, + "num_input_tokens_seen": 40359040, + "step": 12825 + }, + { + "epoch": 0.8213302605467, + "grad_norm": 20.805389404296875, + "learning_rate": 1.8841622581832783e-07, + "loss": 0.4034, + "num_input_tokens_seen": 40376384, + "step": 12830 + }, + { + "epoch": 0.8216503424876769, + "grad_norm": 27.590456008911133, + "learning_rate": 1.8776392574561783e-07, + "loss": 0.5928, + "num_input_tokens_seen": 40391936, + "step": 12835 + }, + { + "epoch": 0.8219704244286538, + "grad_norm": 17.456087112426758, + "learning_rate": 1.8711263975918322e-07, + "loss": 0.4702, + "num_input_tokens_seen": 40408832, + "step": 12840 + }, + { + "epoch": 0.8222905063696306, + "grad_norm": 37.28561019897461, + "learning_rate": 1.8646236867216215e-07, + "loss": 0.4516, + "num_input_tokens_seen": 40425280, + "step": 12845 + }, + { + "epoch": 0.8226105883106075, + "grad_norm": 36.03346633911133, + "learning_rate": 1.8581311329642591e-07, + "loss": 0.3451, + "num_input_tokens_seen": 40440832, + "step": 12850 + }, + { + "epoch": 0.8229306702515844, + "grad_norm": 30.598731994628906, + "learning_rate": 1.8516487444257723e-07, + "loss": 0.2711, + "num_input_tokens_seen": 40458624, + "step": 12855 + }, + { + "epoch": 0.8232507521925613, + "grad_norm": 34.196533203125, + "learning_rate": 1.8451765291995004e-07, + "loss": 0.4068, + "num_input_tokens_seen": 40474688, + "step": 12860 + }, + { + "epoch": 0.8235708341335382, + "grad_norm": 36.64088439941406, + "learning_rate": 1.8387144953660806e-07, + "loss": 0.3591, + "num_input_tokens_seen": 40490816, + "step": 12865 + }, + { + "epoch": 0.823890916074515, + "grad_norm": 39.154510498046875, + "learning_rate": 1.832262650993437e-07, + "loss": 0.4492, + "num_input_tokens_seen": 40506112, + "step": 12870 + }, + { + "epoch": 0.8242109980154919, + "grad_norm": 20.44598388671875, + "learning_rate": 1.825821004136774e-07, + "loss": 0.2973, + "num_input_tokens_seen": 40521344, + "step": 12875 + }, + { + "epoch": 0.8245310799564689, + "grad_norm": 30.141361236572266, + "learning_rate": 1.819389562838559e-07, + "loss": 0.2799, + "num_input_tokens_seen": 40537024, + "step": 12880 + }, + { + "epoch": 0.8248511618974458, + "grad_norm": 45.2744026184082, + "learning_rate": 1.8129683351285319e-07, + "loss": 0.3058, + "num_input_tokens_seen": 40552640, + "step": 12885 + }, + { + "epoch": 0.8251712438384227, + "grad_norm": 35.627498626708984, + "learning_rate": 1.8065573290236626e-07, + "loss": 0.3209, + "num_input_tokens_seen": 40568000, + "step": 12890 + }, + { + "epoch": 0.8254913257793995, + "grad_norm": 24.41234588623047, + "learning_rate": 1.8001565525281682e-07, + "loss": 0.3806, + "num_input_tokens_seen": 40584960, + "step": 12895 + }, + { + "epoch": 0.8258114077203764, + "grad_norm": 26.482351303100586, + "learning_rate": 1.793766013633493e-07, + "loss": 0.3707, + "num_input_tokens_seen": 40600704, + "step": 12900 + }, + { + "epoch": 0.8261314896613533, + "grad_norm": 29.43145751953125, + "learning_rate": 1.7873857203183074e-07, + "loss": 0.3865, + "num_input_tokens_seen": 40615872, + "step": 12905 + }, + { + "epoch": 0.8264515716023302, + "grad_norm": 53.48032760620117, + "learning_rate": 1.7810156805484733e-07, + "loss": 0.4632, + "num_input_tokens_seen": 40632640, + "step": 12910 + }, + { + "epoch": 0.8267716535433071, + "grad_norm": 24.980363845825195, + "learning_rate": 1.7746559022770612e-07, + "loss": 0.3007, + "num_input_tokens_seen": 40648064, + "step": 12915 + }, + { + "epoch": 0.8270917354842839, + "grad_norm": 30.67084503173828, + "learning_rate": 1.7683063934443342e-07, + "loss": 0.3833, + "num_input_tokens_seen": 40664704, + "step": 12920 + }, + { + "epoch": 0.8274118174252608, + "grad_norm": 40.46763610839844, + "learning_rate": 1.7619671619777277e-07, + "loss": 0.4074, + "num_input_tokens_seen": 40681024, + "step": 12925 + }, + { + "epoch": 0.8277318993662378, + "grad_norm": 31.0588321685791, + "learning_rate": 1.7556382157918404e-07, + "loss": 0.4121, + "num_input_tokens_seen": 40695936, + "step": 12930 + }, + { + "epoch": 0.8280519813072147, + "grad_norm": 27.82343292236328, + "learning_rate": 1.7493195627884427e-07, + "loss": 0.3177, + "num_input_tokens_seen": 40713472, + "step": 12935 + }, + { + "epoch": 0.8283720632481916, + "grad_norm": 41.197757720947266, + "learning_rate": 1.7430112108564465e-07, + "loss": 0.3141, + "num_input_tokens_seen": 40729344, + "step": 12940 + }, + { + "epoch": 0.8286921451891684, + "grad_norm": 38.33913040161133, + "learning_rate": 1.736713167871896e-07, + "loss": 0.3983, + "num_input_tokens_seen": 40745856, + "step": 12945 + }, + { + "epoch": 0.8290122271301453, + "grad_norm": 19.74918556213379, + "learning_rate": 1.7304254416979803e-07, + "loss": 0.2973, + "num_input_tokens_seen": 40761920, + "step": 12950 + }, + { + "epoch": 0.8293323090711222, + "grad_norm": 19.813852310180664, + "learning_rate": 1.7241480401849963e-07, + "loss": 0.263, + "num_input_tokens_seen": 40776960, + "step": 12955 + }, + { + "epoch": 0.8296523910120991, + "grad_norm": 21.785139083862305, + "learning_rate": 1.7178809711703524e-07, + "loss": 0.3413, + "num_input_tokens_seen": 40792192, + "step": 12960 + }, + { + "epoch": 0.829972472953076, + "grad_norm": 27.10121726989746, + "learning_rate": 1.7116242424785599e-07, + "loss": 0.36, + "num_input_tokens_seen": 40808256, + "step": 12965 + }, + { + "epoch": 0.8302925548940528, + "grad_norm": 42.24668884277344, + "learning_rate": 1.7053778619212166e-07, + "loss": 0.4272, + "num_input_tokens_seen": 40823424, + "step": 12970 + }, + { + "epoch": 0.8306126368350297, + "grad_norm": 39.586917877197266, + "learning_rate": 1.6991418372970022e-07, + "loss": 0.4132, + "num_input_tokens_seen": 40840960, + "step": 12975 + }, + { + "epoch": 0.8309327187760066, + "grad_norm": 28.66804313659668, + "learning_rate": 1.6929161763916666e-07, + "loss": 0.3849, + "num_input_tokens_seen": 40857536, + "step": 12980 + }, + { + "epoch": 0.8312528007169836, + "grad_norm": 26.58046531677246, + "learning_rate": 1.686700886978021e-07, + "loss": 0.3582, + "num_input_tokens_seen": 40874240, + "step": 12985 + }, + { + "epoch": 0.8315728826579605, + "grad_norm": 36.41627883911133, + "learning_rate": 1.6804959768159266e-07, + "loss": 0.3579, + "num_input_tokens_seen": 40888960, + "step": 12990 + }, + { + "epoch": 0.8318929645989374, + "grad_norm": 53.56745529174805, + "learning_rate": 1.674301453652287e-07, + "loss": 0.5373, + "num_input_tokens_seen": 40904512, + "step": 12995 + }, + { + "epoch": 0.8322130465399142, + "grad_norm": 42.032283782958984, + "learning_rate": 1.6681173252210378e-07, + "loss": 0.2969, + "num_input_tokens_seen": 40921856, + "step": 13000 + }, + { + "epoch": 0.8325331284808911, + "grad_norm": 49.92417526245117, + "learning_rate": 1.6619435992431342e-07, + "loss": 0.3801, + "num_input_tokens_seen": 40938752, + "step": 13005 + }, + { + "epoch": 0.832853210421868, + "grad_norm": 48.38226318359375, + "learning_rate": 1.6557802834265466e-07, + "loss": 0.3026, + "num_input_tokens_seen": 40954048, + "step": 13010 + }, + { + "epoch": 0.8331732923628449, + "grad_norm": 26.939504623413086, + "learning_rate": 1.649627385466248e-07, + "loss": 0.3634, + "num_input_tokens_seen": 40972672, + "step": 13015 + }, + { + "epoch": 0.8334933743038218, + "grad_norm": 20.158533096313477, + "learning_rate": 1.643484913044202e-07, + "loss": 0.2467, + "num_input_tokens_seen": 40987648, + "step": 13020 + }, + { + "epoch": 0.8338134562447986, + "grad_norm": 14.96458911895752, + "learning_rate": 1.6373528738293564e-07, + "loss": 0.3171, + "num_input_tokens_seen": 41003328, + "step": 13025 + }, + { + "epoch": 0.8341335381857755, + "grad_norm": 33.328121185302734, + "learning_rate": 1.6312312754776404e-07, + "loss": 0.2939, + "num_input_tokens_seen": 41018624, + "step": 13030 + }, + { + "epoch": 0.8344536201267524, + "grad_norm": 18.032512664794922, + "learning_rate": 1.6251201256319357e-07, + "loss": 0.3318, + "num_input_tokens_seen": 41034624, + "step": 13035 + }, + { + "epoch": 0.8347737020677294, + "grad_norm": 27.04534339904785, + "learning_rate": 1.619019431922083e-07, + "loss": 0.3699, + "num_input_tokens_seen": 41049664, + "step": 13040 + }, + { + "epoch": 0.8350937840087063, + "grad_norm": 34.27926254272461, + "learning_rate": 1.6129292019648754e-07, + "loss": 0.3494, + "num_input_tokens_seen": 41066368, + "step": 13045 + }, + { + "epoch": 0.8354138659496831, + "grad_norm": 26.79369354248047, + "learning_rate": 1.606849443364038e-07, + "loss": 0.2975, + "num_input_tokens_seen": 41082048, + "step": 13050 + }, + { + "epoch": 0.83573394789066, + "grad_norm": 17.270774841308594, + "learning_rate": 1.6007801637102104e-07, + "loss": 0.3425, + "num_input_tokens_seen": 41098048, + "step": 13055 + }, + { + "epoch": 0.8360540298316369, + "grad_norm": 20.079627990722656, + "learning_rate": 1.594721370580969e-07, + "loss": 0.3858, + "num_input_tokens_seen": 41112768, + "step": 13060 + }, + { + "epoch": 0.8363741117726138, + "grad_norm": 21.468425750732422, + "learning_rate": 1.588673071540788e-07, + "loss": 0.4241, + "num_input_tokens_seen": 41127488, + "step": 13065 + }, + { + "epoch": 0.8366941937135907, + "grad_norm": 35.24139404296875, + "learning_rate": 1.5826352741410332e-07, + "loss": 0.3195, + "num_input_tokens_seen": 41142272, + "step": 13070 + }, + { + "epoch": 0.8370142756545675, + "grad_norm": 55.96588134765625, + "learning_rate": 1.576607985919971e-07, + "loss": 0.2947, + "num_input_tokens_seen": 41157952, + "step": 13075 + }, + { + "epoch": 0.8373343575955444, + "grad_norm": 38.237064361572266, + "learning_rate": 1.57059121440274e-07, + "loss": 0.3547, + "num_input_tokens_seen": 41172992, + "step": 13080 + }, + { + "epoch": 0.8376544395365213, + "grad_norm": 53.96684646606445, + "learning_rate": 1.56458496710135e-07, + "loss": 0.3823, + "num_input_tokens_seen": 41187776, + "step": 13085 + }, + { + "epoch": 0.8379745214774983, + "grad_norm": 36.738887786865234, + "learning_rate": 1.5585892515146716e-07, + "loss": 0.3403, + "num_input_tokens_seen": 41204416, + "step": 13090 + }, + { + "epoch": 0.8382946034184752, + "grad_norm": 23.33167839050293, + "learning_rate": 1.5526040751284253e-07, + "loss": 0.4214, + "num_input_tokens_seen": 41220032, + "step": 13095 + }, + { + "epoch": 0.838614685359452, + "grad_norm": 37.20791244506836, + "learning_rate": 1.546629445415174e-07, + "loss": 0.3168, + "num_input_tokens_seen": 41235776, + "step": 13100 + }, + { + "epoch": 0.8389347673004289, + "grad_norm": 41.227115631103516, + "learning_rate": 1.5406653698343141e-07, + "loss": 0.3724, + "num_input_tokens_seen": 41252160, + "step": 13105 + }, + { + "epoch": 0.8392548492414058, + "grad_norm": 35.0400276184082, + "learning_rate": 1.5347118558320637e-07, + "loss": 0.3591, + "num_input_tokens_seen": 41269056, + "step": 13110 + }, + { + "epoch": 0.8395749311823827, + "grad_norm": 25.96977996826172, + "learning_rate": 1.5287689108414558e-07, + "loss": 0.3632, + "num_input_tokens_seen": 41285312, + "step": 13115 + }, + { + "epoch": 0.8398950131233596, + "grad_norm": 38.98981857299805, + "learning_rate": 1.5228365422823242e-07, + "loss": 0.3374, + "num_input_tokens_seen": 41300992, + "step": 13120 + }, + { + "epoch": 0.8402150950643364, + "grad_norm": 29.43157958984375, + "learning_rate": 1.5169147575613038e-07, + "loss": 0.2637, + "num_input_tokens_seen": 41317952, + "step": 13125 + }, + { + "epoch": 0.8405351770053133, + "grad_norm": 14.81241226196289, + "learning_rate": 1.5110035640718098e-07, + "loss": 0.297, + "num_input_tokens_seen": 41333440, + "step": 13130 + }, + { + "epoch": 0.8408552589462902, + "grad_norm": 32.161842346191406, + "learning_rate": 1.5051029691940387e-07, + "loss": 0.3665, + "num_input_tokens_seen": 41349312, + "step": 13135 + }, + { + "epoch": 0.8411753408872671, + "grad_norm": 32.124176025390625, + "learning_rate": 1.4992129802949515e-07, + "loss": 0.356, + "num_input_tokens_seen": 41364288, + "step": 13140 + }, + { + "epoch": 0.8414954228282441, + "grad_norm": 24.080873489379883, + "learning_rate": 1.4933336047282696e-07, + "loss": 0.2884, + "num_input_tokens_seen": 41379904, + "step": 13145 + }, + { + "epoch": 0.841815504769221, + "grad_norm": 34.71171951293945, + "learning_rate": 1.4874648498344579e-07, + "loss": 0.3481, + "num_input_tokens_seen": 41394432, + "step": 13150 + }, + { + "epoch": 0.8421355867101978, + "grad_norm": 57.97336196899414, + "learning_rate": 1.4816067229407348e-07, + "loss": 0.3485, + "num_input_tokens_seen": 41409984, + "step": 13155 + }, + { + "epoch": 0.8424556686511747, + "grad_norm": 19.453880310058594, + "learning_rate": 1.4757592313610322e-07, + "loss": 0.3051, + "num_input_tokens_seen": 41425984, + "step": 13160 + }, + { + "epoch": 0.8427757505921516, + "grad_norm": 19.217065811157227, + "learning_rate": 1.4699223823960128e-07, + "loss": 0.3312, + "num_input_tokens_seen": 41441920, + "step": 13165 + }, + { + "epoch": 0.8430958325331285, + "grad_norm": 39.03798294067383, + "learning_rate": 1.4640961833330579e-07, + "loss": 0.3389, + "num_input_tokens_seen": 41457664, + "step": 13170 + }, + { + "epoch": 0.8434159144741054, + "grad_norm": 18.345247268676758, + "learning_rate": 1.4582806414462378e-07, + "loss": 0.2518, + "num_input_tokens_seen": 41472832, + "step": 13175 + }, + { + "epoch": 0.8437359964150822, + "grad_norm": 24.834247589111328, + "learning_rate": 1.4524757639963258e-07, + "loss": 0.33, + "num_input_tokens_seen": 41490368, + "step": 13180 + }, + { + "epoch": 0.8440560783560591, + "grad_norm": 46.54368209838867, + "learning_rate": 1.4466815582307845e-07, + "loss": 0.4397, + "num_input_tokens_seen": 41506624, + "step": 13185 + }, + { + "epoch": 0.844376160297036, + "grad_norm": 10.444628715515137, + "learning_rate": 1.440898031383746e-07, + "loss": 0.251, + "num_input_tokens_seen": 41523264, + "step": 13190 + }, + { + "epoch": 0.844696242238013, + "grad_norm": 41.213134765625, + "learning_rate": 1.4351251906760064e-07, + "loss": 0.3803, + "num_input_tokens_seen": 41538944, + "step": 13195 + }, + { + "epoch": 0.8450163241789899, + "grad_norm": 39.923397064208984, + "learning_rate": 1.4293630433150317e-07, + "loss": 0.3939, + "num_input_tokens_seen": 41554880, + "step": 13200 + }, + { + "epoch": 0.8453364061199667, + "grad_norm": 47.86785125732422, + "learning_rate": 1.423611596494927e-07, + "loss": 0.4367, + "num_input_tokens_seen": 41569280, + "step": 13205 + }, + { + "epoch": 0.8456564880609436, + "grad_norm": 20.079484939575195, + "learning_rate": 1.4178708573964438e-07, + "loss": 0.3546, + "num_input_tokens_seen": 41584576, + "step": 13210 + }, + { + "epoch": 0.8459765700019205, + "grad_norm": 17.954130172729492, + "learning_rate": 1.4121408331869566e-07, + "loss": 0.3589, + "num_input_tokens_seen": 41600000, + "step": 13215 + }, + { + "epoch": 0.8462966519428974, + "grad_norm": 38.43409729003906, + "learning_rate": 1.406421531020474e-07, + "loss": 0.3603, + "num_input_tokens_seen": 41615040, + "step": 13220 + }, + { + "epoch": 0.8466167338838743, + "grad_norm": 87.97344207763672, + "learning_rate": 1.4007129580376097e-07, + "loss": 0.3551, + "num_input_tokens_seen": 41630208, + "step": 13225 + }, + { + "epoch": 0.8469368158248511, + "grad_norm": 38.874149322509766, + "learning_rate": 1.3950151213655847e-07, + "loss": 0.3672, + "num_input_tokens_seen": 41645440, + "step": 13230 + }, + { + "epoch": 0.847256897765828, + "grad_norm": 45.345767974853516, + "learning_rate": 1.389328028118214e-07, + "loss": 0.3281, + "num_input_tokens_seen": 41661184, + "step": 13235 + }, + { + "epoch": 0.8475769797068049, + "grad_norm": 31.71895980834961, + "learning_rate": 1.3836516853959e-07, + "loss": 0.358, + "num_input_tokens_seen": 41676224, + "step": 13240 + }, + { + "epoch": 0.8478970616477818, + "grad_norm": 20.565414428710938, + "learning_rate": 1.3779861002856242e-07, + "loss": 0.308, + "num_input_tokens_seen": 41690816, + "step": 13245 + }, + { + "epoch": 0.8482171435887588, + "grad_norm": 18.975496292114258, + "learning_rate": 1.3723312798609366e-07, + "loss": 0.3357, + "num_input_tokens_seen": 41706688, + "step": 13250 + }, + { + "epoch": 0.8485372255297357, + "grad_norm": 27.02278709411621, + "learning_rate": 1.3666872311819455e-07, + "loss": 0.349, + "num_input_tokens_seen": 41721920, + "step": 13255 + }, + { + "epoch": 0.8488573074707125, + "grad_norm": 22.8565731048584, + "learning_rate": 1.361053961295312e-07, + "loss": 0.285, + "num_input_tokens_seen": 41738112, + "step": 13260 + }, + { + "epoch": 0.8491773894116894, + "grad_norm": 48.835845947265625, + "learning_rate": 1.3554314772342412e-07, + "loss": 0.3445, + "num_input_tokens_seen": 41753792, + "step": 13265 + }, + { + "epoch": 0.8494974713526663, + "grad_norm": 22.680404663085938, + "learning_rate": 1.349819786018469e-07, + "loss": 0.3294, + "num_input_tokens_seen": 41771328, + "step": 13270 + }, + { + "epoch": 0.8498175532936432, + "grad_norm": 38.834434509277344, + "learning_rate": 1.3442188946542566e-07, + "loss": 0.3734, + "num_input_tokens_seen": 41787712, + "step": 13275 + }, + { + "epoch": 0.85013763523462, + "grad_norm": 26.3136043548584, + "learning_rate": 1.338628810134388e-07, + "loss": 0.3099, + "num_input_tokens_seen": 41803072, + "step": 13280 + }, + { + "epoch": 0.8504577171755969, + "grad_norm": 39.59318923950195, + "learning_rate": 1.3330495394381435e-07, + "loss": 0.3624, + "num_input_tokens_seen": 41818688, + "step": 13285 + }, + { + "epoch": 0.8507777991165738, + "grad_norm": 17.34198760986328, + "learning_rate": 1.3274810895313083e-07, + "loss": 0.2868, + "num_input_tokens_seen": 41833792, + "step": 13290 + }, + { + "epoch": 0.8510338646693554, + "eval_loss": 0.35909759998321533, + "eval_runtime": 50.7775, + "eval_samples_per_second": 273.468, + "eval_steps_per_second": 34.188, + "num_input_tokens_seen": 41847872, + "step": 13294 + }, + { + "epoch": 0.8510978810575507, + "grad_norm": 26.860233306884766, + "learning_rate": 1.321923467366164e-07, + "loss": 0.3846, + "num_input_tokens_seen": 41850880, + "step": 13295 + }, + { + "epoch": 0.8514179629985277, + "grad_norm": 15.31477165222168, + "learning_rate": 1.3163766798814603e-07, + "loss": 0.183, + "num_input_tokens_seen": 41866560, + "step": 13300 + }, + { + "epoch": 0.8517380449395046, + "grad_norm": 53.74724197387695, + "learning_rate": 1.3108407340024264e-07, + "loss": 0.3041, + "num_input_tokens_seen": 41882240, + "step": 13305 + }, + { + "epoch": 0.8520581268804814, + "grad_norm": 39.340850830078125, + "learning_rate": 1.3053156366407613e-07, + "loss": 0.3421, + "num_input_tokens_seen": 41898880, + "step": 13310 + }, + { + "epoch": 0.8523782088214583, + "grad_norm": 19.917110443115234, + "learning_rate": 1.2998013946946119e-07, + "loss": 0.2428, + "num_input_tokens_seen": 41915968, + "step": 13315 + }, + { + "epoch": 0.8526982907624352, + "grad_norm": 37.523658752441406, + "learning_rate": 1.2942980150485706e-07, + "loss": 0.3499, + "num_input_tokens_seen": 41930816, + "step": 13320 + }, + { + "epoch": 0.8530183727034121, + "grad_norm": 51.89887237548828, + "learning_rate": 1.2888055045736723e-07, + "loss": 0.3192, + "num_input_tokens_seen": 41947200, + "step": 13325 + }, + { + "epoch": 0.853338454644389, + "grad_norm": 22.378541946411133, + "learning_rate": 1.283323870127384e-07, + "loss": 0.301, + "num_input_tokens_seen": 41962240, + "step": 13330 + }, + { + "epoch": 0.8536585365853658, + "grad_norm": 24.78726577758789, + "learning_rate": 1.2778531185535911e-07, + "loss": 0.3015, + "num_input_tokens_seen": 41978752, + "step": 13335 + }, + { + "epoch": 0.8539786185263427, + "grad_norm": 21.3166446685791, + "learning_rate": 1.2723932566825844e-07, + "loss": 0.3288, + "num_input_tokens_seen": 41994112, + "step": 13340 + }, + { + "epoch": 0.8542987004673196, + "grad_norm": 16.688308715820312, + "learning_rate": 1.2669442913310723e-07, + "loss": 0.294, + "num_input_tokens_seen": 42010432, + "step": 13345 + }, + { + "epoch": 0.8546187824082965, + "grad_norm": 28.67268943786621, + "learning_rate": 1.2615062293021506e-07, + "loss": 0.2745, + "num_input_tokens_seen": 42025984, + "step": 13350 + }, + { + "epoch": 0.8549388643492735, + "grad_norm": 38.47801971435547, + "learning_rate": 1.2560790773853025e-07, + "loss": 0.3147, + "num_input_tokens_seen": 42040832, + "step": 13355 + }, + { + "epoch": 0.8552589462902503, + "grad_norm": 27.922290802001953, + "learning_rate": 1.2506628423563915e-07, + "loss": 0.4083, + "num_input_tokens_seen": 42057536, + "step": 13360 + }, + { + "epoch": 0.8555790282312272, + "grad_norm": 31.082223892211914, + "learning_rate": 1.2452575309776493e-07, + "loss": 0.2828, + "num_input_tokens_seen": 42073152, + "step": 13365 + }, + { + "epoch": 0.8558991101722041, + "grad_norm": 47.76811599731445, + "learning_rate": 1.2398631499976732e-07, + "loss": 0.3032, + "num_input_tokens_seen": 42088512, + "step": 13370 + }, + { + "epoch": 0.856219192113181, + "grad_norm": 23.319290161132812, + "learning_rate": 1.234479706151409e-07, + "loss": 0.4253, + "num_input_tokens_seen": 42103552, + "step": 13375 + }, + { + "epoch": 0.8565392740541579, + "grad_norm": 23.617815017700195, + "learning_rate": 1.2291072061601503e-07, + "loss": 0.3577, + "num_input_tokens_seen": 42119872, + "step": 13380 + }, + { + "epoch": 0.8568593559951347, + "grad_norm": 39.64377975463867, + "learning_rate": 1.2237456567315264e-07, + "loss": 0.4374, + "num_input_tokens_seen": 42136832, + "step": 13385 + }, + { + "epoch": 0.8571794379361116, + "grad_norm": 24.50478172302246, + "learning_rate": 1.2183950645594944e-07, + "loss": 0.3158, + "num_input_tokens_seen": 42152896, + "step": 13390 + }, + { + "epoch": 0.8574995198770885, + "grad_norm": 49.653892517089844, + "learning_rate": 1.2130554363243318e-07, + "loss": 0.3555, + "num_input_tokens_seen": 42168064, + "step": 13395 + }, + { + "epoch": 0.8578196018180654, + "grad_norm": 20.96121597290039, + "learning_rate": 1.207726778692625e-07, + "loss": 0.3738, + "num_input_tokens_seen": 42182784, + "step": 13400 + }, + { + "epoch": 0.8581396837590423, + "grad_norm": 23.166378021240234, + "learning_rate": 1.2024090983172718e-07, + "loss": 0.3228, + "num_input_tokens_seen": 42199744, + "step": 13405 + }, + { + "epoch": 0.8584597657000193, + "grad_norm": 40.654266357421875, + "learning_rate": 1.1971024018374532e-07, + "loss": 0.3631, + "num_input_tokens_seen": 42215040, + "step": 13410 + }, + { + "epoch": 0.8587798476409961, + "grad_norm": 35.028751373291016, + "learning_rate": 1.1918066958786432e-07, + "loss": 0.3079, + "num_input_tokens_seen": 42230144, + "step": 13415 + }, + { + "epoch": 0.859099929581973, + "grad_norm": 69.72550964355469, + "learning_rate": 1.1865219870525922e-07, + "loss": 0.3677, + "num_input_tokens_seen": 42246528, + "step": 13420 + }, + { + "epoch": 0.8594200115229499, + "grad_norm": 21.34828758239746, + "learning_rate": 1.1812482819573222e-07, + "loss": 0.4245, + "num_input_tokens_seen": 42263168, + "step": 13425 + }, + { + "epoch": 0.8597400934639268, + "grad_norm": 32.855438232421875, + "learning_rate": 1.1759855871771163e-07, + "loss": 0.3877, + "num_input_tokens_seen": 42278912, + "step": 13430 + }, + { + "epoch": 0.8600601754049036, + "grad_norm": 48.298091888427734, + "learning_rate": 1.1707339092825075e-07, + "loss": 0.387, + "num_input_tokens_seen": 42294656, + "step": 13435 + }, + { + "epoch": 0.8603802573458805, + "grad_norm": 45.29995346069336, + "learning_rate": 1.1654932548302842e-07, + "loss": 0.3927, + "num_input_tokens_seen": 42311552, + "step": 13440 + }, + { + "epoch": 0.8607003392868574, + "grad_norm": 46.05238723754883, + "learning_rate": 1.1602636303634595e-07, + "loss": 0.365, + "num_input_tokens_seen": 42327552, + "step": 13445 + }, + { + "epoch": 0.8610204212278343, + "grad_norm": 20.932029724121094, + "learning_rate": 1.1550450424112801e-07, + "loss": 0.3526, + "num_input_tokens_seen": 42343360, + "step": 13450 + }, + { + "epoch": 0.8613405031688112, + "grad_norm": 23.107404708862305, + "learning_rate": 1.1498374974892178e-07, + "loss": 0.3455, + "num_input_tokens_seen": 42360064, + "step": 13455 + }, + { + "epoch": 0.8616605851097882, + "grad_norm": 23.93453025817871, + "learning_rate": 1.144641002098955e-07, + "loss": 0.4371, + "num_input_tokens_seen": 42374976, + "step": 13460 + }, + { + "epoch": 0.861980667050765, + "grad_norm": 45.36137771606445, + "learning_rate": 1.1394555627283697e-07, + "loss": 0.3502, + "num_input_tokens_seen": 42391616, + "step": 13465 + }, + { + "epoch": 0.8623007489917419, + "grad_norm": 59.06273651123047, + "learning_rate": 1.134281185851551e-07, + "loss": 0.3075, + "num_input_tokens_seen": 42406528, + "step": 13470 + }, + { + "epoch": 0.8626208309327188, + "grad_norm": 28.857786178588867, + "learning_rate": 1.1291178779287691e-07, + "loss": 0.2948, + "num_input_tokens_seen": 42424320, + "step": 13475 + }, + { + "epoch": 0.8629409128736957, + "grad_norm": 43.43189239501953, + "learning_rate": 1.1239656454064683e-07, + "loss": 0.3616, + "num_input_tokens_seen": 42440960, + "step": 13480 + }, + { + "epoch": 0.8632609948146726, + "grad_norm": 17.330026626586914, + "learning_rate": 1.1188244947172776e-07, + "loss": 0.2464, + "num_input_tokens_seen": 42456448, + "step": 13485 + }, + { + "epoch": 0.8635810767556494, + "grad_norm": 20.37238311767578, + "learning_rate": 1.1136944322799812e-07, + "loss": 0.3201, + "num_input_tokens_seen": 42472448, + "step": 13490 + }, + { + "epoch": 0.8639011586966263, + "grad_norm": 52.59025192260742, + "learning_rate": 1.1085754644995227e-07, + "loss": 0.3177, + "num_input_tokens_seen": 42487808, + "step": 13495 + }, + { + "epoch": 0.8642212406376032, + "grad_norm": 36.363216400146484, + "learning_rate": 1.1034675977669938e-07, + "loss": 0.3577, + "num_input_tokens_seen": 42503744, + "step": 13500 + }, + { + "epoch": 0.8645413225785801, + "grad_norm": 52.50566482543945, + "learning_rate": 1.0983708384596258e-07, + "loss": 0.6111, + "num_input_tokens_seen": 42520768, + "step": 13505 + }, + { + "epoch": 0.864861404519557, + "grad_norm": 19.887405395507812, + "learning_rate": 1.0932851929407827e-07, + "loss": 0.3703, + "num_input_tokens_seen": 42537408, + "step": 13510 + }, + { + "epoch": 0.8651814864605339, + "grad_norm": 47.518165588378906, + "learning_rate": 1.0882106675599534e-07, + "loss": 0.3583, + "num_input_tokens_seen": 42553728, + "step": 13515 + }, + { + "epoch": 0.8655015684015108, + "grad_norm": 14.914018630981445, + "learning_rate": 1.0831472686527409e-07, + "loss": 0.3226, + "num_input_tokens_seen": 42568896, + "step": 13520 + }, + { + "epoch": 0.8658216503424877, + "grad_norm": 14.661453247070312, + "learning_rate": 1.0780950025408586e-07, + "loss": 0.2985, + "num_input_tokens_seen": 42584000, + "step": 13525 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 65.84357452392578, + "learning_rate": 1.0730538755321217e-07, + "loss": 0.3884, + "num_input_tokens_seen": 42600192, + "step": 13530 + }, + { + "epoch": 0.8664618142244415, + "grad_norm": 21.830345153808594, + "learning_rate": 1.0680238939204334e-07, + "loss": 0.2997, + "num_input_tokens_seen": 42614656, + "step": 13535 + }, + { + "epoch": 0.8667818961654183, + "grad_norm": 44.84056854248047, + "learning_rate": 1.0630050639857879e-07, + "loss": 0.402, + "num_input_tokens_seen": 42629504, + "step": 13540 + }, + { + "epoch": 0.8671019781063952, + "grad_norm": 24.932231903076172, + "learning_rate": 1.0579973919942508e-07, + "loss": 0.3165, + "num_input_tokens_seen": 42644224, + "step": 13545 + }, + { + "epoch": 0.8674220600473721, + "grad_norm": 28.818056106567383, + "learning_rate": 1.0530008841979621e-07, + "loss": 0.2452, + "num_input_tokens_seen": 42659584, + "step": 13550 + }, + { + "epoch": 0.867742141988349, + "grad_norm": 36.8608512878418, + "learning_rate": 1.048015546835117e-07, + "loss": 0.272, + "num_input_tokens_seen": 42675776, + "step": 13555 + }, + { + "epoch": 0.8680622239293259, + "grad_norm": 23.82661247253418, + "learning_rate": 1.0430413861299691e-07, + "loss": 0.388, + "num_input_tokens_seen": 42693184, + "step": 13560 + }, + { + "epoch": 0.8683823058703029, + "grad_norm": 46.35121536254883, + "learning_rate": 1.0380784082928196e-07, + "loss": 0.4564, + "num_input_tokens_seen": 42710784, + "step": 13565 + }, + { + "epoch": 0.8687023878112797, + "grad_norm": 37.92681884765625, + "learning_rate": 1.0331266195200006e-07, + "loss": 0.3905, + "num_input_tokens_seen": 42727040, + "step": 13570 + }, + { + "epoch": 0.8690224697522566, + "grad_norm": 20.17000389099121, + "learning_rate": 1.0281860259938779e-07, + "loss": 0.3189, + "num_input_tokens_seen": 42742208, + "step": 13575 + }, + { + "epoch": 0.8693425516932335, + "grad_norm": 20.845090866088867, + "learning_rate": 1.0232566338828452e-07, + "loss": 0.3634, + "num_input_tokens_seen": 42758464, + "step": 13580 + }, + { + "epoch": 0.8696626336342104, + "grad_norm": 47.362613677978516, + "learning_rate": 1.018338449341305e-07, + "loss": 0.4021, + "num_input_tokens_seen": 42774016, + "step": 13585 + }, + { + "epoch": 0.8699827155751872, + "grad_norm": 20.74382972717285, + "learning_rate": 1.0134314785096632e-07, + "loss": 0.3924, + "num_input_tokens_seen": 42789248, + "step": 13590 + }, + { + "epoch": 0.8703027975161641, + "grad_norm": 17.53841781616211, + "learning_rate": 1.0085357275143359e-07, + "loss": 0.3446, + "num_input_tokens_seen": 42804608, + "step": 13595 + }, + { + "epoch": 0.870622879457141, + "grad_norm": 34.560428619384766, + "learning_rate": 1.0036512024677268e-07, + "loss": 0.495, + "num_input_tokens_seen": 42819584, + "step": 13600 + }, + { + "epoch": 0.8709429613981179, + "grad_norm": 11.315316200256348, + "learning_rate": 9.98777909468217e-08, + "loss": 0.2823, + "num_input_tokens_seen": 42835200, + "step": 13605 + }, + { + "epoch": 0.8712630433390948, + "grad_norm": 48.00859069824219, + "learning_rate": 9.939158546001736e-08, + "loss": 0.4072, + "num_input_tokens_seen": 42852672, + "step": 13610 + }, + { + "epoch": 0.8715831252800716, + "grad_norm": 20.85955047607422, + "learning_rate": 9.890650439339299e-08, + "loss": 0.3252, + "num_input_tokens_seen": 42868672, + "step": 13615 + }, + { + "epoch": 0.8719032072210486, + "grad_norm": 58.062744140625, + "learning_rate": 9.842254835257791e-08, + "loss": 0.412, + "num_input_tokens_seen": 42884096, + "step": 13620 + }, + { + "epoch": 0.8722232891620255, + "grad_norm": 33.488006591796875, + "learning_rate": 9.793971794179679e-08, + "loss": 0.374, + "num_input_tokens_seen": 42898752, + "step": 13625 + }, + { + "epoch": 0.8725433711030024, + "grad_norm": 28.78290557861328, + "learning_rate": 9.745801376386931e-08, + "loss": 0.3535, + "num_input_tokens_seen": 42914688, + "step": 13630 + }, + { + "epoch": 0.8728634530439793, + "grad_norm": 47.25395965576172, + "learning_rate": 9.697743642020861e-08, + "loss": 0.3186, + "num_input_tokens_seen": 42930688, + "step": 13635 + }, + { + "epoch": 0.8731835349849562, + "grad_norm": 37.96920394897461, + "learning_rate": 9.649798651082119e-08, + "loss": 0.3329, + "num_input_tokens_seen": 42947008, + "step": 13640 + }, + { + "epoch": 0.873503616925933, + "grad_norm": 15.890763282775879, + "learning_rate": 9.601966463430588e-08, + "loss": 0.3973, + "num_input_tokens_seen": 42962816, + "step": 13645 + }, + { + "epoch": 0.8738236988669099, + "grad_norm": 16.609865188598633, + "learning_rate": 9.554247138785321e-08, + "loss": 0.3428, + "num_input_tokens_seen": 42977664, + "step": 13650 + }, + { + "epoch": 0.8741437808078868, + "grad_norm": 72.1360092163086, + "learning_rate": 9.506640736724447e-08, + "loss": 0.4653, + "num_input_tokens_seen": 42993472, + "step": 13655 + }, + { + "epoch": 0.8744638627488637, + "grad_norm": 31.26497459411621, + "learning_rate": 9.459147316685123e-08, + "loss": 0.3973, + "num_input_tokens_seen": 43010688, + "step": 13660 + }, + { + "epoch": 0.8747839446898406, + "grad_norm": 42.893550872802734, + "learning_rate": 9.41176693796345e-08, + "loss": 0.3411, + "num_input_tokens_seen": 43027392, + "step": 13665 + }, + { + "epoch": 0.8751040266308175, + "grad_norm": 43.10031509399414, + "learning_rate": 9.364499659714364e-08, + "loss": 0.4175, + "num_input_tokens_seen": 43043008, + "step": 13670 + }, + { + "epoch": 0.8754241085717944, + "grad_norm": 38.63743591308594, + "learning_rate": 9.31734554095165e-08, + "loss": 0.3438, + "num_input_tokens_seen": 43059072, + "step": 13675 + }, + { + "epoch": 0.8757441905127713, + "grad_norm": 36.11064529418945, + "learning_rate": 9.270304640547744e-08, + "loss": 0.3456, + "num_input_tokens_seen": 43074624, + "step": 13680 + }, + { + "epoch": 0.8760642724537482, + "grad_norm": 30.64196014404297, + "learning_rate": 9.223377017233768e-08, + "loss": 0.3922, + "num_input_tokens_seen": 43089536, + "step": 13685 + }, + { + "epoch": 0.8763843543947251, + "grad_norm": 29.195018768310547, + "learning_rate": 9.176562729599458e-08, + "loss": 0.361, + "num_input_tokens_seen": 43104512, + "step": 13690 + }, + { + "epoch": 0.8767044363357019, + "grad_norm": 47.08403778076172, + "learning_rate": 9.129861836092944e-08, + "loss": 0.3434, + "num_input_tokens_seen": 43120640, + "step": 13695 + }, + { + "epoch": 0.8770245182766788, + "grad_norm": 22.02703285217285, + "learning_rate": 9.083274395020845e-08, + "loss": 0.4433, + "num_input_tokens_seen": 43136384, + "step": 13700 + }, + { + "epoch": 0.8773446002176557, + "grad_norm": 21.463897705078125, + "learning_rate": 9.036800464548156e-08, + "loss": 0.4021, + "num_input_tokens_seen": 43153216, + "step": 13705 + }, + { + "epoch": 0.8776646821586326, + "grad_norm": 24.22488784790039, + "learning_rate": 8.990440102698138e-08, + "loss": 0.3506, + "num_input_tokens_seen": 43167936, + "step": 13710 + }, + { + "epoch": 0.8779847640996095, + "grad_norm": 39.060020446777344, + "learning_rate": 8.944193367352182e-08, + "loss": 0.2722, + "num_input_tokens_seen": 43183872, + "step": 13715 + }, + { + "epoch": 0.8783048460405863, + "grad_norm": 29.692768096923828, + "learning_rate": 8.898060316249944e-08, + "loss": 0.408, + "num_input_tokens_seen": 43200256, + "step": 13720 + }, + { + "epoch": 0.8786249279815633, + "grad_norm": 49.14101791381836, + "learning_rate": 8.852041006989064e-08, + "loss": 0.3606, + "num_input_tokens_seen": 43217600, + "step": 13725 + }, + { + "epoch": 0.8789450099225402, + "grad_norm": 49.38915252685547, + "learning_rate": 8.80613549702518e-08, + "loss": 0.3858, + "num_input_tokens_seen": 43233344, + "step": 13730 + }, + { + "epoch": 0.8792650918635171, + "grad_norm": 49.839324951171875, + "learning_rate": 8.760343843671824e-08, + "loss": 0.5397, + "num_input_tokens_seen": 43249280, + "step": 13735 + }, + { + "epoch": 0.879585173804494, + "grad_norm": 76.67366790771484, + "learning_rate": 8.714666104100487e-08, + "loss": 0.4595, + "num_input_tokens_seen": 43265024, + "step": 13740 + }, + { + "epoch": 0.8799052557454708, + "grad_norm": 76.83538055419922, + "learning_rate": 8.66910233534034e-08, + "loss": 0.3597, + "num_input_tokens_seen": 43280576, + "step": 13745 + }, + { + "epoch": 0.8802253376864477, + "grad_norm": 31.625341415405273, + "learning_rate": 8.62365259427823e-08, + "loss": 0.3074, + "num_input_tokens_seen": 43296064, + "step": 13750 + }, + { + "epoch": 0.8805454196274246, + "grad_norm": 29.102645874023438, + "learning_rate": 8.578316937658758e-08, + "loss": 0.292, + "num_input_tokens_seen": 43311552, + "step": 13755 + }, + { + "epoch": 0.8808655015684015, + "grad_norm": 20.883947372436523, + "learning_rate": 8.533095422083992e-08, + "loss": 0.3216, + "num_input_tokens_seen": 43326272, + "step": 13760 + }, + { + "epoch": 0.8811855835093784, + "grad_norm": 26.02059555053711, + "learning_rate": 8.487988104013533e-08, + "loss": 0.2926, + "num_input_tokens_seen": 43342592, + "step": 13765 + }, + { + "epoch": 0.8815056654503552, + "grad_norm": 23.774742126464844, + "learning_rate": 8.4429950397644e-08, + "loss": 0.3183, + "num_input_tokens_seen": 43357888, + "step": 13770 + }, + { + "epoch": 0.8818257473913321, + "grad_norm": 23.344446182250977, + "learning_rate": 8.398116285510948e-08, + "loss": 0.272, + "num_input_tokens_seen": 43374272, + "step": 13775 + }, + { + "epoch": 0.8821458293323091, + "grad_norm": 47.986976623535156, + "learning_rate": 8.353351897284844e-08, + "loss": 0.2715, + "num_input_tokens_seen": 43393280, + "step": 13780 + }, + { + "epoch": 0.882465911273286, + "grad_norm": 10.710731506347656, + "learning_rate": 8.308701930974949e-08, + "loss": 0.4713, + "num_input_tokens_seen": 43409600, + "step": 13785 + }, + { + "epoch": 0.8827859932142629, + "grad_norm": 30.01685333251953, + "learning_rate": 8.264166442327269e-08, + "loss": 0.4144, + "num_input_tokens_seen": 43424384, + "step": 13790 + }, + { + "epoch": 0.8831060751552398, + "grad_norm": 44.88009262084961, + "learning_rate": 8.219745486944885e-08, + "loss": 0.2591, + "num_input_tokens_seen": 43440128, + "step": 13795 + }, + { + "epoch": 0.8834261570962166, + "grad_norm": 81.11672973632812, + "learning_rate": 8.175439120287875e-08, + "loss": 0.4706, + "num_input_tokens_seen": 43455168, + "step": 13800 + }, + { + "epoch": 0.8837462390371935, + "grad_norm": 41.99856185913086, + "learning_rate": 8.131247397673269e-08, + "loss": 0.3454, + "num_input_tokens_seen": 43472064, + "step": 13805 + }, + { + "epoch": 0.8840663209781704, + "grad_norm": 84.95421600341797, + "learning_rate": 8.087170374274921e-08, + "loss": 0.4261, + "num_input_tokens_seen": 43488000, + "step": 13810 + }, + { + "epoch": 0.8843864029191473, + "grad_norm": 28.50680923461914, + "learning_rate": 8.043208105123578e-08, + "loss": 0.2942, + "num_input_tokens_seen": 43503488, + "step": 13815 + }, + { + "epoch": 0.8847064848601242, + "grad_norm": 40.26153564453125, + "learning_rate": 7.999360645106579e-08, + "loss": 0.3418, + "num_input_tokens_seen": 43518336, + "step": 13820 + }, + { + "epoch": 0.885026566801101, + "grad_norm": 18.43648910522461, + "learning_rate": 7.955628048968011e-08, + "loss": 0.2716, + "num_input_tokens_seen": 43532800, + "step": 13825 + }, + { + "epoch": 0.885346648742078, + "grad_norm": 31.317567825317383, + "learning_rate": 7.912010371308564e-08, + "loss": 0.2586, + "num_input_tokens_seen": 43547648, + "step": 13830 + }, + { + "epoch": 0.8856667306830549, + "grad_norm": 27.898588180541992, + "learning_rate": 7.868507666585422e-08, + "loss": 0.2934, + "num_input_tokens_seen": 43562688, + "step": 13835 + }, + { + "epoch": 0.8859868126240318, + "grad_norm": 42.73057174682617, + "learning_rate": 7.825119989112172e-08, + "loss": 0.4174, + "num_input_tokens_seen": 43578176, + "step": 13840 + }, + { + "epoch": 0.8863068945650087, + "grad_norm": 33.000614166259766, + "learning_rate": 7.78184739305886e-08, + "loss": 0.2904, + "num_input_tokens_seen": 43593920, + "step": 13845 + }, + { + "epoch": 0.8866269765059855, + "grad_norm": 22.872941970825195, + "learning_rate": 7.73868993245187e-08, + "loss": 0.3606, + "num_input_tokens_seen": 43610944, + "step": 13850 + }, + { + "epoch": 0.8869470584469624, + "grad_norm": 19.526351928710938, + "learning_rate": 7.695647661173754e-08, + "loss": 0.3406, + "num_input_tokens_seen": 43627008, + "step": 13855 + }, + { + "epoch": 0.8872671403879393, + "grad_norm": 46.44065856933594, + "learning_rate": 7.652720632963284e-08, + "loss": 0.3843, + "num_input_tokens_seen": 43642752, + "step": 13860 + }, + { + "epoch": 0.8875872223289162, + "grad_norm": 50.08705520629883, + "learning_rate": 7.609908901415396e-08, + "loss": 0.3506, + "num_input_tokens_seen": 43658496, + "step": 13865 + }, + { + "epoch": 0.8879073042698931, + "grad_norm": 52.07186508178711, + "learning_rate": 7.567212519981047e-08, + "loss": 0.3988, + "num_input_tokens_seen": 43674304, + "step": 13870 + }, + { + "epoch": 0.8882273862108699, + "grad_norm": 19.006641387939453, + "learning_rate": 7.524631541967108e-08, + "loss": 0.3315, + "num_input_tokens_seen": 43689536, + "step": 13875 + }, + { + "epoch": 0.8885474681518468, + "grad_norm": 76.33783721923828, + "learning_rate": 7.482166020536485e-08, + "loss": 0.2984, + "num_input_tokens_seen": 43706496, + "step": 13880 + }, + { + "epoch": 0.8888675500928238, + "grad_norm": 18.04372787475586, + "learning_rate": 7.439816008707877e-08, + "loss": 0.3097, + "num_input_tokens_seen": 43721408, + "step": 13885 + }, + { + "epoch": 0.8891876320338007, + "grad_norm": 19.781387329101562, + "learning_rate": 7.397581559355748e-08, + "loss": 0.3397, + "num_input_tokens_seen": 43737536, + "step": 13890 + }, + { + "epoch": 0.8895077139747776, + "grad_norm": 31.502649307250977, + "learning_rate": 7.355462725210315e-08, + "loss": 0.4171, + "num_input_tokens_seen": 43752640, + "step": 13895 + }, + { + "epoch": 0.8898277959157544, + "grad_norm": 27.26972770690918, + "learning_rate": 7.313459558857438e-08, + "loss": 0.4097, + "num_input_tokens_seen": 43768384, + "step": 13900 + }, + { + "epoch": 0.8901478778567313, + "grad_norm": 28.773365020751953, + "learning_rate": 7.271572112738566e-08, + "loss": 0.3141, + "num_input_tokens_seen": 43784320, + "step": 13905 + }, + { + "epoch": 0.8904679597977082, + "grad_norm": 35.17366409301758, + "learning_rate": 7.229800439150657e-08, + "loss": 0.3635, + "num_input_tokens_seen": 43799232, + "step": 13910 + }, + { + "epoch": 0.8907880417386851, + "grad_norm": 64.01599884033203, + "learning_rate": 7.188144590246148e-08, + "loss": 0.3806, + "num_input_tokens_seen": 43815360, + "step": 13915 + }, + { + "epoch": 0.891108123679662, + "grad_norm": 23.02184295654297, + "learning_rate": 7.146604618032848e-08, + "loss": 0.3317, + "num_input_tokens_seen": 43830336, + "step": 13920 + }, + { + "epoch": 0.8914282056206388, + "grad_norm": 37.91139221191406, + "learning_rate": 7.105180574373904e-08, + "loss": 0.4062, + "num_input_tokens_seen": 43846656, + "step": 13925 + }, + { + "epoch": 0.8917482875616157, + "grad_norm": 22.197053909301758, + "learning_rate": 7.063872510987712e-08, + "loss": 0.3279, + "num_input_tokens_seen": 43862720, + "step": 13930 + }, + { + "epoch": 0.8920683695025927, + "grad_norm": 34.20999526977539, + "learning_rate": 7.022680479447874e-08, + "loss": 0.3541, + "num_input_tokens_seen": 43876800, + "step": 13935 + }, + { + "epoch": 0.8923884514435696, + "grad_norm": 21.582101821899414, + "learning_rate": 6.98160453118316e-08, + "loss": 0.3046, + "num_input_tokens_seen": 43892160, + "step": 13940 + }, + { + "epoch": 0.8927085333845465, + "grad_norm": 39.55995178222656, + "learning_rate": 6.940644717477328e-08, + "loss": 0.3444, + "num_input_tokens_seen": 43908416, + "step": 13945 + }, + { + "epoch": 0.8930286153255234, + "grad_norm": 34.442626953125, + "learning_rate": 6.899801089469204e-08, + "loss": 0.4553, + "num_input_tokens_seen": 43923712, + "step": 13950 + }, + { + "epoch": 0.8933486972665002, + "grad_norm": 21.26590347290039, + "learning_rate": 6.85907369815254e-08, + "loss": 0.3491, + "num_input_tokens_seen": 43939520, + "step": 13955 + }, + { + "epoch": 0.8936687792074771, + "grad_norm": 48.566612243652344, + "learning_rate": 6.81846259437595e-08, + "loss": 0.3771, + "num_input_tokens_seen": 43954688, + "step": 13960 + }, + { + "epoch": 0.893988861148454, + "grad_norm": 53.70441436767578, + "learning_rate": 6.77796782884289e-08, + "loss": 0.3246, + "num_input_tokens_seen": 43969600, + "step": 13965 + }, + { + "epoch": 0.8943089430894309, + "grad_norm": 57.17893981933594, + "learning_rate": 6.737589452111526e-08, + "loss": 0.3885, + "num_input_tokens_seen": 43985472, + "step": 13970 + }, + { + "epoch": 0.8946290250304078, + "grad_norm": 38.82191467285156, + "learning_rate": 6.697327514594786e-08, + "loss": 0.4012, + "num_input_tokens_seen": 44000768, + "step": 13975 + }, + { + "epoch": 0.8949491069713846, + "grad_norm": 41.286277770996094, + "learning_rate": 6.657182066560118e-08, + "loss": 0.4538, + "num_input_tokens_seen": 44017088, + "step": 13980 + }, + { + "epoch": 0.8952691889123615, + "grad_norm": 26.847368240356445, + "learning_rate": 6.617153158129596e-08, + "loss": 0.3715, + "num_input_tokens_seen": 44031488, + "step": 13985 + }, + { + "epoch": 0.8955892708533385, + "grad_norm": 40.50960159301758, + "learning_rate": 6.577240839279807e-08, + "loss": 0.3356, + "num_input_tokens_seen": 44047296, + "step": 13990 + }, + { + "epoch": 0.8959093527943154, + "grad_norm": 33.30146408081055, + "learning_rate": 6.537445159841748e-08, + "loss": 0.3162, + "num_input_tokens_seen": 44063744, + "step": 13995 + }, + { + "epoch": 0.8962294347352923, + "grad_norm": 34.3074836730957, + "learning_rate": 6.497766169500752e-08, + "loss": 0.3898, + "num_input_tokens_seen": 44079168, + "step": 14000 + }, + { + "epoch": 0.8965495166762691, + "grad_norm": 14.267343521118164, + "learning_rate": 6.458203917796546e-08, + "loss": 0.2716, + "num_input_tokens_seen": 44093824, + "step": 14005 + }, + { + "epoch": 0.896869598617246, + "grad_norm": 18.694324493408203, + "learning_rate": 6.418758454123041e-08, + "loss": 0.4511, + "num_input_tokens_seen": 44111296, + "step": 14010 + }, + { + "epoch": 0.8971896805582229, + "grad_norm": 19.927507400512695, + "learning_rate": 6.379429827728377e-08, + "loss": 0.3912, + "num_input_tokens_seen": 44128000, + "step": 14015 + }, + { + "epoch": 0.8975097624991998, + "grad_norm": 18.84311866760254, + "learning_rate": 6.340218087714799e-08, + "loss": 0.3795, + "num_input_tokens_seen": 44143488, + "step": 14020 + }, + { + "epoch": 0.8978298444401767, + "grad_norm": 95.79281616210938, + "learning_rate": 6.301123283038634e-08, + "loss": 0.347, + "num_input_tokens_seen": 44158976, + "step": 14025 + }, + { + "epoch": 0.8981499263811535, + "grad_norm": 20.625757217407227, + "learning_rate": 6.262145462510193e-08, + "loss": 0.3207, + "num_input_tokens_seen": 44175808, + "step": 14030 + }, + { + "epoch": 0.8984700083221304, + "grad_norm": 45.679771423339844, + "learning_rate": 6.223284674793738e-08, + "loss": 0.2917, + "num_input_tokens_seen": 44190336, + "step": 14035 + }, + { + "epoch": 0.8987900902631074, + "grad_norm": 36.569252014160156, + "learning_rate": 6.184540968407437e-08, + "loss": 0.39, + "num_input_tokens_seen": 44205696, + "step": 14040 + }, + { + "epoch": 0.8991101722040843, + "grad_norm": 27.32700538635254, + "learning_rate": 6.145914391723239e-08, + "loss": 0.3515, + "num_input_tokens_seen": 44222016, + "step": 14045 + }, + { + "epoch": 0.8994302541450612, + "grad_norm": 30.203399658203125, + "learning_rate": 6.107404992966902e-08, + "loss": 0.327, + "num_input_tokens_seen": 44238592, + "step": 14050 + }, + { + "epoch": 0.899750336086038, + "grad_norm": 23.242624282836914, + "learning_rate": 6.069012820217856e-08, + "loss": 0.2489, + "num_input_tokens_seen": 44254016, + "step": 14055 + }, + { + "epoch": 0.9000704180270149, + "grad_norm": 30.345199584960938, + "learning_rate": 6.030737921409168e-08, + "loss": 0.3843, + "num_input_tokens_seen": 44269376, + "step": 14060 + }, + { + "epoch": 0.9003904999679918, + "grad_norm": 54.68627166748047, + "learning_rate": 5.992580344327503e-08, + "loss": 0.4579, + "num_input_tokens_seen": 44284672, + "step": 14065 + }, + { + "epoch": 0.9007105819089687, + "grad_norm": 34.02934265136719, + "learning_rate": 5.954540136613051e-08, + "loss": 0.346, + "num_input_tokens_seen": 44300224, + "step": 14070 + }, + { + "epoch": 0.9010306638499456, + "grad_norm": 24.38910675048828, + "learning_rate": 5.916617345759456e-08, + "loss": 0.3511, + "num_input_tokens_seen": 44315264, + "step": 14075 + }, + { + "epoch": 0.901094680238141, + "eval_loss": 0.35641103982925415, + "eval_runtime": 50.6103, + "eval_samples_per_second": 274.371, + "eval_steps_per_second": 34.301, + "num_input_tokens_seen": 44318848, + "step": 14076 + }, + { + "epoch": 0.9013507457909224, + "grad_norm": 41.97049331665039, + "learning_rate": 5.878812019113766e-08, + "loss": 0.4212, + "num_input_tokens_seen": 44330176, + "step": 14080 + }, + { + "epoch": 0.9016708277318993, + "grad_norm": 26.239030838012695, + "learning_rate": 5.84112420387638e-08, + "loss": 0.3065, + "num_input_tokens_seen": 44345152, + "step": 14085 + }, + { + "epoch": 0.9019909096728762, + "grad_norm": 32.693172454833984, + "learning_rate": 5.8035539471009697e-08, + "loss": 0.3625, + "num_input_tokens_seen": 44361152, + "step": 14090 + }, + { + "epoch": 0.9023109916138532, + "grad_norm": 35.59107208251953, + "learning_rate": 5.7661012956944253e-08, + "loss": 0.4095, + "num_input_tokens_seen": 44376128, + "step": 14095 + }, + { + "epoch": 0.9026310735548301, + "grad_norm": 18.91592788696289, + "learning_rate": 5.728766296416876e-08, + "loss": 0.2917, + "num_input_tokens_seen": 44392192, + "step": 14100 + }, + { + "epoch": 0.902951155495807, + "grad_norm": 34.914939880371094, + "learning_rate": 5.6915489958814453e-08, + "loss": 0.4205, + "num_input_tokens_seen": 44407680, + "step": 14105 + }, + { + "epoch": 0.9032712374367838, + "grad_norm": 52.385982513427734, + "learning_rate": 5.654449440554399e-08, + "loss": 0.4106, + "num_input_tokens_seen": 44424384, + "step": 14110 + }, + { + "epoch": 0.9035913193777607, + "grad_norm": 23.303749084472656, + "learning_rate": 5.617467676754972e-08, + "loss": 0.3803, + "num_input_tokens_seen": 44439744, + "step": 14115 + }, + { + "epoch": 0.9039114013187376, + "grad_norm": 23.8924560546875, + "learning_rate": 5.580603750655344e-08, + "loss": 0.296, + "num_input_tokens_seen": 44454272, + "step": 14120 + }, + { + "epoch": 0.9042314832597145, + "grad_norm": 30.840787887573242, + "learning_rate": 5.543857708280497e-08, + "loss": 0.3739, + "num_input_tokens_seen": 44468992, + "step": 14125 + }, + { + "epoch": 0.9045515652006914, + "grad_norm": 41.89210891723633, + "learning_rate": 5.507229595508367e-08, + "loss": 0.4703, + "num_input_tokens_seen": 44484864, + "step": 14130 + }, + { + "epoch": 0.9048716471416682, + "grad_norm": 15.071650505065918, + "learning_rate": 5.4707194580695504e-08, + "loss": 0.2887, + "num_input_tokens_seen": 44499968, + "step": 14135 + }, + { + "epoch": 0.9051917290826451, + "grad_norm": 37.00905990600586, + "learning_rate": 5.4343273415473846e-08, + "loss": 0.4279, + "num_input_tokens_seen": 44517952, + "step": 14140 + }, + { + "epoch": 0.905511811023622, + "grad_norm": 25.922666549682617, + "learning_rate": 5.3980532913778576e-08, + "loss": 0.3413, + "num_input_tokens_seen": 44532928, + "step": 14145 + }, + { + "epoch": 0.905831892964599, + "grad_norm": 32.402870178222656, + "learning_rate": 5.361897352849554e-08, + "loss": 0.3928, + "num_input_tokens_seen": 44548288, + "step": 14150 + }, + { + "epoch": 0.9061519749055759, + "grad_norm": 22.13262176513672, + "learning_rate": 5.325859571103586e-08, + "loss": 0.3204, + "num_input_tokens_seen": 44563712, + "step": 14155 + }, + { + "epoch": 0.9064720568465527, + "grad_norm": 23.106555938720703, + "learning_rate": 5.289939991133508e-08, + "loss": 0.3376, + "num_input_tokens_seen": 44579264, + "step": 14160 + }, + { + "epoch": 0.9067921387875296, + "grad_norm": 12.965365409851074, + "learning_rate": 5.2541386577853895e-08, + "loss": 0.2387, + "num_input_tokens_seen": 44594176, + "step": 14165 + }, + { + "epoch": 0.9071122207285065, + "grad_norm": 21.157468795776367, + "learning_rate": 5.2184556157576e-08, + "loss": 0.2536, + "num_input_tokens_seen": 44609664, + "step": 14170 + }, + { + "epoch": 0.9074323026694834, + "grad_norm": 53.38215255737305, + "learning_rate": 5.1828909096008234e-08, + "loss": 0.3807, + "num_input_tokens_seen": 44626944, + "step": 14175 + }, + { + "epoch": 0.9077523846104603, + "grad_norm": 16.712305068969727, + "learning_rate": 5.14744458371803e-08, + "loss": 0.2294, + "num_input_tokens_seen": 44643520, + "step": 14180 + }, + { + "epoch": 0.9080724665514371, + "grad_norm": 86.15962219238281, + "learning_rate": 5.1121166823643646e-08, + "loss": 0.4922, + "num_input_tokens_seen": 44657984, + "step": 14185 + }, + { + "epoch": 0.908392548492414, + "grad_norm": 30.85004997253418, + "learning_rate": 5.076907249647122e-08, + "loss": 0.3841, + "num_input_tokens_seen": 44673024, + "step": 14190 + }, + { + "epoch": 0.9087126304333909, + "grad_norm": 29.058664321899414, + "learning_rate": 5.0418163295257055e-08, + "loss": 0.4111, + "num_input_tokens_seen": 44687424, + "step": 14195 + }, + { + "epoch": 0.9090327123743679, + "grad_norm": 41.66473388671875, + "learning_rate": 5.006843965811536e-08, + "loss": 0.2901, + "num_input_tokens_seen": 44702976, + "step": 14200 + }, + { + "epoch": 0.9093527943153448, + "grad_norm": 44.971763610839844, + "learning_rate": 4.971990202168008e-08, + "loss": 0.4813, + "num_input_tokens_seen": 44718144, + "step": 14205 + }, + { + "epoch": 0.9096728762563216, + "grad_norm": 27.01129913330078, + "learning_rate": 4.9372550821104697e-08, + "loss": 0.3209, + "num_input_tokens_seen": 44734912, + "step": 14210 + }, + { + "epoch": 0.9099929581972985, + "grad_norm": 24.344900131225586, + "learning_rate": 4.902638649006119e-08, + "loss": 0.3205, + "num_input_tokens_seen": 44749888, + "step": 14215 + }, + { + "epoch": 0.9103130401382754, + "grad_norm": 22.84157943725586, + "learning_rate": 4.868140946073973e-08, + "loss": 0.3289, + "num_input_tokens_seen": 44764544, + "step": 14220 + }, + { + "epoch": 0.9106331220792523, + "grad_norm": 33.33592987060547, + "learning_rate": 4.833762016384857e-08, + "loss": 0.3017, + "num_input_tokens_seen": 44780992, + "step": 14225 + }, + { + "epoch": 0.9109532040202292, + "grad_norm": 54.81636428833008, + "learning_rate": 4.799501902861214e-08, + "loss": 0.3869, + "num_input_tokens_seen": 44796672, + "step": 14230 + }, + { + "epoch": 0.911273285961206, + "grad_norm": 45.477725982666016, + "learning_rate": 4.765360648277217e-08, + "loss": 0.4287, + "num_input_tokens_seen": 44812224, + "step": 14235 + }, + { + "epoch": 0.9115933679021829, + "grad_norm": 39.50046920776367, + "learning_rate": 4.7313382952586465e-08, + "loss": 0.4228, + "num_input_tokens_seen": 44827136, + "step": 14240 + }, + { + "epoch": 0.9119134498431598, + "grad_norm": 17.21167755126953, + "learning_rate": 4.6974348862828027e-08, + "loss": 0.3649, + "num_input_tokens_seen": 44842176, + "step": 14245 + }, + { + "epoch": 0.9122335317841367, + "grad_norm": 48.12306594848633, + "learning_rate": 4.663650463678448e-08, + "loss": 0.4412, + "num_input_tokens_seen": 44858880, + "step": 14250 + }, + { + "epoch": 0.9125536137251137, + "grad_norm": 23.15502166748047, + "learning_rate": 4.629985069625875e-08, + "loss": 0.4434, + "num_input_tokens_seen": 44875328, + "step": 14255 + }, + { + "epoch": 0.9128736956660906, + "grad_norm": 55.39519500732422, + "learning_rate": 4.596438746156728e-08, + "loss": 0.3751, + "num_input_tokens_seen": 44892032, + "step": 14260 + }, + { + "epoch": 0.9131937776070674, + "grad_norm": 32.531982421875, + "learning_rate": 4.563011535153949e-08, + "loss": 0.36, + "num_input_tokens_seen": 44907328, + "step": 14265 + }, + { + "epoch": 0.9135138595480443, + "grad_norm": 28.193361282348633, + "learning_rate": 4.52970347835181e-08, + "loss": 0.2689, + "num_input_tokens_seen": 44922560, + "step": 14270 + }, + { + "epoch": 0.9138339414890212, + "grad_norm": 33.741458892822266, + "learning_rate": 4.496514617335845e-08, + "loss": 0.327, + "num_input_tokens_seen": 44937728, + "step": 14275 + }, + { + "epoch": 0.9141540234299981, + "grad_norm": 43.2702522277832, + "learning_rate": 4.4634449935427197e-08, + "loss": 0.3603, + "num_input_tokens_seen": 44954560, + "step": 14280 + }, + { + "epoch": 0.914474105370975, + "grad_norm": 28.025667190551758, + "learning_rate": 4.430494648260219e-08, + "loss": 0.3096, + "num_input_tokens_seen": 44971520, + "step": 14285 + }, + { + "epoch": 0.9147941873119518, + "grad_norm": 41.917076110839844, + "learning_rate": 4.397663622627279e-08, + "loss": 0.4524, + "num_input_tokens_seen": 44987392, + "step": 14290 + }, + { + "epoch": 0.9151142692529287, + "grad_norm": 26.760129928588867, + "learning_rate": 4.364951957633789e-08, + "loss": 0.3122, + "num_input_tokens_seen": 45002688, + "step": 14295 + }, + { + "epoch": 0.9154343511939056, + "grad_norm": 32.50722122192383, + "learning_rate": 4.332359694120669e-08, + "loss": 0.2953, + "num_input_tokens_seen": 45017792, + "step": 14300 + }, + { + "epoch": 0.9157544331348826, + "grad_norm": 31.373065948486328, + "learning_rate": 4.299886872779734e-08, + "loss": 0.3571, + "num_input_tokens_seen": 45032640, + "step": 14305 + }, + { + "epoch": 0.9160745150758595, + "grad_norm": 37.13477325439453, + "learning_rate": 4.267533534153678e-08, + "loss": 0.2975, + "num_input_tokens_seen": 45048256, + "step": 14310 + }, + { + "epoch": 0.9163945970168363, + "grad_norm": 28.1370792388916, + "learning_rate": 4.2352997186360316e-08, + "loss": 0.3218, + "num_input_tokens_seen": 45064192, + "step": 14315 + }, + { + "epoch": 0.9167146789578132, + "grad_norm": 20.448345184326172, + "learning_rate": 4.203185466471082e-08, + "loss": 0.3243, + "num_input_tokens_seen": 45079488, + "step": 14320 + }, + { + "epoch": 0.9170347608987901, + "grad_norm": 22.20013999938965, + "learning_rate": 4.1711908177538556e-08, + "loss": 0.3984, + "num_input_tokens_seen": 45095616, + "step": 14325 + }, + { + "epoch": 0.917354842839767, + "grad_norm": 43.30317306518555, + "learning_rate": 4.139315812430055e-08, + "loss": 0.378, + "num_input_tokens_seen": 45110592, + "step": 14330 + }, + { + "epoch": 0.9176749247807439, + "grad_norm": 28.17921257019043, + "learning_rate": 4.1075604902959915e-08, + "loss": 0.3863, + "num_input_tokens_seen": 45127168, + "step": 14335 + }, + { + "epoch": 0.9179950067217207, + "grad_norm": 29.114439010620117, + "learning_rate": 4.07592489099855e-08, + "loss": 0.3137, + "num_input_tokens_seen": 45142208, + "step": 14340 + }, + { + "epoch": 0.9183150886626976, + "grad_norm": 39.799137115478516, + "learning_rate": 4.044409054035147e-08, + "loss": 0.3934, + "num_input_tokens_seen": 45157184, + "step": 14345 + }, + { + "epoch": 0.9186351706036745, + "grad_norm": 16.564228057861328, + "learning_rate": 4.0130130187537195e-08, + "loss": 0.3929, + "num_input_tokens_seen": 45174464, + "step": 14350 + }, + { + "epoch": 0.9189552525446514, + "grad_norm": 37.10162353515625, + "learning_rate": 3.981736824352522e-08, + "loss": 0.3149, + "num_input_tokens_seen": 45188992, + "step": 14355 + }, + { + "epoch": 0.9192753344856284, + "grad_norm": 32.673851013183594, + "learning_rate": 3.950580509880286e-08, + "loss": 0.4703, + "num_input_tokens_seen": 45204032, + "step": 14360 + }, + { + "epoch": 0.9195954164266052, + "grad_norm": 42.633636474609375, + "learning_rate": 3.9195441142360066e-08, + "loss": 0.3999, + "num_input_tokens_seen": 45219328, + "step": 14365 + }, + { + "epoch": 0.9199154983675821, + "grad_norm": 24.69942283630371, + "learning_rate": 3.888627676169043e-08, + "loss": 0.321, + "num_input_tokens_seen": 45235584, + "step": 14370 + }, + { + "epoch": 0.920235580308559, + "grad_norm": 32.20174026489258, + "learning_rate": 3.857831234278886e-08, + "loss": 0.3666, + "num_input_tokens_seen": 45250880, + "step": 14375 + }, + { + "epoch": 0.9205556622495359, + "grad_norm": 32.91603469848633, + "learning_rate": 3.827154827015255e-08, + "loss": 0.4145, + "num_input_tokens_seen": 45266752, + "step": 14380 + }, + { + "epoch": 0.9208757441905128, + "grad_norm": 13.686686515808105, + "learning_rate": 3.7965984926780383e-08, + "loss": 0.285, + "num_input_tokens_seen": 45282496, + "step": 14385 + }, + { + "epoch": 0.9211958261314896, + "grad_norm": 42.95164108276367, + "learning_rate": 3.766162269417139e-08, + "loss": 0.3521, + "num_input_tokens_seen": 45297024, + "step": 14390 + }, + { + "epoch": 0.9215159080724665, + "grad_norm": 42.11014938354492, + "learning_rate": 3.73584619523255e-08, + "loss": 0.3723, + "num_input_tokens_seen": 45314176, + "step": 14395 + }, + { + "epoch": 0.9218359900134434, + "grad_norm": 23.218151092529297, + "learning_rate": 3.7056503079742616e-08, + "loss": 0.352, + "num_input_tokens_seen": 45329344, + "step": 14400 + }, + { + "epoch": 0.9221560719544203, + "grad_norm": 28.30218505859375, + "learning_rate": 3.6755746453421945e-08, + "loss": 0.3452, + "num_input_tokens_seen": 45344384, + "step": 14405 + }, + { + "epoch": 0.9224761538953972, + "grad_norm": 15.89623737335205, + "learning_rate": 3.645619244886145e-08, + "loss": 0.2969, + "num_input_tokens_seen": 45360192, + "step": 14410 + }, + { + "epoch": 0.9227962358363742, + "grad_norm": 14.244527816772461, + "learning_rate": 3.615784144005796e-08, + "loss": 0.3147, + "num_input_tokens_seen": 45376000, + "step": 14415 + }, + { + "epoch": 0.923116317777351, + "grad_norm": 32.481868743896484, + "learning_rate": 3.5860693799506184e-08, + "loss": 0.4197, + "num_input_tokens_seen": 45390400, + "step": 14420 + }, + { + "epoch": 0.9234363997183279, + "grad_norm": 30.899581909179688, + "learning_rate": 3.5564749898198466e-08, + "loss": 0.4608, + "num_input_tokens_seen": 45406976, + "step": 14425 + }, + { + "epoch": 0.9237564816593048, + "grad_norm": 34.5185546875, + "learning_rate": 3.527001010562425e-08, + "loss": 0.3533, + "num_input_tokens_seen": 45422080, + "step": 14430 + }, + { + "epoch": 0.9240765636002817, + "grad_norm": 56.734092712402344, + "learning_rate": 3.4976474789769504e-08, + "loss": 0.3585, + "num_input_tokens_seen": 45439296, + "step": 14435 + }, + { + "epoch": 0.9243966455412586, + "grad_norm": 39.2069206237793, + "learning_rate": 3.4684144317116636e-08, + "loss": 0.2994, + "num_input_tokens_seen": 45454208, + "step": 14440 + }, + { + "epoch": 0.9247167274822354, + "grad_norm": 19.383590698242188, + "learning_rate": 3.439301905264369e-08, + "loss": 0.3015, + "num_input_tokens_seen": 45470400, + "step": 14445 + }, + { + "epoch": 0.9250368094232123, + "grad_norm": 43.57049560546875, + "learning_rate": 3.410309935982403e-08, + "loss": 0.324, + "num_input_tokens_seen": 45486528, + "step": 14450 + }, + { + "epoch": 0.9253568913641892, + "grad_norm": 20.486600875854492, + "learning_rate": 3.381438560062555e-08, + "loss": 0.3488, + "num_input_tokens_seen": 45501440, + "step": 14455 + }, + { + "epoch": 0.9256769733051661, + "grad_norm": 40.25934600830078, + "learning_rate": 3.3526878135511025e-08, + "loss": 0.3167, + "num_input_tokens_seen": 45517760, + "step": 14460 + }, + { + "epoch": 0.9259970552461431, + "grad_norm": 53.134132385253906, + "learning_rate": 3.324057732343666e-08, + "loss": 0.3751, + "num_input_tokens_seen": 45533056, + "step": 14465 + }, + { + "epoch": 0.9263171371871199, + "grad_norm": 26.382478713989258, + "learning_rate": 3.295548352185262e-08, + "loss": 0.421, + "num_input_tokens_seen": 45549248, + "step": 14470 + }, + { + "epoch": 0.9266372191280968, + "grad_norm": 35.308876037597656, + "learning_rate": 3.2671597086701753e-08, + "loss": 0.3503, + "num_input_tokens_seen": 45565760, + "step": 14475 + }, + { + "epoch": 0.9269573010690737, + "grad_norm": 23.9242000579834, + "learning_rate": 3.238891837241964e-08, + "loss": 0.3294, + "num_input_tokens_seen": 45581568, + "step": 14480 + }, + { + "epoch": 0.9272773830100506, + "grad_norm": 46.7926139831543, + "learning_rate": 3.210744773193386e-08, + "loss": 0.4179, + "num_input_tokens_seen": 45596928, + "step": 14485 + }, + { + "epoch": 0.9275974649510275, + "grad_norm": 45.16802978515625, + "learning_rate": 3.182718551666386e-08, + "loss": 0.3016, + "num_input_tokens_seen": 45612800, + "step": 14490 + }, + { + "epoch": 0.9279175468920043, + "grad_norm": 76.23828125, + "learning_rate": 3.154813207652063e-08, + "loss": 0.415, + "num_input_tokens_seen": 45627584, + "step": 14495 + }, + { + "epoch": 0.9282376288329812, + "grad_norm": 45.80707931518555, + "learning_rate": 3.1270287759905143e-08, + "loss": 0.3294, + "num_input_tokens_seen": 45643840, + "step": 14500 + }, + { + "epoch": 0.9285577107739581, + "grad_norm": 16.698345184326172, + "learning_rate": 3.0993652913709476e-08, + "loss": 0.2947, + "num_input_tokens_seen": 45659072, + "step": 14505 + }, + { + "epoch": 0.928877792714935, + "grad_norm": 24.177282333374023, + "learning_rate": 3.0718227883315796e-08, + "loss": 0.4243, + "num_input_tokens_seen": 45675328, + "step": 14510 + }, + { + "epoch": 0.9291978746559119, + "grad_norm": 41.06902313232422, + "learning_rate": 3.044401301259503e-08, + "loss": 0.3658, + "num_input_tokens_seen": 45690816, + "step": 14515 + }, + { + "epoch": 0.9295179565968889, + "grad_norm": 16.021848678588867, + "learning_rate": 3.017100864390787e-08, + "loss": 0.3301, + "num_input_tokens_seen": 45706432, + "step": 14520 + }, + { + "epoch": 0.9298380385378657, + "grad_norm": 53.745323181152344, + "learning_rate": 2.9899215118103446e-08, + "loss": 0.3406, + "num_input_tokens_seen": 45721920, + "step": 14525 + }, + { + "epoch": 0.9301581204788426, + "grad_norm": 15.850577354431152, + "learning_rate": 2.9628632774519435e-08, + "loss": 0.3547, + "num_input_tokens_seen": 45738048, + "step": 14530 + }, + { + "epoch": 0.9304782024198195, + "grad_norm": 25.69484519958496, + "learning_rate": 2.9359261950980485e-08, + "loss": 0.3313, + "num_input_tokens_seen": 45753856, + "step": 14535 + }, + { + "epoch": 0.9307982843607964, + "grad_norm": 23.73916244506836, + "learning_rate": 2.90911029837998e-08, + "loss": 0.2998, + "num_input_tokens_seen": 45768704, + "step": 14540 + }, + { + "epoch": 0.9311183663017732, + "grad_norm": 28.18566131591797, + "learning_rate": 2.8824156207776673e-08, + "loss": 0.2851, + "num_input_tokens_seen": 45783936, + "step": 14545 + }, + { + "epoch": 0.9314384482427501, + "grad_norm": 111.7474136352539, + "learning_rate": 2.8558421956197397e-08, + "loss": 0.4491, + "num_input_tokens_seen": 45800320, + "step": 14550 + }, + { + "epoch": 0.931758530183727, + "grad_norm": 39.434696197509766, + "learning_rate": 2.829390056083436e-08, + "loss": 0.3872, + "num_input_tokens_seen": 45816512, + "step": 14555 + }, + { + "epoch": 0.9320786121247039, + "grad_norm": 22.48917579650879, + "learning_rate": 2.8030592351945492e-08, + "loss": 0.3173, + "num_input_tokens_seen": 45831936, + "step": 14560 + }, + { + "epoch": 0.9323986940656808, + "grad_norm": 20.20699691772461, + "learning_rate": 2.776849765827427e-08, + "loss": 0.2995, + "num_input_tokens_seen": 45846784, + "step": 14565 + }, + { + "epoch": 0.9327187760066578, + "grad_norm": 42.71226501464844, + "learning_rate": 2.750761680704905e-08, + "loss": 0.4281, + "num_input_tokens_seen": 45862080, + "step": 14570 + }, + { + "epoch": 0.9330388579476346, + "grad_norm": 40.49309539794922, + "learning_rate": 2.724795012398251e-08, + "loss": 0.3977, + "num_input_tokens_seen": 45878528, + "step": 14575 + }, + { + "epoch": 0.9333589398886115, + "grad_norm": 37.85169219970703, + "learning_rate": 2.6989497933271543e-08, + "loss": 0.3726, + "num_input_tokens_seen": 45894016, + "step": 14580 + }, + { + "epoch": 0.9336790218295884, + "grad_norm": 20.322919845581055, + "learning_rate": 2.673226055759692e-08, + "loss": 0.3228, + "num_input_tokens_seen": 45909504, + "step": 14585 + }, + { + "epoch": 0.9339991037705653, + "grad_norm": 35.590576171875, + "learning_rate": 2.6476238318122402e-08, + "loss": 0.341, + "num_input_tokens_seen": 45925376, + "step": 14590 + }, + { + "epoch": 0.9343191857115422, + "grad_norm": 34.80650329589844, + "learning_rate": 2.6221431534494742e-08, + "loss": 0.3917, + "num_input_tokens_seen": 45940224, + "step": 14595 + }, + { + "epoch": 0.934639267652519, + "grad_norm": 58.117610931396484, + "learning_rate": 2.5967840524843243e-08, + "loss": 0.3508, + "num_input_tokens_seen": 45955072, + "step": 14600 + }, + { + "epoch": 0.9349593495934959, + "grad_norm": 32.32001495361328, + "learning_rate": 2.5715465605779195e-08, + "loss": 0.4243, + "num_input_tokens_seen": 45970240, + "step": 14605 + }, + { + "epoch": 0.9352794315344728, + "grad_norm": 85.06121063232422, + "learning_rate": 2.5464307092395777e-08, + "loss": 0.4145, + "num_input_tokens_seen": 45985856, + "step": 14610 + }, + { + "epoch": 0.9355995134754497, + "grad_norm": 22.371501922607422, + "learning_rate": 2.5214365298267148e-08, + "loss": 0.345, + "num_input_tokens_seen": 46000256, + "step": 14615 + }, + { + "epoch": 0.9359195954164266, + "grad_norm": 26.058761596679688, + "learning_rate": 2.4965640535448917e-08, + "loss": 0.3203, + "num_input_tokens_seen": 46015616, + "step": 14620 + }, + { + "epoch": 0.9362396773574035, + "grad_norm": 32.65026092529297, + "learning_rate": 2.471813311447657e-08, + "loss": 0.3659, + "num_input_tokens_seen": 46031040, + "step": 14625 + }, + { + "epoch": 0.9365597592983804, + "grad_norm": 34.6900634765625, + "learning_rate": 2.4471843344365915e-08, + "loss": 0.3221, + "num_input_tokens_seen": 46046016, + "step": 14630 + }, + { + "epoch": 0.9368798412393573, + "grad_norm": 19.555171966552734, + "learning_rate": 2.42267715326131e-08, + "loss": 0.2701, + "num_input_tokens_seen": 46062528, + "step": 14635 + }, + { + "epoch": 0.9371999231803342, + "grad_norm": 32.25774383544922, + "learning_rate": 2.3982917985192697e-08, + "loss": 0.3421, + "num_input_tokens_seen": 46078144, + "step": 14640 + }, + { + "epoch": 0.9375200051213111, + "grad_norm": 52.68037796020508, + "learning_rate": 2.3740283006558838e-08, + "loss": 0.3982, + "num_input_tokens_seen": 46096896, + "step": 14645 + }, + { + "epoch": 0.9378400870622879, + "grad_norm": 44.6820068359375, + "learning_rate": 2.349886689964431e-08, + "loss": 0.3756, + "num_input_tokens_seen": 46111808, + "step": 14650 + }, + { + "epoch": 0.9381601690032648, + "grad_norm": 36.16184616088867, + "learning_rate": 2.32586699658599e-08, + "loss": 0.2836, + "num_input_tokens_seen": 46127936, + "step": 14655 + }, + { + "epoch": 0.9384802509442417, + "grad_norm": 24.07101058959961, + "learning_rate": 2.3019692505094056e-08, + "loss": 0.3551, + "num_input_tokens_seen": 46142848, + "step": 14660 + }, + { + "epoch": 0.9388003328852186, + "grad_norm": 51.901004791259766, + "learning_rate": 2.2781934815713223e-08, + "loss": 0.5477, + "num_input_tokens_seen": 46158848, + "step": 14665 + }, + { + "epoch": 0.9391204148261955, + "grad_norm": 26.652942657470703, + "learning_rate": 2.254539719456061e-08, + "loss": 0.3611, + "num_input_tokens_seen": 46174912, + "step": 14670 + }, + { + "epoch": 0.9394404967671725, + "grad_norm": 19.459861755371094, + "learning_rate": 2.231007993695633e-08, + "loss": 0.2694, + "num_input_tokens_seen": 46189248, + "step": 14675 + }, + { + "epoch": 0.9397605787081493, + "grad_norm": 20.323457717895508, + "learning_rate": 2.2075983336696357e-08, + "loss": 0.3136, + "num_input_tokens_seen": 46204928, + "step": 14680 + }, + { + "epoch": 0.9400806606491262, + "grad_norm": 43.52647399902344, + "learning_rate": 2.1843107686053353e-08, + "loss": 0.3964, + "num_input_tokens_seen": 46220160, + "step": 14685 + }, + { + "epoch": 0.9404007425901031, + "grad_norm": 26.077720642089844, + "learning_rate": 2.1611453275775405e-08, + "loss": 0.4228, + "num_input_tokens_seen": 46235584, + "step": 14690 + }, + { + "epoch": 0.94072082453108, + "grad_norm": 28.561120986938477, + "learning_rate": 2.138102039508538e-08, + "loss": 0.2719, + "num_input_tokens_seen": 46251904, + "step": 14695 + }, + { + "epoch": 0.9410409064720568, + "grad_norm": 45.0341796875, + "learning_rate": 2.1151809331681703e-08, + "loss": 0.3995, + "num_input_tokens_seen": 46268032, + "step": 14700 + }, + { + "epoch": 0.9413609884130337, + "grad_norm": 54.203208923339844, + "learning_rate": 2.092382037173701e-08, + "loss": 0.3371, + "num_input_tokens_seen": 46283392, + "step": 14705 + }, + { + "epoch": 0.9416810703540106, + "grad_norm": 29.033945083618164, + "learning_rate": 2.0697053799898277e-08, + "loss": 0.3089, + "num_input_tokens_seen": 46298752, + "step": 14710 + }, + { + "epoch": 0.9420011522949875, + "grad_norm": 32.4161491394043, + "learning_rate": 2.0471509899286144e-08, + "loss": 0.3394, + "num_input_tokens_seen": 46314624, + "step": 14715 + }, + { + "epoch": 0.9423212342359644, + "grad_norm": 27.046730041503906, + "learning_rate": 2.0247188951494797e-08, + "loss": 0.3517, + "num_input_tokens_seen": 46331712, + "step": 14720 + }, + { + "epoch": 0.9426413161769412, + "grad_norm": 56.06675720214844, + "learning_rate": 2.0024091236591655e-08, + "loss": 0.5446, + "num_input_tokens_seen": 46347200, + "step": 14725 + }, + { + "epoch": 0.9429613981179182, + "grad_norm": 18.983240127563477, + "learning_rate": 1.98022170331168e-08, + "loss": 0.3148, + "num_input_tokens_seen": 46363008, + "step": 14730 + }, + { + "epoch": 0.9432814800588951, + "grad_norm": 31.674177169799805, + "learning_rate": 1.9581566618082744e-08, + "loss": 0.3808, + "num_input_tokens_seen": 46378816, + "step": 14735 + }, + { + "epoch": 0.943601561999872, + "grad_norm": 54.48043441772461, + "learning_rate": 1.9362140266974025e-08, + "loss": 0.4079, + "num_input_tokens_seen": 46395200, + "step": 14740 + }, + { + "epoch": 0.9439216439408489, + "grad_norm": 51.17926025390625, + "learning_rate": 1.9143938253747383e-08, + "loss": 0.3223, + "num_input_tokens_seen": 46411840, + "step": 14745 + }, + { + "epoch": 0.9442417258818258, + "grad_norm": 25.376482009887695, + "learning_rate": 1.892696085083023e-08, + "loss": 0.4503, + "num_input_tokens_seen": 46427776, + "step": 14750 + }, + { + "epoch": 0.9445618078228026, + "grad_norm": 35.88258361816406, + "learning_rate": 1.8711208329121542e-08, + "loss": 0.3146, + "num_input_tokens_seen": 46444736, + "step": 14755 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 25.610300064086914, + "learning_rate": 1.849668095799084e-08, + "loss": 0.3372, + "num_input_tokens_seen": 46460672, + "step": 14760 + }, + { + "epoch": 0.9452019717047564, + "grad_norm": 31.893014907836914, + "learning_rate": 1.8283379005278098e-08, + "loss": 0.3458, + "num_input_tokens_seen": 46476736, + "step": 14765 + }, + { + "epoch": 0.9455220536457333, + "grad_norm": 15.131741523742676, + "learning_rate": 1.807130273729329e-08, + "loss": 0.3238, + "num_input_tokens_seen": 46492416, + "step": 14770 + }, + { + "epoch": 0.9458421355867102, + "grad_norm": 38.48772430419922, + "learning_rate": 1.7860452418816173e-08, + "loss": 0.3331, + "num_input_tokens_seen": 46507264, + "step": 14775 + }, + { + "epoch": 0.946162217527687, + "grad_norm": 29.807109832763672, + "learning_rate": 1.7650828313095834e-08, + "loss": 0.3365, + "num_input_tokens_seen": 46524224, + "step": 14780 + }, + { + "epoch": 0.946482299468664, + "grad_norm": 16.179826736450195, + "learning_rate": 1.7442430681850362e-08, + "loss": 0.3172, + "num_input_tokens_seen": 46539456, + "step": 14785 + }, + { + "epoch": 0.9468023814096409, + "grad_norm": 40.83128356933594, + "learning_rate": 1.723525978526652e-08, + "loss": 0.402, + "num_input_tokens_seen": 46555136, + "step": 14790 + }, + { + "epoch": 0.9471224633506178, + "grad_norm": 27.168893814086914, + "learning_rate": 1.702931588199996e-08, + "loss": 0.3503, + "num_input_tokens_seen": 46570432, + "step": 14795 + }, + { + "epoch": 0.9474425452915947, + "grad_norm": 31.79697036743164, + "learning_rate": 1.6824599229173897e-08, + "loss": 0.3141, + "num_input_tokens_seen": 46586304, + "step": 14800 + }, + { + "epoch": 0.9477626272325715, + "grad_norm": 34.36116409301758, + "learning_rate": 1.662111008237932e-08, + "loss": 0.2946, + "num_input_tokens_seen": 46602432, + "step": 14805 + }, + { + "epoch": 0.9480827091735484, + "grad_norm": 32.66071701049805, + "learning_rate": 1.6418848695675003e-08, + "loss": 0.3135, + "num_input_tokens_seen": 46617472, + "step": 14810 + }, + { + "epoch": 0.9484027911145253, + "grad_norm": 36.13750457763672, + "learning_rate": 1.6217815321586614e-08, + "loss": 0.3713, + "num_input_tokens_seen": 46632896, + "step": 14815 + }, + { + "epoch": 0.9487228730555022, + "grad_norm": 20.115230560302734, + "learning_rate": 1.6018010211106602e-08, + "loss": 0.3516, + "num_input_tokens_seen": 46649408, + "step": 14820 + }, + { + "epoch": 0.9490429549964791, + "grad_norm": 18.152301788330078, + "learning_rate": 1.58194336136942e-08, + "loss": 0.2899, + "num_input_tokens_seen": 46665344, + "step": 14825 + }, + { + "epoch": 0.9493630369374559, + "grad_norm": 37.48488235473633, + "learning_rate": 1.5622085777274417e-08, + "loss": 0.4377, + "num_input_tokens_seen": 46680704, + "step": 14830 + }, + { + "epoch": 0.9496831188784329, + "grad_norm": 39.530426025390625, + "learning_rate": 1.542596694823839e-08, + "loss": 0.3267, + "num_input_tokens_seen": 46695936, + "step": 14835 + }, + { + "epoch": 0.9500032008194098, + "grad_norm": 56.298133850097656, + "learning_rate": 1.5231077371442914e-08, + "loss": 0.4208, + "num_input_tokens_seen": 46711680, + "step": 14840 + }, + { + "epoch": 0.9503232827603867, + "grad_norm": 24.305173873901367, + "learning_rate": 1.5037417290209685e-08, + "loss": 0.2846, + "num_input_tokens_seen": 46727040, + "step": 14845 + }, + { + "epoch": 0.9506433647013636, + "grad_norm": 38.6282844543457, + "learning_rate": 1.4844986946325743e-08, + "loss": 0.3933, + "num_input_tokens_seen": 46742720, + "step": 14850 + }, + { + "epoch": 0.9509634466423404, + "grad_norm": 26.332656860351562, + "learning_rate": 1.4653786580042681e-08, + "loss": 0.2686, + "num_input_tokens_seen": 46758336, + "step": 14855 + }, + { + "epoch": 0.9511554958069266, + "eval_loss": 0.35565948486328125, + "eval_runtime": 50.6621, + "eval_samples_per_second": 274.09, + "eval_steps_per_second": 34.266, + "num_input_tokens_seen": 46767552, + "step": 14858 + }, + { + "epoch": 0.9512835285833173, + "grad_norm": 22.38384246826172, + "learning_rate": 1.4463816430076215e-08, + "loss": 0.2999, + "num_input_tokens_seen": 46773312, + "step": 14860 + }, + { + "epoch": 0.9516036105242942, + "grad_norm": 38.47566604614258, + "learning_rate": 1.4275076733606395e-08, + "loss": 0.3573, + "num_input_tokens_seen": 46787968, + "step": 14865 + }, + { + "epoch": 0.9519236924652711, + "grad_norm": 26.21702766418457, + "learning_rate": 1.4087567726277061e-08, + "loss": 0.2955, + "num_input_tokens_seen": 46803712, + "step": 14870 + }, + { + "epoch": 0.952243774406248, + "grad_norm": 29.341995239257812, + "learning_rate": 1.390128964219528e-08, + "loss": 0.2811, + "num_input_tokens_seen": 46820288, + "step": 14875 + }, + { + "epoch": 0.9525638563472248, + "grad_norm": 45.728111267089844, + "learning_rate": 1.3716242713931348e-08, + "loss": 0.3966, + "num_input_tokens_seen": 46835904, + "step": 14880 + }, + { + "epoch": 0.9528839382882017, + "grad_norm": 28.86192512512207, + "learning_rate": 1.3532427172518789e-08, + "loss": 0.3738, + "num_input_tokens_seen": 46851136, + "step": 14885 + }, + { + "epoch": 0.9532040202291787, + "grad_norm": 37.157073974609375, + "learning_rate": 1.3349843247453252e-08, + "loss": 0.3431, + "num_input_tokens_seen": 46867456, + "step": 14890 + }, + { + "epoch": 0.9535241021701556, + "grad_norm": 28.760547637939453, + "learning_rate": 1.3168491166692941e-08, + "loss": 0.2796, + "num_input_tokens_seen": 46882816, + "step": 14895 + }, + { + "epoch": 0.9538441841111325, + "grad_norm": 42.88594436645508, + "learning_rate": 1.2988371156658073e-08, + "loss": 0.4594, + "num_input_tokens_seen": 46898624, + "step": 14900 + }, + { + "epoch": 0.9541642660521094, + "grad_norm": 27.931549072265625, + "learning_rate": 1.2809483442230763e-08, + "loss": 0.2959, + "num_input_tokens_seen": 46914304, + "step": 14905 + }, + { + "epoch": 0.9544843479930862, + "grad_norm": 25.078886032104492, + "learning_rate": 1.2631828246754128e-08, + "loss": 0.373, + "num_input_tokens_seen": 46930368, + "step": 14910 + }, + { + "epoch": 0.9548044299340631, + "grad_norm": 45.43979263305664, + "learning_rate": 1.2455405792032969e-08, + "loss": 0.3678, + "num_input_tokens_seen": 46945792, + "step": 14915 + }, + { + "epoch": 0.95512451187504, + "grad_norm": 23.990598678588867, + "learning_rate": 1.2280216298332646e-08, + "loss": 0.3474, + "num_input_tokens_seen": 46962048, + "step": 14920 + }, + { + "epoch": 0.9554445938160169, + "grad_norm": 56.57452392578125, + "learning_rate": 1.2106259984379642e-08, + "loss": 0.4736, + "num_input_tokens_seen": 46976768, + "step": 14925 + }, + { + "epoch": 0.9557646757569938, + "grad_norm": 45.05714416503906, + "learning_rate": 1.1933537067359889e-08, + "loss": 0.4153, + "num_input_tokens_seen": 46991424, + "step": 14930 + }, + { + "epoch": 0.9560847576979706, + "grad_norm": 24.859119415283203, + "learning_rate": 1.1762047762920446e-08, + "loss": 0.3603, + "num_input_tokens_seen": 47006656, + "step": 14935 + }, + { + "epoch": 0.9564048396389476, + "grad_norm": 47.79610061645508, + "learning_rate": 1.1591792285167602e-08, + "loss": 0.3643, + "num_input_tokens_seen": 47021824, + "step": 14940 + }, + { + "epoch": 0.9567249215799245, + "grad_norm": 31.10300064086914, + "learning_rate": 1.1422770846667206e-08, + "loss": 0.3862, + "num_input_tokens_seen": 47037440, + "step": 14945 + }, + { + "epoch": 0.9570450035209014, + "grad_norm": 18.239160537719727, + "learning_rate": 1.1254983658444572e-08, + "loss": 0.303, + "num_input_tokens_seen": 47053760, + "step": 14950 + }, + { + "epoch": 0.9573650854618783, + "grad_norm": 45.93240737915039, + "learning_rate": 1.1088430929984017e-08, + "loss": 0.3218, + "num_input_tokens_seen": 47068928, + "step": 14955 + }, + { + "epoch": 0.9576851674028551, + "grad_norm": 35.72513961791992, + "learning_rate": 1.0923112869228645e-08, + "loss": 0.3807, + "num_input_tokens_seen": 47084672, + "step": 14960 + }, + { + "epoch": 0.958005249343832, + "grad_norm": 42.905418395996094, + "learning_rate": 1.0759029682579801e-08, + "loss": 0.3554, + "num_input_tokens_seen": 47101632, + "step": 14965 + }, + { + "epoch": 0.9583253312848089, + "grad_norm": 25.768041610717773, + "learning_rate": 1.0596181574897389e-08, + "loss": 0.3051, + "num_input_tokens_seen": 47116480, + "step": 14970 + }, + { + "epoch": 0.9586454132257858, + "grad_norm": 29.31188201904297, + "learning_rate": 1.0434568749499107e-08, + "loss": 0.3227, + "num_input_tokens_seen": 47132992, + "step": 14975 + }, + { + "epoch": 0.9589654951667627, + "grad_norm": 28.007709503173828, + "learning_rate": 1.027419140816066e-08, + "loss": 0.3077, + "num_input_tokens_seen": 47149056, + "step": 14980 + }, + { + "epoch": 0.9592855771077395, + "grad_norm": 26.973087310791016, + "learning_rate": 1.0115049751114768e-08, + "loss": 0.3029, + "num_input_tokens_seen": 47164864, + "step": 14985 + }, + { + "epoch": 0.9596056590487164, + "grad_norm": 19.246578216552734, + "learning_rate": 9.957143977051941e-09, + "loss": 0.3514, + "num_input_tokens_seen": 47180544, + "step": 14990 + }, + { + "epoch": 0.9599257409896934, + "grad_norm": 34.508419036865234, + "learning_rate": 9.800474283119142e-09, + "loss": 0.3879, + "num_input_tokens_seen": 47196608, + "step": 14995 + }, + { + "epoch": 0.9602458229306703, + "grad_norm": 26.967897415161133, + "learning_rate": 9.645040864920462e-09, + "loss": 0.3755, + "num_input_tokens_seen": 47213504, + "step": 15000 + }, + { + "epoch": 0.9605659048716472, + "grad_norm": 33.09022903442383, + "learning_rate": 9.490843916516334e-09, + "loss": 0.4015, + "num_input_tokens_seen": 47228288, + "step": 15005 + }, + { + "epoch": 0.960885986812624, + "grad_norm": 26.84346580505371, + "learning_rate": 9.337883630423316e-09, + "loss": 0.452, + "num_input_tokens_seen": 47243712, + "step": 15010 + }, + { + "epoch": 0.9612060687536009, + "grad_norm": 54.509681701660156, + "learning_rate": 9.186160197614423e-09, + "loss": 0.5173, + "num_input_tokens_seen": 47259904, + "step": 15015 + }, + { + "epoch": 0.9615261506945778, + "grad_norm": 35.004150390625, + "learning_rate": 9.035673807517795e-09, + "loss": 0.4795, + "num_input_tokens_seen": 47275072, + "step": 15020 + }, + { + "epoch": 0.9618462326355547, + "grad_norm": 44.16777420043945, + "learning_rate": 8.886424648017698e-09, + "loss": 0.2802, + "num_input_tokens_seen": 47290688, + "step": 15025 + }, + { + "epoch": 0.9621663145765316, + "grad_norm": 21.235763549804688, + "learning_rate": 8.738412905453408e-09, + "loss": 0.34, + "num_input_tokens_seen": 47306496, + "step": 15030 + }, + { + "epoch": 0.9624863965175084, + "grad_norm": 31.245132446289062, + "learning_rate": 8.591638764619324e-09, + "loss": 0.3524, + "num_input_tokens_seen": 47321280, + "step": 15035 + }, + { + "epoch": 0.9628064784584853, + "grad_norm": 51.93947219848633, + "learning_rate": 8.446102408764643e-09, + "loss": 0.3707, + "num_input_tokens_seen": 47337536, + "step": 15040 + }, + { + "epoch": 0.9631265603994623, + "grad_norm": 37.979652404785156, + "learning_rate": 8.301804019593129e-09, + "loss": 0.2796, + "num_input_tokens_seen": 47353024, + "step": 15045 + }, + { + "epoch": 0.9634466423404392, + "grad_norm": 33.141231536865234, + "learning_rate": 8.158743777263333e-09, + "loss": 0.3505, + "num_input_tokens_seen": 47369088, + "step": 15050 + }, + { + "epoch": 0.9637667242814161, + "grad_norm": 25.881275177001953, + "learning_rate": 8.016921860387272e-09, + "loss": 0.3566, + "num_input_tokens_seen": 47384320, + "step": 15055 + }, + { + "epoch": 0.964086806222393, + "grad_norm": 27.325672149658203, + "learning_rate": 7.876338446031416e-09, + "loss": 0.3949, + "num_input_tokens_seen": 47400896, + "step": 15060 + }, + { + "epoch": 0.9644068881633698, + "grad_norm": 40.98260498046875, + "learning_rate": 7.736993709716033e-09, + "loss": 0.3234, + "num_input_tokens_seen": 47416896, + "step": 15065 + }, + { + "epoch": 0.9647269701043467, + "grad_norm": 54.15633010864258, + "learning_rate": 7.59888782541418e-09, + "loss": 0.4736, + "num_input_tokens_seen": 47432320, + "step": 15070 + }, + { + "epoch": 0.9650470520453236, + "grad_norm": 17.9570255279541, + "learning_rate": 7.462020965553151e-09, + "loss": 0.2698, + "num_input_tokens_seen": 47448320, + "step": 15075 + }, + { + "epoch": 0.9653671339863005, + "grad_norm": 19.267284393310547, + "learning_rate": 7.32639330101259e-09, + "loss": 0.4844, + "num_input_tokens_seen": 47463488, + "step": 15080 + }, + { + "epoch": 0.9656872159272774, + "grad_norm": 52.61140823364258, + "learning_rate": 7.1920050011252675e-09, + "loss": 0.3884, + "num_input_tokens_seen": 47479104, + "step": 15085 + }, + { + "epoch": 0.9660072978682542, + "grad_norm": 37.068931579589844, + "learning_rate": 7.058856233676525e-09, + "loss": 0.3994, + "num_input_tokens_seen": 47496448, + "step": 15090 + }, + { + "epoch": 0.9663273798092311, + "grad_norm": 83.22144317626953, + "learning_rate": 6.926947164904162e-09, + "loss": 0.3758, + "num_input_tokens_seen": 47511936, + "step": 15095 + }, + { + "epoch": 0.9666474617502081, + "grad_norm": 27.578569412231445, + "learning_rate": 6.796277959498331e-09, + "loss": 0.4048, + "num_input_tokens_seen": 47528320, + "step": 15100 + }, + { + "epoch": 0.966967543691185, + "grad_norm": 26.990234375, + "learning_rate": 6.666848780600864e-09, + "loss": 0.2726, + "num_input_tokens_seen": 47543296, + "step": 15105 + }, + { + "epoch": 0.9672876256321619, + "grad_norm": 11.355256080627441, + "learning_rate": 6.538659789805834e-09, + "loss": 0.2706, + "num_input_tokens_seen": 47558656, + "step": 15110 + }, + { + "epoch": 0.9676077075731387, + "grad_norm": 33.00820541381836, + "learning_rate": 6.411711147158438e-09, + "loss": 0.3739, + "num_input_tokens_seen": 47574720, + "step": 15115 + }, + { + "epoch": 0.9679277895141156, + "grad_norm": 47.104095458984375, + "learning_rate": 6.286003011155783e-09, + "loss": 0.3126, + "num_input_tokens_seen": 47590272, + "step": 15120 + }, + { + "epoch": 0.9682478714550925, + "grad_norm": 33.4498405456543, + "learning_rate": 6.161535538745877e-09, + "loss": 0.4041, + "num_input_tokens_seen": 47605696, + "step": 15125 + }, + { + "epoch": 0.9685679533960694, + "grad_norm": 32.14289093017578, + "learning_rate": 6.0383088853277475e-09, + "loss": 0.3798, + "num_input_tokens_seen": 47621760, + "step": 15130 + }, + { + "epoch": 0.9688880353370463, + "grad_norm": 31.649654388427734, + "learning_rate": 5.916323204751439e-09, + "loss": 0.3175, + "num_input_tokens_seen": 47639296, + "step": 15135 + }, + { + "epoch": 0.9692081172780231, + "grad_norm": 28.495525360107422, + "learning_rate": 5.795578649317345e-09, + "loss": 0.2636, + "num_input_tokens_seen": 47654656, + "step": 15140 + }, + { + "epoch": 0.969528199219, + "grad_norm": 44.42762756347656, + "learning_rate": 5.676075369776656e-09, + "loss": 0.3059, + "num_input_tokens_seen": 47671168, + "step": 15145 + }, + { + "epoch": 0.9698482811599769, + "grad_norm": 25.46860694885254, + "learning_rate": 5.557813515330468e-09, + "loss": 0.3451, + "num_input_tokens_seen": 47686400, + "step": 15150 + }, + { + "epoch": 0.9701683631009539, + "grad_norm": 28.098102569580078, + "learning_rate": 5.440793233630115e-09, + "loss": 0.3484, + "num_input_tokens_seen": 47701760, + "step": 15155 + }, + { + "epoch": 0.9704884450419308, + "grad_norm": 39.411136627197266, + "learning_rate": 5.325014670776951e-09, + "loss": 0.3073, + "num_input_tokens_seen": 47717248, + "step": 15160 + }, + { + "epoch": 0.9708085269829076, + "grad_norm": 60.595664978027344, + "learning_rate": 5.21047797132157e-09, + "loss": 0.3607, + "num_input_tokens_seen": 47734336, + "step": 15165 + }, + { + "epoch": 0.9711286089238845, + "grad_norm": 26.46986961364746, + "learning_rate": 5.097183278264694e-09, + "loss": 0.3428, + "num_input_tokens_seen": 47750464, + "step": 15170 + }, + { + "epoch": 0.9714486908648614, + "grad_norm": 27.248794555664062, + "learning_rate": 4.985130733055954e-09, + "loss": 0.4272, + "num_input_tokens_seen": 47765824, + "step": 15175 + }, + { + "epoch": 0.9717687728058383, + "grad_norm": 28.56536293029785, + "learning_rate": 4.874320475594107e-09, + "loss": 0.381, + "num_input_tokens_seen": 47781760, + "step": 15180 + }, + { + "epoch": 0.9720888547468152, + "grad_norm": 17.6987361907959, + "learning_rate": 4.764752644227377e-09, + "loss": 0.292, + "num_input_tokens_seen": 47797312, + "step": 15185 + }, + { + "epoch": 0.972408936687792, + "grad_norm": 28.92839241027832, + "learning_rate": 4.656427375752336e-09, + "loss": 0.335, + "num_input_tokens_seen": 47813440, + "step": 15190 + }, + { + "epoch": 0.9727290186287689, + "grad_norm": 31.72882843017578, + "learning_rate": 4.549344805414246e-09, + "loss": 0.343, + "num_input_tokens_seen": 47829440, + "step": 15195 + }, + { + "epoch": 0.9730491005697458, + "grad_norm": 33.45530700683594, + "learning_rate": 4.443505066907049e-09, + "loss": 0.4009, + "num_input_tokens_seen": 47844608, + "step": 15200 + }, + { + "epoch": 0.9733691825107228, + "grad_norm": 38.720211029052734, + "learning_rate": 4.338908292372934e-09, + "loss": 0.2898, + "num_input_tokens_seen": 47860160, + "step": 15205 + }, + { + "epoch": 0.9736892644516997, + "grad_norm": 43.10197448730469, + "learning_rate": 4.235554612402214e-09, + "loss": 0.3906, + "num_input_tokens_seen": 47875648, + "step": 15210 + }, + { + "epoch": 0.9740093463926766, + "grad_norm": 45.98534393310547, + "learning_rate": 4.133444156033006e-09, + "loss": 0.3799, + "num_input_tokens_seen": 47892736, + "step": 15215 + }, + { + "epoch": 0.9743294283336534, + "grad_norm": 37.45781326293945, + "learning_rate": 4.032577050751551e-09, + "loss": 0.3319, + "num_input_tokens_seen": 47908992, + "step": 15220 + }, + { + "epoch": 0.9746495102746303, + "grad_norm": 29.421432495117188, + "learning_rate": 3.932953422491669e-09, + "loss": 0.3489, + "num_input_tokens_seen": 47924736, + "step": 15225 + }, + { + "epoch": 0.9749695922156072, + "grad_norm": 48.30408477783203, + "learning_rate": 3.8345733956345326e-09, + "loss": 0.2816, + "num_input_tokens_seen": 47941056, + "step": 15230 + }, + { + "epoch": 0.9752896741565841, + "grad_norm": 30.11746597290039, + "learning_rate": 3.737437093008777e-09, + "loss": 0.3635, + "num_input_tokens_seen": 47957824, + "step": 15235 + }, + { + "epoch": 0.975609756097561, + "grad_norm": 42.55520248413086, + "learning_rate": 3.641544635890281e-09, + "loss": 0.4132, + "num_input_tokens_seen": 47973056, + "step": 15240 + }, + { + "epoch": 0.9759298380385378, + "grad_norm": 19.125173568725586, + "learning_rate": 3.546896144001832e-09, + "loss": 0.3959, + "num_input_tokens_seen": 47988928, + "step": 15245 + }, + { + "epoch": 0.9762499199795147, + "grad_norm": 50.54212951660156, + "learning_rate": 3.4534917355132364e-09, + "loss": 0.3935, + "num_input_tokens_seen": 48004032, + "step": 15250 + }, + { + "epoch": 0.9765700019204916, + "grad_norm": 33.77085494995117, + "learning_rate": 3.361331527040878e-09, + "loss": 0.4168, + "num_input_tokens_seen": 48020800, + "step": 15255 + }, + { + "epoch": 0.9768900838614686, + "grad_norm": 32.820064544677734, + "learning_rate": 3.270415633647938e-09, + "loss": 0.3997, + "num_input_tokens_seen": 48036800, + "step": 15260 + }, + { + "epoch": 0.9772101658024455, + "grad_norm": 23.62579345703125, + "learning_rate": 3.180744168843952e-09, + "loss": 0.2911, + "num_input_tokens_seen": 48051264, + "step": 15265 + }, + { + "epoch": 0.9775302477434223, + "grad_norm": 27.472536087036133, + "learning_rate": 3.0923172445849187e-09, + "loss": 0.226, + "num_input_tokens_seen": 48066176, + "step": 15270 + }, + { + "epoch": 0.9778503296843992, + "grad_norm": 36.02907943725586, + "learning_rate": 3.0051349712727493e-09, + "loss": 0.3135, + "num_input_tokens_seen": 48081984, + "step": 15275 + }, + { + "epoch": 0.9781704116253761, + "grad_norm": 30.320404052734375, + "learning_rate": 2.9191974577555954e-09, + "loss": 0.4143, + "num_input_tokens_seen": 48096896, + "step": 15280 + }, + { + "epoch": 0.978490493566353, + "grad_norm": 17.627288818359375, + "learning_rate": 2.8345048113274096e-09, + "loss": 0.2341, + "num_input_tokens_seen": 48112128, + "step": 15285 + }, + { + "epoch": 0.9788105755073299, + "grad_norm": 31.73265838623047, + "learning_rate": 2.751057137727941e-09, + "loss": 0.3353, + "num_input_tokens_seen": 48127616, + "step": 15290 + }, + { + "epoch": 0.9791306574483067, + "grad_norm": 66.26738739013672, + "learning_rate": 2.66885454114274e-09, + "loss": 0.3961, + "num_input_tokens_seen": 48142144, + "step": 15295 + }, + { + "epoch": 0.9794507393892836, + "grad_norm": 62.21368408203125, + "learning_rate": 2.5878971242025983e-09, + "loss": 0.3685, + "num_input_tokens_seen": 48158272, + "step": 15300 + }, + { + "epoch": 0.9797708213302605, + "grad_norm": 26.044448852539062, + "learning_rate": 2.5081849879837746e-09, + "loss": 0.3216, + "num_input_tokens_seen": 48173120, + "step": 15305 + }, + { + "epoch": 0.9800909032712375, + "grad_norm": 20.764760971069336, + "learning_rate": 2.429718232007771e-09, + "loss": 0.3423, + "num_input_tokens_seen": 48188672, + "step": 15310 + }, + { + "epoch": 0.9804109852122144, + "grad_norm": 26.268352508544922, + "learning_rate": 2.3524969542414453e-09, + "loss": 0.2693, + "num_input_tokens_seen": 48204480, + "step": 15315 + }, + { + "epoch": 0.9807310671531912, + "grad_norm": 15.829779624938965, + "learning_rate": 2.2765212510963418e-09, + "loss": 0.3537, + "num_input_tokens_seen": 48219584, + "step": 15320 + }, + { + "epoch": 0.9810511490941681, + "grad_norm": 32.62509536743164, + "learning_rate": 2.2017912174289164e-09, + "loss": 0.2813, + "num_input_tokens_seen": 48235904, + "step": 15325 + }, + { + "epoch": 0.981371231035145, + "grad_norm": 31.979473114013672, + "learning_rate": 2.128306946540648e-09, + "loss": 0.3963, + "num_input_tokens_seen": 48252992, + "step": 15330 + }, + { + "epoch": 0.9816913129761219, + "grad_norm": 31.331867218017578, + "learning_rate": 2.0560685301774792e-09, + "loss": 0.3328, + "num_input_tokens_seen": 48267840, + "step": 15335 + }, + { + "epoch": 0.9820113949170988, + "grad_norm": 21.032350540161133, + "learning_rate": 1.985076058529933e-09, + "loss": 0.3753, + "num_input_tokens_seen": 48282688, + "step": 15340 + }, + { + "epoch": 0.9823314768580756, + "grad_norm": 37.26376724243164, + "learning_rate": 1.9153296202328863e-09, + "loss": 0.478, + "num_input_tokens_seen": 48300096, + "step": 15345 + }, + { + "epoch": 0.9826515587990525, + "grad_norm": 34.11316680908203, + "learning_rate": 1.8468293023656823e-09, + "loss": 0.3943, + "num_input_tokens_seen": 48315136, + "step": 15350 + }, + { + "epoch": 0.9829716407400294, + "grad_norm": 18.752748489379883, + "learning_rate": 1.7795751904515766e-09, + "loss": 0.4025, + "num_input_tokens_seen": 48330240, + "step": 15355 + }, + { + "epoch": 0.9832917226810063, + "grad_norm": 58.279869079589844, + "learning_rate": 1.7135673684584019e-09, + "loss": 0.3109, + "num_input_tokens_seen": 48345280, + "step": 15360 + }, + { + "epoch": 0.9836118046219833, + "grad_norm": 34.251678466796875, + "learning_rate": 1.6488059187974579e-09, + "loss": 0.403, + "num_input_tokens_seen": 48361792, + "step": 15365 + }, + { + "epoch": 0.9839318865629602, + "grad_norm": 30.479703903198242, + "learning_rate": 1.5852909223242894e-09, + "loss": 0.4034, + "num_input_tokens_seen": 48377408, + "step": 15370 + }, + { + "epoch": 0.984251968503937, + "grad_norm": 16.166257858276367, + "learning_rate": 1.5230224583380192e-09, + "loss": 0.3679, + "num_input_tokens_seen": 48392896, + "step": 15375 + }, + { + "epoch": 0.9845720504449139, + "grad_norm": 40.26905822753906, + "learning_rate": 1.4620006045816813e-09, + "loss": 0.4625, + "num_input_tokens_seen": 48407552, + "step": 15380 + }, + { + "epoch": 0.9848921323858908, + "grad_norm": 16.73556137084961, + "learning_rate": 1.4022254372417774e-09, + "loss": 0.2809, + "num_input_tokens_seen": 48424320, + "step": 15385 + }, + { + "epoch": 0.9852122143268677, + "grad_norm": 35.438411712646484, + "learning_rate": 1.3436970309481655e-09, + "loss": 0.5055, + "num_input_tokens_seen": 48441984, + "step": 15390 + }, + { + "epoch": 0.9855322962678446, + "grad_norm": 16.894039154052734, + "learning_rate": 1.2864154587742815e-09, + "loss": 0.333, + "num_input_tokens_seen": 48456832, + "step": 15395 + }, + { + "epoch": 0.9858523782088214, + "grad_norm": 33.132869720458984, + "learning_rate": 1.2303807922370292e-09, + "loss": 0.3719, + "num_input_tokens_seen": 48472512, + "step": 15400 + }, + { + "epoch": 0.9861724601497983, + "grad_norm": 62.29145431518555, + "learning_rate": 1.1755931012961128e-09, + "loss": 0.3169, + "num_input_tokens_seen": 48488832, + "step": 15405 + }, + { + "epoch": 0.9864925420907752, + "grad_norm": 19.01352310180664, + "learning_rate": 1.122052454354705e-09, + "loss": 0.3615, + "num_input_tokens_seen": 48503936, + "step": 15410 + }, + { + "epoch": 0.9868126240317522, + "grad_norm": 21.924360275268555, + "learning_rate": 1.0697589182590005e-09, + "loss": 0.4383, + "num_input_tokens_seen": 48519040, + "step": 15415 + }, + { + "epoch": 0.9871327059727291, + "grad_norm": 27.970205307006836, + "learning_rate": 1.018712558297996e-09, + "loss": 0.6, + "num_input_tokens_seen": 48535040, + "step": 15420 + }, + { + "epoch": 0.9874527879137059, + "grad_norm": 38.23207473754883, + "learning_rate": 9.689134382037113e-10, + "loss": 0.4438, + "num_input_tokens_seen": 48551808, + "step": 15425 + }, + { + "epoch": 0.9877728698546828, + "grad_norm": 37.27165985107422, + "learning_rate": 9.203616201508557e-10, + "loss": 0.3976, + "num_input_tokens_seen": 48566592, + "step": 15430 + }, + { + "epoch": 0.9880929517956597, + "grad_norm": 48.10826110839844, + "learning_rate": 8.730571647570517e-10, + "loss": 0.3103, + "num_input_tokens_seen": 48582720, + "step": 15435 + }, + { + "epoch": 0.9884130337366366, + "grad_norm": 50.18999099731445, + "learning_rate": 8.270001310825003e-10, + "loss": 0.4765, + "num_input_tokens_seen": 48599104, + "step": 15440 + }, + { + "epoch": 0.9887331156776135, + "grad_norm": 13.126496315002441, + "learning_rate": 7.821905766297599e-10, + "loss": 0.3114, + "num_input_tokens_seen": 48615040, + "step": 15445 + }, + { + "epoch": 0.9890531976185903, + "grad_norm": 27.947267532348633, + "learning_rate": 7.386285573441897e-10, + "loss": 0.3971, + "num_input_tokens_seen": 48630976, + "step": 15450 + }, + { + "epoch": 0.9893732795595672, + "grad_norm": 27.045764923095703, + "learning_rate": 6.963141276136175e-10, + "loss": 0.283, + "num_input_tokens_seen": 48646080, + "step": 15455 + }, + { + "epoch": 0.9896933615005441, + "grad_norm": 19.698862075805664, + "learning_rate": 6.552473402678949e-10, + "loss": 0.2476, + "num_input_tokens_seen": 48662528, + "step": 15460 + }, + { + "epoch": 0.990013443441521, + "grad_norm": 46.163021087646484, + "learning_rate": 6.154282465794524e-10, + "loss": 0.3244, + "num_input_tokens_seen": 48680000, + "step": 15465 + }, + { + "epoch": 0.990333525382498, + "grad_norm": 30.393213272094727, + "learning_rate": 5.768568962629672e-10, + "loss": 0.4256, + "num_input_tokens_seen": 48696256, + "step": 15470 + }, + { + "epoch": 0.9906536073234748, + "grad_norm": 43.717071533203125, + "learning_rate": 5.395333374751398e-10, + "loss": 0.3062, + "num_input_tokens_seen": 48711168, + "step": 15475 + }, + { + "epoch": 0.9909736892644517, + "grad_norm": 45.56324005126953, + "learning_rate": 5.034576168149174e-10, + "loss": 0.5477, + "num_input_tokens_seen": 48726848, + "step": 15480 + }, + { + "epoch": 0.9912937712054286, + "grad_norm": 48.832122802734375, + "learning_rate": 4.686297793231597e-10, + "loss": 0.4838, + "num_input_tokens_seen": 48743232, + "step": 15485 + }, + { + "epoch": 0.9916138531464055, + "grad_norm": 26.313810348510742, + "learning_rate": 4.350498684829729e-10, + "loss": 0.4541, + "num_input_tokens_seen": 48758080, + "step": 15490 + }, + { + "epoch": 0.9919339350873824, + "grad_norm": 42.293983459472656, + "learning_rate": 4.0271792621926483e-10, + "loss": 0.3123, + "num_input_tokens_seen": 48773120, + "step": 15495 + }, + { + "epoch": 0.9922540170283592, + "grad_norm": 14.059409141540527, + "learning_rate": 3.716339928987455e-10, + "loss": 0.3749, + "num_input_tokens_seen": 48789056, + "step": 15500 + }, + { + "epoch": 0.9925740989693361, + "grad_norm": 67.56918334960938, + "learning_rate": 3.41798107330149e-10, + "loss": 0.4189, + "num_input_tokens_seen": 48804288, + "step": 15505 + }, + { + "epoch": 0.992894180910313, + "grad_norm": 35.841514587402344, + "learning_rate": 3.1321030676390027e-10, + "loss": 0.3683, + "num_input_tokens_seen": 48818816, + "step": 15510 + }, + { + "epoch": 0.9932142628512899, + "grad_norm": 25.882349014282227, + "learning_rate": 2.8587062689222617e-10, + "loss": 0.291, + "num_input_tokens_seen": 48835520, + "step": 15515 + }, + { + "epoch": 0.9935343447922668, + "grad_norm": 29.71622085571289, + "learning_rate": 2.5977910184904473e-10, + "loss": 0.3139, + "num_input_tokens_seen": 48851328, + "step": 15520 + }, + { + "epoch": 0.9938544267332438, + "grad_norm": 35.663578033447266, + "learning_rate": 2.3493576420985373e-10, + "loss": 0.3466, + "num_input_tokens_seen": 48866304, + "step": 15525 + }, + { + "epoch": 0.9941745086742206, + "grad_norm": 15.850737571716309, + "learning_rate": 2.11340644991842e-10, + "loss": 0.3311, + "num_input_tokens_seen": 48882752, + "step": 15530 + }, + { + "epoch": 0.9944945906151975, + "grad_norm": 39.47983169555664, + "learning_rate": 1.8899377365388936e-10, + "loss": 0.3046, + "num_input_tokens_seen": 48898304, + "step": 15535 + }, + { + "epoch": 0.9948146725561744, + "grad_norm": 19.749874114990234, + "learning_rate": 1.6789517809634447e-10, + "loss": 0.4284, + "num_input_tokens_seen": 48914048, + "step": 15540 + }, + { + "epoch": 0.9951347544971513, + "grad_norm": 66.90821075439453, + "learning_rate": 1.480448846609139e-10, + "loss": 0.3291, + "num_input_tokens_seen": 48930176, + "step": 15545 + }, + { + "epoch": 0.9954548364381282, + "grad_norm": 25.663530349731445, + "learning_rate": 1.294429181311063e-10, + "loss": 0.3522, + "num_input_tokens_seen": 48945920, + "step": 15550 + }, + { + "epoch": 0.995774918379105, + "grad_norm": 23.315927505493164, + "learning_rate": 1.1208930173145503e-10, + "loss": 0.4063, + "num_input_tokens_seen": 48960832, + "step": 15555 + }, + { + "epoch": 0.9960950003200819, + "grad_norm": 21.742311477661133, + "learning_rate": 9.598405712840651e-11, + "loss": 0.3278, + "num_input_tokens_seen": 48977280, + "step": 15560 + }, + { + "epoch": 0.9964150822610588, + "grad_norm": 21.519027709960938, + "learning_rate": 8.1127204429432e-11, + "loss": 0.3526, + "num_input_tokens_seen": 48992512, + "step": 15565 + }, + { + "epoch": 0.9967351642020357, + "grad_norm": 32.181678771972656, + "learning_rate": 6.751876218336061e-11, + "loss": 0.351, + "num_input_tokens_seen": 49008128, + "step": 15570 + }, + { + "epoch": 0.9970552461430127, + "grad_norm": 24.76947593688965, + "learning_rate": 5.515874738071247e-11, + "loss": 0.3451, + "num_input_tokens_seen": 49024512, + "step": 15575 + }, + { + "epoch": 0.9973753280839895, + "grad_norm": 49.30158615112305, + "learning_rate": 4.404717545303249e-11, + "loss": 0.3131, + "num_input_tokens_seen": 49040128, + "step": 15580 + }, + { + "epoch": 0.9976954100249664, + "grad_norm": 15.697863578796387, + "learning_rate": 3.418406027322352e-11, + "loss": 0.3111, + "num_input_tokens_seen": 49055360, + "step": 15585 + }, + { + "epoch": 0.9980154919659433, + "grad_norm": 33.91520309448242, + "learning_rate": 2.5569414155546254e-11, + "loss": 0.3576, + "num_input_tokens_seen": 49071360, + "step": 15590 + }, + { + "epoch": 0.9983355739069202, + "grad_norm": 49.540733337402344, + "learning_rate": 1.8203247855397287e-11, + "loss": 0.2698, + "num_input_tokens_seen": 49086144, + "step": 15595 + }, + { + "epoch": 0.9986556558478971, + "grad_norm": 34.655052185058594, + "learning_rate": 1.2085570569642101e-11, + "loss": 0.3915, + "num_input_tokens_seen": 49101312, + "step": 15600 + }, + { + "epoch": 0.9989757377888739, + "grad_norm": 57.687381744384766, + "learning_rate": 7.216389936171019e-12, + "loss": 0.3151, + "num_input_tokens_seen": 49116672, + "step": 15605 + }, + { + "epoch": 0.9992958197298508, + "grad_norm": 17.42046546936035, + "learning_rate": 3.5957120342322567e-12, + "loss": 0.1751, + "num_input_tokens_seen": 49132288, + "step": 15610 + }, + { + "epoch": 0.9996159016708277, + "grad_norm": 17.059843063354492, + "learning_rate": 1.2235413842098807e-12, + "loss": 0.3884, + "num_input_tokens_seen": 49148096, + "step": 15615 + }, + { + "epoch": 0.9999359836118046, + "grad_norm": 20.215747833251953, + "learning_rate": 9.98809480678986e-14, + "loss": 0.2485, + "num_input_tokens_seen": 49163840, + "step": 15620 + }, + { + "epoch": 1.0, + "num_input_tokens_seen": 49166912, + "step": 15621, + "total_flos": 2.8707953551107686e+17, + "train_loss": 0.44674425404505724, + "train_runtime": 6032.4024, + "train_samples_per_second": 20.716, + "train_steps_per_second": 2.59 + } + ], + "logging_steps": 5, + "max_steps": 15621, + "num_input_tokens_seen": 49166912, + "num_train_epochs": 1, + "save_steps": 782, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.8707953551107686e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..18c78b1 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c315498741d2e87b19c14083378862cee244281352727fb9b52d64435c1878f +size 6289 diff --git a/training_eval_loss.png b/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..54343f1e980b300a1fe72063d03798c736bd9ae3 GIT binary patch literal 39979 zcmeFZ^;cG17X|u&ii(6mC=w#l2+|D-0!j)}(jeX4rJ^VZh_sY|ba%H%gLHREch_Bq z_q*}S9pnB1_Z@@rsyxp*`|Q2;T64`g_xbWlS_~VL2or@uVZVIwTn>do_e7!4Ca+<@ zzp!_W&cgqAY(C07isb{E*BbZeXnayWtIGBgYG@XJp7s^I!s)~H?B>0P!cU7< z&=WJ(DkEo4&S?C+ioO*sRtXM-NN{s6aLaTD@j1j;k%h$5UqA>B!93q$LaNlun}$U?&?j}ygS+Hk+CPKwqI3QD_dq9E-?DgW%!7N zfVVGVO!ZAr@sA}stIzb#Ag(#Sy8+W{&2&0{4RTJ|9I3~`4<)zw(D*% z`i%%FXp^iwXi#V+qxV&djfdNzyYxGWUQxgji$a%Zfb7Up8oyi^3|(Bv9X=S6Yi}rsDsH5xZyl@ zOL_qw(_g4x$jBJLG!&SR{kTIdy|Grlz6E#BH|QiqzjP^E*?WJ&BQ~1Xp*!C|rn|eF zc(hnAM(R7k|Yx=ZEMSBQL*_4|56R-xZ?+igb`Q9PgwWrem?u) zxIgCLbT}6V4~xXnw{JIljoG7l?9#?>?%T#|xVXjU<>zO|U%q^q z3Q^LS+H54t?w*k2Hy`DK31*ILN1bfcO<|*fh_emdAeVA53%oaKv;4)Z9Sq_%y^n8 zr*7r`H)KgwO|5NZDF2JU|DXE$ON4}k7+7~}j&_$bkAs7QCs$Y7XPbjsLt>+&{c5$e z`ZM12n~8{s@SUG*7Z~?pR%*y7E8h(eJmZ3YRB&fn-CfSE?#@)@#KO2l!egs-xHZ?m zK)|HYmLi*gDw+RblCiEj^!K->$Ek>^DMM9N6&EjWPoACx?7l3zXpfD_THB@GFG%fi52tFt4((2Ac8&vCj zg4uXQLwgriT)oS{=tD74=aYyCU zr~8H@MVakIg(d@X^76kS2F;2f4cM$#?oZFmd?+vHy+bAWfPx}H#g5;8 z=j3-Q$BHe;K44(i8M1Y!DbR;LR>i1UuRfPvAFs5sw@-qpIDy^7(HP5P=X1PX9b05J zG7Z-Z2*6Dc^-Jqw!}JpshGmL~h)^1JfUGy?j)zUmb<@Ja0=As)z+qioUD{4P9XoqS zSAy8g+L}Ln6PJJI=Esj8?MYk~iEHw>m94C-Abnq#k&%HZ59f1YIXpb%Xw+CMx2*q! z-A_8*97L^JT@6c(C4Y7=Ek#<7?#D!AL_{-10H4tF=a-!;WB&eqMI{;j)Yy0lfBBWY z>(DMS8CgJVEE&pXzAbl6Q!(si5Y^PuQp@H{W9jCf`&KqKZ%+<3Q75p0z7nzf5Ij~< z9tjX5b@F=u9zF6%k*!fGQI6T|$hPYI%muQWmqvCnOksvf;S!5S=@zU47Ml)rQyqNh-giIPn8L%yw}PZg7cFxP zpJIke$wq|zYIv}utc=BGUJ|11O|dx%WLPR*-bf`)w?gOawA()MPfQ1&^k-{!jyX(7 z6`PMKS30u?@bBF<=!}_b=bX4KzECjgy4`NO*!_`E)AbhYG&Qv-sY{iHT@R7G13{gs zNqW4u`gWwqY}BfeB&0v?=TA?lwRZ>!8zGL2hVm#UT(_}%<1v|_I#v; z_0B(8WO>S$^|N7ZnDyK4LS7P}l8$O>B=bO$f^wlT{(zSxIs}-ZP=Wno)9K70*8-cVqySQj9mV<*V5E9`RBfcE+oCL_c{HcPW~|JNs8-P zY`GH5RJn7TC))o_zP_xyyq9$H+p#h`EE1mJfBy_)JkFdT*^_ykFb`b zYk<-z(hPnoEfIWfV7HYcr+xyzLM~U$|i_zo*l2V+OH@JoS)c`x*upG z>$FxfgQMZLt03x6+`QP63_J3YR--?O=}_KBSoYgQMB?!1$Ur|_#hst5{b0ZUWNvOw z_SLJWii!lVtthHbwr;%r^EI{GgfS>GQo`Jv5mJ61&zU$3R0^`f_I&$Hr{MW)w8J<# z1;q!L{a>PbMW#dFownz5O$HbRPBw&3PEVcYLX0q@;+yTCmertR z1Ad{lS!g<3@MW$w%;x-be+CK+Akq5O;t6RXA#?!&0R?AG0Z1vBH*Q4m*wHyxk|KGQ znCoe^`{}jo*Qc7PV}Ca^d_*F4s@{7WzQ+O^t9<;mrlw~3@F{(t!TH%~KgSBd;u>9+ zO;}~6Ln4}>prHCYGJLg=V9W;Ce0?dYsLt(Agjk~NXd=*WQQ&u({ zl*^mNdwSB+}^&(&c)HO#JWd{Q$1JGWyypk zA~-(&VWb&^llar8p4Kq;Z;CTDDpk;eW8IIM^(XFIRLFk$`nBK;$&L!W#)Vt3_W{#b zP6=Ik#bGgS>M*u)ia&Q)xE>1Ia+OK^>Qg(<&WR}

>2 zy?=k>ICj@zbC-6?!h^Fws<&S7`|>3>bp~o%ydfJAB`a%B(s)?fzirjMs>f<2fAux< zhcdiAeWHWd&mB60JViqnBF=3!?bW^qWsC+w-nFz$R7`B7;_UovzZ&x=0tpq;meAbUG<^>}|>sX)c7bk$+pDY<<@QWoz@%7&fVNpaSa%W(st zrgAZffU7bZUZF}RlB@bx^H(P?4q6JgOy5^1A;Hx`83V03iXuZ@+yIhpFzpvv7*gwOpD4wQeywU~#X@sDM`^Vj^ zCRil=r&bOQO5cNm%woy`Fa~!-^NzUoGZ6S9U=dm6ezU!bfJf$z0lguuDKyOx)cax^Drk8SUBGS&Jvyr8HM*yB9C6LCq=_{m{|T(SP3A z+)NH=2<;LEk_BL!rW31cbhL!f!+FX*&RBWx>A2&HImQJ35)IbRh7U1gP6<$aUktVLN zLd3lGUwPL`J`@zNEgy%5g*Dm@s5e5`r|$|#g+Hm)mwgTZQx-2#EkGCk!0mR#MisQ^ zqbD8B)2IJWuf`KCyj>suqBo`((wVK1&BR&xG*gk){lwAzWK+!A+B#!I5Q@ps-l{}v z=;O?hv$JCh0Pu9g;z+O6z8uS8Mx>G%8bYt6n3|=>@0zZZ&B*Dhqz^rVG6Mlr2s4}2 zs~8yFWp*Z!M)(B29~IIS6`T$R#*ad+?Cc8nIxayYL;%HYxY$Al2ADb0dUie?7wOyi zT24*~YH+4XVO>v8&wTlmI8xD|cd7qQDRGy_mPS860{XgR&5bMfIn8yUQ}{~CKYZji zZHUXa;n`kI=DP6=6}g<4h7`d_4_v4!$Q%{min$Uh?%YfBv8X-`%~CHPb`1#4H4xCW z%U%HR5i(-GX%`k2CT46*=XSg&B8z9gKJM1Q@>WOZvyV^SS-C_QV@Y|rl9Tf7Nr3Cc z_p~LT6$fl`aq}SwDJjawkNu#|!*2QmB@XR)yeAEUxxdkrBj0(~_!BOfv@gNqj1hoj zGNz`cA)4y|*PV`6i=B4+l+xL*j_Tw-f!SwaWkohkNM|}JyK!*)NBhYdG}x{0yuGik zt*sp#9&Q4kapUJ`ROAnU&3Lu7wf%7cEPBEp^{_|6ncw~!$R^t#wZTP0Lqnhe)T!Zl z;p5+TWG0uE#IrRj4adsZk!ez|{{i?U9=6@we7c&7cUn$z6-pwMAr2lB`W#nR*NyG% zw+%k{9P3Ekg(qHs_A93YU$x2=G65wA+9ZGoF3U-jySqC_0HAJE;xk-4ZU(cisEg;+N_r zCdz@2`W7D_Z>+vg=s{Z}_)bz;S-EtTz_%gGMolFtER4{zeY7mi)m>Ofh%Ty$PrL3i zCMM^a;D$V{TJ*>P>y<%fJdrqLKF&UQumyNALa{-i7~f1`%!=!rokjDE1S}1yu2$HZKJ-pJ^gvT zu0=q2j|alSkDyt8!MWeK5DHW1NX)lyHNRd4ZS3v+>FdK66cmJtdUpXFA{KNXO{#!!+k$p(qm`-piR*!7)!C8B zaG{C8_SIr#2HUmK2q20Q>Ucuf`SyA<8KLw8PWS`&Y6g<%_flVa>2g-NBd}VBfcYT- zWrl2PoX`{IX!K-P9T_Lm5~RHjYZ-5BDH`da(Oeu%b9ZNQV`{Pe_xBz29RkNItkC?{ z7{t1Lf`$aT7LMagwbJk8!k^?#=Cl=-j*gbIS)p@uKbX4ee!8m!EuMBXJpmDsPlL|0 zM7d2Ev-|<6`pLd(sWl}Ka0(jjntl64R37Khj(vkl9}b%<98lWi)YLoiL><6!{o7Fj zZe?l-1@5Outx2bMxvd}VkGn951&|h|2l7BR=Nz@XL`q7Up;nqVin|MFxIHOZ7aCa( z-#fih}Dix%aU5Bb7hs<6;6Bo%DE z&jA6ABO~`692_EN=G!9bUDhibJ|hC6mcX|Sl(2yuTnXLepdM!5ArkaIDH2}Mr z-~M?5T_^y>($%7ogZ=9B_SZ7}3Px$38yl9;aC$fBki#7OQWLsl6VMS@EVS(3q zhsKkAVIpM78ZxvE{A{m#oJT_RzXy_I8%QmyPX7Ht^#EYDLnQPpjri$abLdX3t!X3< zjNjzm+UAAy1VmOd@O+2UzDy;HNs$@+G#i~Q_0WzfIi5g7Uyy3&9qsLNCx_epJC77H zO%^($7e`B3I|YszfJTbbtafKmEBO|t=^l}iLIWvEuQjwkW}+oSIX7O-2hXK<&S54` zuQi;@>LKj2GCNgm?YMT#aiI61V!pK*4RJ=bDB2A7Lbx>R=3%D7?AXb+*MQe@g3X;Eclvygk`1_89}OdEiyx1nyyFS-)u zAKrUKP^(Ft3yt_T(kLBY4zmF=T8AUj3D{t>qG~|hu;opRzwGSnST~05_6K9%)A(U3 zpWM?GCwvu8q;!QiA3B-9-@l(na@$h{eStNZSz0n)MbE}`SyJtCsWcnmC^8>YSyL`l zX$N-Z5SW$&U=5igBj{Yd$L00Wd~*cETxNQj5`EiB9c`X0NrQSRj@v z|N1i{?eJf}d1b#CrY$NeTB`Y}FVnkOH}8$RCa|=y(-9)1tE(%FpM!_z2dpO2u3FpJ zwDNlx{RsKg78pBWHS;Gjg4$EI@0moKkFgK-_OGe@P!LT4;eqBuO#D=PTRD>g+tV)$DDJJS=- zzwhqsXjsU8>E8=$6zo#zx$A5-TiPJvWk8-OWB)J4s<_V*2dId%nZ1b;kmetO-=WxVG{ z(J_(VCqPpO|73!?YWeRkIdCBAi39e4vA2M93vmG~1W|`ub-cz4#g$EOVz%zm{6 zHbV=PyqbDkLDSV}r_Bb!p%NXV;S?bswqb zG5jtE2!+~E{u+`s!tZ>-ArS{s2?%)>a&q!0ewR=f9+1%H`;QBjA}Iu`0Ul!AWAU=6 zI>eBX>9qE1^TDV8H&I<}hcl9h&ygW+rw*hmwCxvS zmr)N!Q1fpPO#nvZc7!3C%Uqb|d}?ycLji#pzzYbC11;tu06~z-=<@a3b>><^0H8$1 zymaY<3<<uU>$HtC%_73pMJ+W zFVTiFys3k$XI7fcGzJ&|oiqxq7BP=)6V-lfdOCf*H*U2K35WTpA+{$-mrbp$`BdLX zxh#K5G0_F@{ldA=@fCnVj@_~%f+K*H^EYlc$qVD3_z0=g(b+ih z3j`XE!&+M~?d#89zdnNO0lJ^{eA~UfwXqw6`!PK6V?d(eq$_6XK=(N6aems5$^*&) zHd*J|KwrFwFXn;_#6&%y??Kmbp!+&NUIPYh3JU8Cz^-EMB#Viv{&Bf-wK4`rpblD~ zhGv3>L=TbH0?%MBrC@1Ahb#8$9dKC9&CRl;PP4Ztxww?AtgYo#ip?`Cj+W9n78(1q z)YC_umovpJEpvcx2!=jdZ?wd!1=2z0F_K7Hpw#1&k&%gH575ITx4^6=>FVjJ=W4us z`LYEhF?@jSBFR0p&{JnwHdj`*=B=h?H|WR$=1BMQ_NI5P)L0y^EJw6erK2hp1_DY3 zhK#xbZasueQ_^q}Ssctwh#y$SXJTTaWYCk}~wae4V*ohNtkfEJzmVUi^aNxs;6;S6?D2*|Yb zjEsyhMZVox>b!IBu9Cj;C+3c)5jS#9h3cDIH{e5Jq-A_GuDExKMuW7WyAwb>_ODYY zDJfBMa42#EurFPXXVG*&OuT*PP69*{jY_8C=GK;+j*gD0+(t01#0~4sWd?~&`L6o$ z^^?5ivUo8OT2)ZqQ@#>&%K^##x}u^Y7yuSM^f&g=>3J+nQtQ@>6TRc>bJG_tSyhRXE_3l#qB0I&>D~1R9=b>Fe0o3Ltx= z{7OpFD8dOVj{yZK9<)Dtp6HJG(+DoB`043sxhdwWxTNwhP&{~?hJgX42o^mb_K?wlq-F`9;-BA@y`6qf8`B1tP7CMpC#5I2Mr5a%HUrI?;PDkO&+98Y< zq$LCB$NDc9ygH&UfXqS+DXD9es7se+YG$Us!fCq%(8O{%k5KPtwa&U*M4coHA8*TaJQTgW(v6dMCA zEp3>T!q1*XXi2knod;c{UgG|ofd)VcvO@sq0h=mp&vjhf1>n>3vYxnsp3_44 z*&Nj3dJBhvL8#(~dDY4gVp-uT4Y!u2f7}+jCVLEO!4Rkodi%hXXAEQZi%!C7fYD?v z@0g3{+*DzFSv2>WE;JqpA@K1AVRyNM1z>^4RfqHUVTZi0uJ$NBg>AD4>Gns!2?UAF8X@FuOwH)CI6`IKL?VbNi2tZ6Dzlh~5=!smaXnGLFj2*sY@lgbSuw*%XQ7(rnZHi& z`~Ca3yyF0tC#Y%5|`KiChJ$ zDx_loh|SATmJ!hyTH5c>g{)PdyNip9XY3EJj+Jxz5;7rDd6b|>3~;nhv}#bmNUn?L zdv?OjM=PFzPfUnSI$0)`ijA$HDqWU|=!CciIGt?#DSkiIk%af%mV<43YK}ah^bwmDjm3uA;Sp1wq7@0D5vF2Z#M1 z6_UvPdIO=O0r?%+pNyT*P$~5f>u5W>yCMl)--3g6^Z6k>yANHSJ$nZF>k!sHB7i~Q zL4NF7w4L+c+}ZgBa1BW~Duk690l>i)kByC~qd!GFZ-GV~J;g*PH9!DUK(_!+1(o7A z3_D`_hGd z&G07A3j{W1vthQat*wmo_tcW%TKO+@K~Y0c#9v`tE0BD_msC4iYU4Ed?rO#{ufdPAg=;4LNA$Hm?goNI= z)5YZEJb{|nj+hC;7}XIOEzX-1%uF#kYv3NZ2c)$a5WavEk-i*Q=hxQt#qv@CXTmzJEtBo)Hkb z`BHnX+_&LQUWP@mR=LUyVCm6jBZ5>}_be(p8onxlY$0b5=J2#7*ps{-5)K^Jo0nLR=h`7AK-K=wMq zY9JH>6pi9Fo5h=!An(HFnFB7SuD#t4WUmcSvO1H@W%xEPn++G#8nS^x*KE_tF9wLp ztcoPw=Ml=(vnb|;frAj$#5%X?gu*SM&-#tJMZK*FX`~(}R#m5astA$>@MGAQhM$|8 z3o)m<@3hrSec0)7Rv#zq{jWPwqLf9$X$~LJu@M;-1V%}Fdk%=eQ~b_TVA*=0;{#&8 z9T*5i-vzDx^R5#oCnw@k0`~;Mr@tvMyanxseu!p53O~!`QIy|5(zfFAjvQX{pn}nd zu4=ox>Xfj!1d{qUG|%mlG&=ap#AsCf+)i1*!c_r95%CJZaL{uAwjsL1`RT-Y7dYxr z8m^lnS=DDp;;q2Z3&`;A;(^@=H3{vg9`qYZeCLownqX+S!Fzj^z)2uvDC8RS4NziL z_Whs}gv+@?RpbK}+`dvk_W0M;9JHmO?#P=m1`L=C6OSuBA(Tu>U=x4;ej93*!Pxxl z>~kRQxorP^vr`Qq<)?d0K~L}d>(?Xa-6g%Tq%OnH{{B_dhfHfy@~QHdF)+SIMVXEj z*)Gb#x5c!y9sv2xcf6AS=+UFW)1&9BL8LVpFS7goQny6v@pf*lcCbp`yZL59;Wn(+ zvvgV3GBLx-=#efpH8o-ls#oR;IpT?RI?#Jk@g4V79h>Rw@?dpv;n$L4SKs< zeH8qj*uB^!=(6dPmO40%->sJbS6u5dHDq6Mc9Iv7m&XH32Y9?xi$j8gbKW-M@Yw#- zQo0BHB|to7LVpwlIgrT;iDhcWP^kdZZbLZ+4j&g_e>+$sjpdUp`L3hxr@RnU8pbF= zW5HtZx>*+rhA&9tGr5hVZ~M{|KrQ?R?oCAXgC#L@;Kmg=c!0PRfL)ygQU{^Ab#--t z2S`&d2J>bR(CElh01$Iph;~49gf#V-6q4I0j#pJ#JY~fj=#$4|2F$9#&RKzwk3P)` zo*&{OZZObssg>02@PQJ`(5NJWE}*;>*+X|Z&53~nLy98!hX7Cd)@r5LSAN32{|B;P z9T5GQk-(<-AOBa5y8&FPcW5RJrlz^fWJ4^joU+c;wu^I5 zc1X5xC#02jb!}~OYMGc{zyq>lI{(1*Z4iQ%^d9uk;GB_jQ%V+P+%L{--z(1rn1&&q zD0l0h_D_ivYShG9T5Q2(BvWO*PL@Vm5`R+u769!LAmzaoXZ?BCQr6SHs8FG4U7BA` zTS$8%gHk+=exVVV>Q4U%YSK&D^gd*701eO$48$Jret<|s;r7er#_^#qiNZ138Wm*1 zi*E@k@za)yWE}AZjwCEo^S^`1OQL3AXw#?6H55Q%e%q=Gh^~J`^Jq0&V;74l^!Y~+ zDhG{~%gB<|aFIZ)-F z)cf_aCw5e9B#vjyt;9;tR)=U2mi-HWLj_rB(p7B-Rj^|tlC5wjEUNV^|IMx%{Y8S- zhlRF`5v49L#+No`A1K(qD(-^z5$;8vCV^gqA)P2UHKW3V#g|h^h7w%k7t3vyi|s=Y z%?b;U{h;WrZCDdBX?#siPOfh?V}yHMc2$?~epm1I^`La|Whedoegf@(H=8Eg+}s3; za&KkmHKa!SZ>XGG8+P>ADAq0Cd*hGqdd^v!a3O<|54!B>=NTXYIoST)m&o!^3{MkW z%Ag2MwToNJ1^}251&$c3gMS(uy~D%rBBfk73$isOSd}21BS}h<>JH*YgK`0S(H3~{ zMuLky&fT*r_f$-rVD%|EIR}m^8g!<>eDH{k4G;Wj|0~+Nhj0V*zm+ZyU=IgpP!a7p zXgCx!G}oY?CUu-dLm*ivK0ZFeA_xfy{g1YQB2znm(BMnh6EAWD9Ic2A^YP>MXY0D# zkI=Lzv@E+PjcF=PVq?=|_MWZbMLs?$Sa;CIocezP$&|Hz<;qv>=F42q%O9OmIG^ z2B=rN^EU>N89>91;3jyqe8)|lw1`-SQh+_SgPhcqH{wq#Jx*8UN9u{O ziEPDAdas*OZ~pIn4n)?Luas?gWtqm3^+tp8SDsAu1#3g+Msh@1x$KakiDV;rZiOUDZMLHb0|D`m}ws+vy$ z+DpUhpwsgjyx2dH?2l|r!1(@vCG=+2oGH>vg?O3?v zmYG)Hr&~lbeRRUeaQS8XV;$vUtzNpp=x@zmf=B6zX(92XE)EycgEff+_!@n%zcZ*5 z07ZqhyiLMuUjVolS_qH;hmI`#L_dK06{sT=@N=I5YN&h=^aQL7JqcpV8yXry?Emhk zro1Fj9vh*^XDLsBVvl?bQ1fyrXc+-Rrbz&iAb+cn0VBc-Pg>lc4GlDh-ri!*g3khe zNd9{ArxXVP!YiKcS|mZ{k1fdBDkeAnw#d3)UYeoi=p z0n9Yw1VCJ?(8;V<9cQWN6+-&~Q5*!?#p;#O3MW?M!XJp&cx?>?peK=>7W$yp_s<}Z z3)UU=cTt>>;Ya&CU39t2{uV~2T!fhp~tWR3cCZ64Ps>ma|kw~#sUA%1Km@mqLXMsAg~5O zF)`BM!?_2_EE1Z?B-_P;%BCt6C_-6K{~J?)1QiFS}~I7bTGZV(lRzwU=}&y-=nibG!=;3d(6_ zXWDi6b^NVBR8HV#WzNk{qRXdtK{te1RHl3VwX~W%Fb7 z2XzA<|5-9IV4mbGsJSUhAr;xr3(DKqY?C4JNu6bWGK=!G0>TZ|H`svYgs$;ke3{aV zk>gDhBO98f&4QLK5xe&t8!pR^stO4xyl3RsnSYQTMry<$mw~ujuH&oey-{C7DoU-x zb8so|?sF*K@hb#+U~u16yEA2|=g}i!AxtgK?teUX?LMG7IcuL6t&gc%vTaAq#)RLL z3I>wxMa`wgt^PHBR75EB8F7gLD-SRlE12rJV(M)F55I_bn%RW~uroucHz+;6;VTEz zB$T*~vdQ50M12@Onp%ygU@s0A^iQ;w;yph~ht7`eA@F*r{s*tvu)+A;U!6%KgI*(= zVoLr(-`z;aFj~b|-WvFHq3D+RFiizj6hv4>HhJxhaWpAt_?#MExB+ z?zepD555}08i5fz6*}nr;ucn{EIh#ts?&$NTDRX^p>*VGt+-0=l@kWx`STgueLwy9 z!S`2geE$5oM(fF6V4Q$|a34BlWo1QL`)in(LfHcd2Lh*X-XyTx|DLB&DLvt@SJNv(Ne`kV+HVMrc;+z3@As?{X@gO2YBcRBmnE?J~NNh1c5kQ6b1M03a zbYP$c^)cqhrP83FEg7{B86}cN);}y+j_6r@6zJ_e&`=@U5XC@nLt9&$4@@WjF;+MQP{HdiPC(aM9S1PuzcE$CtetGk8=de=+ z?+S;2J@#YQuXZX-NVvwOckY$P8E)`JI$wZA zS&a|4sMoJw!{H!A`7p)3cI^Q(b8g*R=_syR;Buctwj+$zLX3+)F#6F5D1fU2XNC*^ zEmMH7@F_d%sMi>OoRRR;I~yO<82WPd(XBCqM|a8)EZSlOe3>1X>LBGEPMn{KK^G5v z%T$CxjP_LBM|Qe>L%>?QC)B;y~O5I+u zzKcET$$o(PJ1{^9=St+{zPImzfmr+wAI6m{T@VeCb0@1K#nfPO2TI#GHnF_}D`BVD; zD`kb8TtPv9HHnawYl6UI0bA>YsO7+c!LcZWvgQNE(mrhHZZM^nBHoK?-Ma@%FUa2W zW6k3IPd>*_Af zb?|LnQ!le6fAq);Ifns)LERe5{TpP2{7xc=zh?1nO`7MJE2S%7t&`Td-1pCS9k^${ zbXz{45D&SNa5v2sibEcoJmBxtZ&Ul|mPfx^Bc7R^n8x@@bWqOwO?0Iwn7SPCEPu;I z5@-~=*Is!QyAQdREcb*s!FyD}mP4XRIJLfSU4TnRpWS~;_!N4fCZ~ILDGGoJY#7C_ zsn75EB9#j!&=Yb+jA&+EYa$z&ma(~2>4L@D?XN+i4HE|W+~31r5A@wZ#&rpB`5i41 zjE={GSSzdXGe3&XJa;ax5FdQmQ|jp(p??LK=ubUIvM{gw*9Ggg2OuLHg?nEP9=pv* zQGL0G@_HOvmeGQDa@Ut%D-QwEW-c#Xl!|vyDz000nJ_QWNngYMIbd9pDDebpNpt2D zvdBQk)lkKol4^-3*4HGazf*!f(>yS>V~O%21jH_WdzUNoeMk*ztd8%?j$Pc(-)&6C zodbIR%|H#*isx$Plutax6Z7`aTb|?~W2=C%J$Eb*hWKL0mFM}*I}Nxble(KLC{Q!( zf6x68o?AQUX4(6ZIy5gwo*wG%tDa=D{g%k2!E4syIXyPnNizS z`DBZAS?iFolsyU=A2E#2tK?rH*K)5i-iyxc3c{R~e#?vAGIjc?ERrFm!eI8l2DEf)To5EwX zQ5CuKDZjjsJ@Vt$byVctOi{n&&6YzH8fi%qK2PS4RqCE0V(JT;>bVU$%+~CJz9#DK z>{J^`r3z}}&ku_38wd@{b_d4ztUshC>?H)}nMpiM)=f&_m9ZSr;NJ`-jx^%vKo9p*G@jxj{EdyNJisC*}I zUTR!?C>3afFL~5tTKCA5_=Ao_QHk{LA1nQ1zu0{b5drQuw<^wkH4qPI07IO?Im@pf=R`h_mw`~XdhRZIPvoS(K&~35a%Y`Mk+vAAiSwk^9^ruI|w+qqQ|L{gM0V z^;fTeh#gm5Y52xLwI6j8ErXTef2&mtlZ`RT^vH`t^+6@(^*APblj55XPt}^*n5Jgh zH~lk_i6mcweA({R`EjH!aZZs5)tjQgN8E$qDdG|JfL`+gBYgfNj$Wf*H&E#8aJh7_ zJ^F*;{Yg>jxG(1;wso=!7egX}D2}jvj!IEXcbq-yF7+fc^QWfBcCP$#F@Ksce|PkR zYn(*pi70KW;)EZNydQ0T%(7W4x_AYNbRr5^tLxN$L^3n*Kkt@EhUg5}07t%7X{p7CcUId$wT&WwJw3ZuTN*L$T zxqElAZ12n#d-o?~-#Y9@lK%xpoah}CoV^ML-^13)c4t1E_5wW>&gn2f?;S#lbWh;0 z%M5s(+%nN(+VqW_Y!@D~EGY~bE8u(1Aews?;}uB7Eq zRn8H}`ZISiU*>E+s2m=^TlHYJ0Sn@NIBBc{hcJ+n0$`U`-bc3|XRmjfmLNu5>jXh%m! z@m+>5#KrT5VuFL8Yia#d*8^#@3&_IhX}a}PpRZqk4-6D{yTyQODlacDBLK*ozW8NEANi> zypW_H9s-oA*2J`YaP#J+7d?_jU3;GZ6*te8Dc|=Z*R(KTa$N@(v5%D5C z{Bu|sV(dZ=YC`m*es&oS!U=LXoIwsJff4GUWC}yWd69C**{!nTPucWx8U=Lp1r<7V z6@EChHuJk?gO-mzuJ1?9otda~wM_Jp&mje>Zd2YTzL$YqjrIR#BK?fHxEQ&^pr`}L zxYHBoYW{nv%%EBG#GdAhY8eKrhnjS)*-{-;rVOjAP7v3u-JbYXn{$YW{MWWnWM zPg2`aiLb9j4SG=dww?T#xL+a}97|tc?hT-YDB?fN!UltSE9;)+<;eY3Rtw~ZHXOBE zd^5xShP`y|flo#oCR=`(4u{6xG1Ttl1VM{n<1d zKGJcqLy?=$V@VS^7w%w8{yjAH9wWsOSGw$SFZ$rvlw(_w)LyytBFEpmj=G{fF8a{b zj|S6}Kk!qtsnv_pccd5^WMlZA(C5E#d(idYQpW2)LgAryuI>Lx>G`H=s8f7NX-#8< z(UbO)3i`$%rweyb;CfCTGS9O7AS9z~RoE*jB)s1TP|jHkq}OQc2@;I>n`_7D68>=9 zXdMgYO7XtHo_!Q(KXWj*ER&=>=}{xnzj#fjK1rwRpOvhY+$&g-ZkjFZ6C;+(LN-^L z82VG~1l4eAbBxD_4f^(f47f5zpIA_2`>&waT)i)ob7q-W-$H;aQy*{}padEpJwN;@ z_smsA;JT52WIpGw*EjDRdtG|6z08k8qK|yi>l7Yq?x1XG@#ZW}jsJlyrVmM2{d(&C zksjv3=&vWYNsp`p>J^DE8c+AzC?8BaoXwinf6oJ(>xzPmaOejne`(i}C{3je=|6RERM9BudLR{-$#Zqm7^AYx?gP;Zz2R9Y1i0 zsZP9vI#;*zQortBPKflmz~rI*OvwX!Fv`w2?}p?Gf6(0Dy!C&5?<31EYva^EUA|dyp*mlepFVzJ8CjOpB688HIygp*RsKe{{E_dXcnzyZcJZ$?HXbeKA z4{v2rlHNlJb;cJcn%it8Tx*U&Jx=0)FHMABO)9C=9P0SrfIsu#pxX6W;Rq5rh48#R z`;{?O$7$og-<5Rj4_qrd;z;@2-nyXXv-sNFui~;JVb}g96T=gflqbj=7Ony2xr8$2IStXoT=@+IdgmXmLCXl5NHV53*j3Co$MkW*3rGN+wVkf2q!^&qF z2g4ztOy?)MNZ>?I=W)mO++GjPV@(JRLw{p-6c*Up!K(tSKXT#+ETgzj-Q5L<{E|3i z9xr_p&5W1%g=>r-CCxOcYaAooTAlWhFrcC6idKirRW94{TnF-t1~O6w8wJ908yt{b z>`lEx7vck$3k8~kC=iIo+r9DrPNo0;;Z38}Xl^df;ztmxoCTu=Vwo?C!G_*pvgfXF zMY1<{Y9DF?4%APA4NVtbQ2{2rdzBivo}vlJar}(-*S^kw$(u;Xlt?J`KbleZ5c6ot zfNAt7CF^p@GIz}P#uYLg*kj10N@Z#7_|gSfS?_=uy^K#U6TIJuHkOgWFgi8{4j*hG zd+qD#d(=94G&`R+;-x$zg)dRA25|tsy+;fTSf1=A{W1fVpgBHg*O7?8_bqS4 zdUX>1smkH2S32|WIvse~cE7@|>LTtnMuo!}L~NAD*`WdQLJ2tXQ-j#vK;*i(QmQ-OfC=Vb^E%h}N51ki^#T5J}swmD16E0u)J>37a=lMf_a& zIfxsn+oUJB^k3o5SDH$RrnqhH6~2(Qzar#k<{0BdJtQHRRiiOA(9QDvH{+@4uy~;1 zf(E!Y3EC?)oRYop)_{_ToGpXM#D40PeS`mD1&Zr?cxzFRKQ{Vt@t)ohd*VP>(8St8 z4R@&~Lhfh>KsdoWQKXO;L;;jj&ez8Va|;SlY(YG#hR`JX%zPf3Z;IMdmdr|`fu~IDcHqeB+&-y&F=9=$8^q@$TMJGcDj4_?=WHJ zALeSoye)|myIQL0o~^~c?#RxyH9u+9^lCHMxokS!?gQ4i4{8wO2-wZxcp5ns)hD16 zE#Uy(ut27v7C{~jt zmEm^YL9{GyEFJo-&3HXPPZn-*w-6?V6nz=?yTSg>Jdmc|!GQeY7r*V5hq|C}NqByP zsYc!WSlwZ)q4vY~^32jL>#kcpKmQ;iNH67m7Q z8m+q|=o@T2vad|1iiMkh@jUSJ&6ln}Ut4*XogP#Fg}d!YKfB&S4B;Z!^b3hl0&v6@ zA(5fwWrY_}z%kk1U>t1)+g}azd;dpe?*Y#B{{M}Cs3MnUTKWkK#0L$7E8c>03OiKQ5L$>cKuWX)9TR(StE4@mcu_ShEG3@%y~1+-+RAe zOaCOL;b3{vj+Xt|Z%lA7UF|`Xyd~*vLR`e|m2J$-xpt$<<7GZ%KnZy5p! z4j={qgzoBGD+^yoxmri(1WFVr09NmQPERLd_#Vl3p^bK5Pz*Zfo91laSl+;_jWHoc zW61XHPs`U&UNMjwR$S@)w&Ysq62)J0rx^Fb8~I|Vse9-TTNt>tRsg(@!u*d~4=X)= z1vowjBne0{ft4oq8_ocNkO#$sdh`Gg#<>Iqo3-C0n?y)Cr9mVmAYe86aYP*7YadRt zHr}OzAHz*yX4R=9xx>yZTvwu2xsvs8aufZFu&Lv-WsaX)G z&&+ja(os+x6q&X7K||E9!F%SV^974z%uTb^p!yuH{{Y$&{GY*BK0kzcx~gjt!kcw5|`yFV&Pa>r_W z=H7sLQyr>(;B?ZaT|j$*x)o-NN4kM{-MBgLnsOxcEuhx2k3Z91@L1P%{5OIt|DX%x zAF6uiZ)vGfVsPw_G6m-N}b?ya%^-v&kx^G7evoR`glJlfrc^5;%WQZ$~o(0{h^7FQ+hd_hU$&;4=0^7^Y-2$OK zWLGA7)%Qa}ytRLylY9u(5X3d5GiR#n>j`iVAlI@u>7FkWcu}Hl0dfumBF6g5WkAE? zjXb2?mi3X$`lwl(rIc#%(Q0G$uO080gFd~gQHXS%{-nN3x#7z2s$7c4%P_W-g?-hn zU9a7< zz>%C0Lq-?*EADy@y4~;DtPV5Xeh|gCGG1i`B$u*J@ck*auYRej*1%EFpS_>gpC3Xo zg4Uv~{e4-rcTZb|3WH=&hU^7k0Hdj|{qH&^+1O36pZa!sZL8|*J%bXtwY;0`6i%W0j4;u6W`mpiQ@<1^9VtP|N1*X~YIRVY3@ z_oLAub+@`A@xUHEZ6_TP5C{fnZU`z2ID~+vJ3`04Yky@&jqB7vT&mE~xOjsM+uM$` zMsFN4$ec$?%DY^6t1dbGX+_^eqK3)(bwVE%nHiW3qeSYCjweJjXOXiLrHydAVCOCM z)l8D5RJ?+@wQBx5mtVyns~tBZ#tpt|atw{S__fxYxcj+u^>r0cqq;*7RUuKa8{#++ z{&9w0L^P)L=xDlF#cPXPmZ~h|?hqx#At|X|S1$wLZ7n`U0tTB4N*#{khdp+$tw0CrBiFNf2Qg0uX}YlpBQ<=v(Gr)wjj5;Um&P8loNu+js5?PHYkZi2!K{ zN}v@4fcSE-lsYnIa`X#cTm799j?ajshs`~`_CUabbw}+7K3O}%aLtPFj$X<2AheBzvN;-e!|l#@GzdQtW`$M_bS8oL>SD^)RSGO>HS*HJ_toB?z z5zD?2Wwh7tIc7)!ekS0oJUsq5+jA|ugaQJL$;+O)#mP$g|FAo*YIs_g#jL%7LGz7P z>ZPUp46U`!KP?>$nU}ZRaT#)2E&IGjYs1BJGMvcl+I+>M*FEK-A~2|d09#7EX;x%wpHH(0w`UiX#+^>RJ%2Po%m4MsMw;F0uD4t4K0l91Kfa6}Z+rv#Q`J?1 zR)vq9sAEMO96N|ffySN*r`-bNWSvL`fn5~^__WH>y5|7TFu8dwk}FS%C^-esO_T4n zGpFdi^`Bi=>Iz889=u9rr?Km^rY%1ST^dQLJgJWS1;yl0Uk<#Ke&VID4+sIEKUlu@ zTL46s$G~p_#R!3$o5e@->4$MlY_`^k;XG7QQT^s|dp(0D1G))^t$r^KKO!rYEZri_ zFRuLL%k`#xUV^JtR_F;0hs+YqqQk1sq`V=kL}uVVvjIAhNf;b~Wgkz-Pfbrxe-bx| z3pJn*(z)U(h$=wUNPXjiV2kLxw}VplUA|nU1*=EdtfWuo)ZIL_fisKBMBP${yoZKX zDiWE>veL3%J((f&O~F+p$X|fE<-b(EaP14Z`3nDghEDd*I5aBrZR1f%tT@dvcz$xD zC7JG{S)d6pdRGy#-QiyJEgUM9(kRJ((Az-7ub{`4n!pd?B8SmzC%G?;r9$#A4fUaq z9)9?zxmHZc)IgEtJ+AimWu8@~K@Dsjal;D>!|6=%2%;{t1&?)SKT$+XNZzVBe&Pex zn2!{S7hM1sk&J_{OpLoMmcU#*2)?im_G6{>eb3B~yKZ>Jy&JH+ay%n$Je^73T_QtZ zs;@atMZh%lMPSriqyBBaPUEv`L?f{6CE^BHt|nmp*J3(hV{7}udJ2e5q#x;x7pPUa zh(%i(!f7*`)5bEov2+*SRHNR_MXQlL@hqQcMav?}o3033Hzzdc7AsB)5L^6XyE)1E zODzKh?JiC05|D&e$tK6kKzBG!V z`-R#)uf+yGi)X$zQJw0vP}!Skq9M!8+1D-G<12ZScyf<%RYy(UZ6!dPL`4G?OQ2kd z+?3!w0bAKVlIHuA31i%Wm!8`llEui^Yz*&J92naoQvxw*1iN!C@mpsXO8?|Pbp ztu>ZoZxFy?3s3bK1HN+IL-Plw%HL)hx}*u`GBNhdsZ^C|ef0=zRX1 zHf5pMK=>whxmnJMg7w~qIhaFu4i?dGW(r>w&FYlHH89Crp`cpeU|Lvic zrf~Xmqf`qL_e1OT0-fj+B7y-9US11G@v<;bEZ>V9<;RoX5u|K|FXW6GV7hRZcj~fi z)!(&?DY}+VNY(bF_tW#o%Xtj6e{DZ-P(UgxYFWCsZvK}&7OVi>RCS>~-?z`n$PUBU z|GuQBX+SQnd~Pv3#Lk!SE6=@+cl(DoSuODu9AVM>%6prfN}9-RrYe-;x3?&(Z^UsM zN5oCJ53>ICt!C{=4t-q1M_BGo3mT~!w&rYQ%%wUYsgTt^{pM>XhWaetbVuR`R-2qu zRSquDbq}Z#$gAkT4d3k|--$gIdSOQD209pYZ}SE-1baW2wLKRPsqI{| z;u1^!oJfpX3ZvG011@B;z)Ab#5K>r zGJyD3e~R}5YAoxEzaA#QdR}{@D?{JiI)=#i(?VzVec|NSJoAsc&KNaNdWAmV zJK%%))&tl<_$wfrAO7UH3Q(2tNM!b)l2A=beZRjmCN{5PTXv%USZ=x^=>GZ&>ebB0 ze;!FKac}o+gpXdg>L&gCGXh!h6xk5A_~W>teW7T7e}6EK3^O*3koh2S*Xe(K>?!dv zhM7a+*Uw6L#ftC(OZ;m{^V>FNm%M4g7w+0JOA3o28kQrITwXIDuPNr#ex}FsfBEOf zKwAbgq~Hs%Rcezx`B^2}GI zl7zE@zXj+&y8^0$b$t0993Snf8F+*uTZ6bsy2bYUK@$D~RKF<{=Lx(y#CL%B7jV;s zN-!a)0?m(Y(lVgBfTMHG;}4oc)q{N(e?{-HwU3^ds`2tAU`mm-eLZ;(>+W|>3k#2Z z3+@Z=*w8b-%Jgb=++yTltm>2+qA=RcL!30gp`HRV#n33 z4-ZPdvSd5Q*YDKzQwYaT%ss#snEq-yL73rr_kX)i1W^hgk0R)59OyL#M&j4GU-nwj$BSLJjem@1#>h_FLr7p1L<}7Ej zAeyD@SGWF8ZaXS{b>{|r=dbd$Wf|nUYGy?zZf_n$WA+kq9fIGm0wy~ouD9kEeb$Hk zAqU8SR;;oj>ycET%RW&iZNvX?-RSp+9jT~iri(uwjP9Wpc)SIx`ugn3SuEV-BhQFm zk3S47NIJAT3n9c!&|sl(7(OWqU1$&x3DVdOY4@WtYb(M~I5oumZkv?B`H8=t2WMwz zYrAfcRnr?Mi5C01kY_L0Gbu9KeV2AL+$rPfnk}OzD+SckrIxqdE~XQLC$}L*KC1?N zg@=a+0P}mfy128?o;t;G=umdgDi|~Sz!!qX(uNH6K*&epUI(GpEBCD37AYQ8)0Q1O z?qp>NfISJOFIq77!^2HHVx{@EjLKKF;B}(wOTt*;^~?j^l3O5J>30X2b1`&ygiBt(4<8vTnp+3HPwb;kCg&)&4t9>nE;Sr__Ij$MEi#Ww+ zzt!@8%{NCJ7#VR|3sDn9f3f=NcnSG@3;^+9Ts?s--_LLDfe?5Kglrb~x%kZ$M3-pO zCPKduEoew}8rzk^0K(hB4LaIb-Gkto?K^ZR1f1Gg$TPLV831&&@t^sd2Cr)5PgfeN z`=5Sqg3=CW-6MU4l3=!W&bzd)CwmsUl$BGNZP1c&Hgn#mun=NKhs)V262N-Q8xWIe zFkGOiXi1=%!PESQhffY+uYwXu2td8^SSIw|!QvhSUyU2$0#F)WSEg^EBXGiByj8d| zaH9by(}p9vPjd-A11sHW^ZdZ**t)`$3^w_XYJ$sL7cpuQA>*xyZ^%iKMPCq}Eg|9I zTve}=j=#{{dZZ@VmisXYV}t=m&tD}w(skdIj1M^@spg@=M17!aM&n|dBF;b-Mzivr zs&t9;N#FjexEa5gRT!)O`mIETIrT;U%jhgzmVJAt#dp!frgz_rjaSEMWcVXR& zySgG9-d?cn+L3UhE-A4iy-56>ACfTM@Jl&7rMZ*auVqQyPVxq3ss5{vU+$Lp_u@Tq zaTs(c>#wBo zD(YKbsZ`;kp_(xqxC*&`^;gbEB`&7LB1QZ1o#O@7DFmARzpot^#19$|8x%Lnse6vu zxm?`k`7!+>;LNdpnHE8*sv60ced!w?=Jhj@KKkn(OWH3EAV9*~Rvsa(59t@>6rCA{ zN43XC`3N%6L2CLyt97;1YiHl_e{olTG*?cVq~H_Iv}p8^X84*Xl%cipx=D-YX_6+V zE!mxv^u*4Fh@rH-CXp%rccukfI4eu^$)8s;3eT9iRCA4n>KVSU4nF;UII_G+XWeVO zq;s@pZ$p%CJXI44R@ta3W>(Hcdd^VfEq4L=K4p_>?$jBk0CKDXqOUZbo)tE`&y<-F zWaP2C?hVX8E?TtnR(jmc`{{70Kl&hJE_GL2$m=D}{U2TY0J?Aqf?EcB?^V77e9!#-J(4{ncq^cZY*Rcl<$5mp&)Qe#)_WV;DphHuZYf8Kq1}$ zF%WR(%02Gk>E6lQtq~6t0t+0eM{+t}`nc}ct$&VPZixHuJo+j>Mocdg&g^5CV)H&m zC*{H6as8v&uJfL@?|u7T{rpkpfiIs)d(xo_q2z@3U#1s#C5ou5OsFvSPl{WdRkU+j zi|CO#N+o)tohcXRF#o^TO^~oNFC4Dlp_%fcPg>xz*)@a0;J{ux+Tgt-*|TaCq(}Pi zosU{9HVG(px^keC$h*o)er|oOKRi~s z$+Uh(t$}I!db?nDzQDuJEl~wO==YtUC0(7Ede)x-GIyfxg^06U@24XWUD&CB(DOA}8I zU!CZG7ins3er|csVhyx1hC#I(_QsS5?^v{yxNqIKqm#2Tur|<^;-fmg%y`=a zB1J_!*sN?$kv~%f#=QFRP1GOX<~Z9))?A_H-FEjL?NIa1PXkldHqKIL2uA*W!?MRb zbM9okZMC$eSStKxQC2WzOBK+F1OSvU#yp~JxvV|P%l%|{_T9It=bcWhB#?hRZY4zt z?P<9bsEx1sXS>KgfGE$p(HC7_FGWlZcVuYG2U3@LIE2e&pSjajnQ}QTwux6R`DQ#r zU*3;Ao)qDkwEcSddNj_5A-OU(Q%72-=D6!^~tcAmnFy1PyU7o+Ow z(Aw=Jh6dTou(8<>ITD|T-q+ri?v%)^?COTkd@~0%nH7~)%}&Kc&65G7Bk@oy z2&~)V|9&#K@Qbcvb?Rdt`Y)FfQEtZa*KLZ5(5ddW2%cGdxxRsE16mG4IaWl=dV$7u z=F%ta4}pnY^YdSY--;exCSh^?bxLrErDped0xl!rS}+jlNAUr~a4~sqR;}X5F@x{i zoLPme=1|hQ2ecl{@vfta@$e(L>pCF)MM2mw_H%`5A!yoR@S)rC^w&8nM8oHuUh$m; zM_vpn+3xPp-pIa-INRiMjlp@MpS;}#dsxD!&Ef|p4I5o3BR zA+>NZ?(BE5I4#A)D3<}~B<`2MpeQ&KN2+(#s9oo-$lG1#uhU<}uByzw=kDqxK;BCe ztJF9&zfdqe67%@MgWhSB4lGA;s?w8x-TwV}0uNgouaZ)e>F+er8evKvtu)zfF?sbCF21-cHNPW*6hzc zjraW4Sw!dJ(mWky)UO%Z&=>Wokw_kHAHxIoDfCHIRkVD2Y^CRCR09>d!6r>(XAXup zP_N(k!)7F`#C4{UZ}$g_cC0Gd?>alRa;DsOf7FG(1o38N)he_!Vz^VR5_gg)n@n;C zuL}8cg*Q>FJ-c}3TSw?N;icC7)o~bqvHmh*rt`{QesQ7>jo=Pco3j3=p0@J>?TT?A zCAKyO^`N|jM(Egg4QVfySTlIr_u-^*v2O!~if7fGXhmFaqqYgFiSS>r)XHm4fWdBD4G zow{#)@qfrFI63&xJA8FlC)4@dLL>ra?Z79T2=SZU7JRy%@AKHyeD2aQMv~qMDQBJ( zNPf&ICZBJ2%=e{NTBi9ckB}9)<|5Axc*7}IeL)i96&9Rl%`s)wt*?3=BA7iT4JV{5 znZLftd#&2jCbls{fx^`1>Dj+?hfVBQf90QB0Xw5ShV%|?p~O~wG34_6U#iaTTU=?n z@MT*h|7Np6E8}k!N(u^oMm8;N=DogGGymy?$v(J)+#>6j{Xr_gZn4DzQ&mkna&W00_6ATw8g&#znE`I~IG=6(4D6x;y zDq~Nu$LvH+)0Hv6^gnnu|D$hHlEg#cg`hYAJu$*+urud!zKR`Mz#~ug?_MqA?b#Ae z1>hOM>=yIoYUdYz*A%Xy;CN|hsKW-C)piFK+SvLlOUxxjlXEogMN&67!n$r`p$>ci z`iu>P1{-3?8Q44E=N-i zqd>#8x|lC|Qk1@kU$c_*tZ~~$%^s7WAiTZ@ymQz2?{&aKQG3@Iu>;~mNZu0$GL@Gh zVGQT6FAy2~3=RA}U^hUVkCkWQH{{tyFk~}Ld-ZA&(Sb_H z8PnuVD@s0@;Glh&J^nqk47Pk3)O|eIy<)V*z*?r!&ntGoU41NA3%yG^gN~RP4Zoqi z1<_k%KV4&i6vAJ!hv?3{DL$K_zGcgnDvYZSNKImdSl!>BKq&w9*a5u?zpv($GoWe% zMy6*L4T-hCrs?!a>37BX1Si&grT>2K^~tk2b(M8(l1Et24tMLB;!Hawt1Zo)o2+fp zk9}G$<4Ci%yUSO}fLUnvwDVxV)bgv^P zCy+wEZwa%iwdL|P0yno!Isw>DzM&SO`uLJID0srpQ+b;Oq#%benL*p8<#5|AQ00_O zzu1YFd85;h)r*%Fte_`ey>rBBF90B!%hZv;T-=>xcmw$Z&7}^rzF%`Z`_BI;91Sb~ zx^uV+0H#zxjQC#3qFMKm$CdPqci%ogLVz0jhlAteY5_obxZQ4nc ze)nW!M(<4`mzlWLV{`Z_vWhbbWIDmAIp9QpWIGAaYh)$kGP4ud0B;s(O{*^7iav6b z3Yl2MkXetU*Op*`?$fp}8(f%@#LTx!d82;?hbAakTfW@K#l^6FyQcLT8YWvn#NjLG zG3vdN^ME$_YQ8o!G>xSAp;(L^^-!n;0yUSU&^UB0>j}KDQB?wkIL;4uy*c}9gYw{- zrV9T!snoO&W6;G0+4rVQFF09OdVl)3?A$af^vS`G0rl^>&1P>wXr1nFSNvr*C5Hax zW&)7%S1$eHof9vT1O1>R%KyjS6CE~J-v2;1`2r^=r#EZc+|1P$2K2k#g~&Z_5|LfF(?_td=Yr{Py@@6pu>tqBl$E3`yEmg4um2h zSA49sp%fZ$gjXN-+c5~bIKva{7XY}>*gb?LMktZOI~C4(8iO-%8GNC&Ve|C616k96 zjrwt&j`#pyPr{TA(6qM@))U;G>0ZLECBN6_DIFqGY#q-YcBzCs>H~M`ks~vFGuhkF z?eCY{YC?qPgoU`#@QZ$8VXCBKqx5v~Woc4n7{q0)%C?CjKGxA6s`$Slq4e@;8__L{ zMJFvqIqnqvGzlp~D7`pt@Ir!P06s(>oo$jwh{R$IH_;0dmr8*4Ov~&krKWl#RM0?_ zZk%=G?s=PBX{Z9S#pw=1%+V`QHW19!-=?|SXUk>oOl+^XaH4z5;P`pEEEMhCENAHq zPwXwIf6j*k1(gk7)x20Ng+C7{1BR zm^q~XN>tVVjod%=hun#aD8!FE_P#K-7~6DEZ*2Veg)P0651FLj)U&#Y?Ds1=K$kP9 zM5Pj=v;=(etiZ{=d@&pUJ-UKkFFq%CtTqkN(JWTl+u3?7c;hwQxyXbKKbM=g5;z6L zFk3SJpALf{#}GiM@bo380}M&8{wY6wt{#NGA$kvX)4E5PHlQBEMENT7*V{`%1@Xes zeD1$HcJyi5RlANCOW#kv^F+c|iNmb<=zl(MiB3%Cb)h(rR1)o-3ru}PJ*^{~uE$)^ ztX$rCZ+0*x4E4rSwL?GBRj$;$*W2zaGqQm~K!!|ZYZHtIPE2Iu_m4sqB z(|piG<69(}{-rCk$02AzJ}{ax`u?VtLD@)s zjDx|+l&?(Nb!h5Jzie>trX2^n7@GoqkM0+YJR)#ol4A1vmiL|UmwwewXI~}#=PNGG zq`7=QRhyXo#Qwh0g9K{5DoIe`J?|@_Q6?+hA5k&dt*q7h7IQJ{36_Q@o(%SzBL*+2 zdj01sOys$R-OiaZMC!(Dgmh3VN=b|rF7jTkdDSvFh`sQ<+b#d+72E!e1k^S}!QS|J<^TREvrlm9G?GFI8 zZwK*CFH$lJ8f<5!owFr?5pRB8z2VoqI(UUW^FLeLPTlfF_S6|}Y{86ftV?a@oo0iV ztDEw_y&foPAO}%>E5sZ33?U%|P=zy->eBiCji1&Riv2w7KXUS)4dUYs9Urt6a1^Ef zsx0#*yDLs;*Z8>Yd=|S#v^k*nG1H9uQf|nxo?;rQtNp5CJ;r#j>}JxZf1irKm*zpL zJ6*o9r|L(~D7-q%AmaV(`02s|;tt~#mNk}n11f>%BJS<5U+D9yDxl7GimyKe=^(O< z-^Q7VPtk_|t6PcIx)EeXa0`qrvtkcb4bLu<4-9^nD?B%LC}(-wq^Nrik_*pHy9g+U zcK)QIC>-C>EcM`i^P{{U1u}|{uD<%uWgS3_iETVMpr$Pif^;DdQ+0RE9i~2h3`!xw z@abZF5)WQj`loMLxy2nGp#d(}ffW2ND1yL=I!IrZ=6r@sf_KQ~Ud3uZ1)MwoO8o~hA)wNCWK z=L9&#C&H(8y3f$hEV#YbzIKW7Lz(;-{S1&f&+HFKH0S;2m+wVt7{8#nHRiIm{+ju= z^)BMAfU@|f=IXsRykFPgI!=E*pppJMnggf--_c(e-uusXk!{QTUdhPVa?0;cF4gKL zio@pU7I62ZifnH)&)l^+f`-9swqwV=y>;~0?f&UvV(Jk0e1%atdGtC4v9YpY4khXt z9G$r$+c*C>q${|+w1@sW&SIh`u=VqC{1f^Br=|?I{P$;r@0J?j3&KL;&rR06PE?S8 znko!J>P@M*tnH=Sq8pw|sENy!4t|w6QEd_%Ct|AX>6+!Hl#l_b;VZba3q2m zgh9q-RidmeSnoiy=B=R#w?D0UPODk%0H zcFmx;6-H4O#&S#jvPR!)A?{q?q5Uq&WP{OX<`Wt}4=jGSJ^lz}qxSoK=z^Ta?tqw3 zhB`fOy28Iv=xXUfCg1AsvY2hz0sYN5JN)oOKGkMr=9?$>(NIjtydIS4Zc9G?SY3~Q zcTk-r;(BEHXW>p;$#6)@Sf2lIY>aOjps=#(Y4+NxuE8vwe>*AQphWRep zsL2M~?@QY*i&Os7qs2slGOWvphJyNRM9ZF>YS~A1x-2{qnmj^!o$5w!j1=p?nb~sy zDgf|Zsh&NwyS|HuzaAM%1MuSVTJ)KRwr#v$cYQm#rY?;qQ-6`K6xt@{Q}Uvh{#|iC zPnB}gJ`ZHq8l&Utg9MY`+M|Ai?Y67fmI3QykJayo8&rbgYAF`~E~QWnz{#%Is(~jd zrqwdDloIQU>ZvYE{3oa+iDUEvHCtQcgmzjRj+|)F4hq;qx9*XCa>2_3TD3ifP`7wIO9z^CN!^Pk7^Qi3~BYU2>$-g4{ zf#$KYPHI*bu>AjP&_{sw&_B}KNJ0HBgK^%*k)!&VeP3d5;QLQi8fc2(B8IcL$C z*NstV|B=Y1698%X1P2~IbkL)a;qzF}b64u*zfoPt*B`Eym$}gde3Kj^t@cxw9E{vR zMs{4TRPFrUJ|!N({OB805oTu^f=4Wls?I%0?I(i0%Ip&hwb#skYg&$fyZfVnS+;SJR580&p zIz>N$_LWiY&WphgtSi_mgt7=*T_hrbWLEUTb5oc+x12r_V3iX&?4(df0m>;-SH{qvAFN!mspGRRgQMAId-SD!8e-=jipi+e0+! zv6gv*FWT=ZSnTB2nJ6^M9WoCb%A9JB%L-QKqiTqv(SGk`b`X2dfZ|~z} zlxVYZOeOcba6Ih7@e8M?BBM)Y=bG7h3uj+Hkmx^mJh0a#muJGA&B8+Bxs|f!r%#v6 z#t-iG@W_+;9zD5@`WOcX2b4pOhtyFX=T2bl-mSbrbeFPbx47wMJiiZ*{FiKb$%jWe zXyuRlkssYw!RzxEoqcZxEy*Pd%@znud( zzS4IjHRt=QCE5$@lBZ`fEDhFD`Rh-%3FK+hg=<9aG&3Zsq`JkAGw+%HFN$JaNLYy!!r*S2ln4ufH;PRk>pd2&cd?1Xf71zc9h zKp`s{ZR>8hF8Ysiq2NDv^BhsqBi)@1g5S&>*_kP5A|E+9Ia z#0txHnY}{tZoE|9-G0+4e(2ZgWYAUm#D)QOr&F}t&h^vsUX3B z+|EuIUc-mLse7(da4*sYw^94Js8QUTjw6#3&-06?^QvQ*8;Q8RudA2Hl!*V&q-TV+ zf>O1k-ELKp9#3nb*`aTKq+~#o)VJWO;;QGbzEdu`^0BEY9#9QaTOc_Z^%P%nYH!Vq$dK6S4E5( zV-D*tvt9V>k0&g;nXbnB(A~g6#^AE!J&Ezp|NaRca{9N47{_ow^6xv;B)L9`(k0gz_dN5Xi&oe6WpUk6y1dVLN;FEQG{wc3CPwu{s{SFYKDgsPuGRcPKUy zG0e!wC>t~`&$_8udwcrbM~^!Yn-0{G(O0A0VNE-4&CfeRQ-cxLHodT*2r~=NJ^~>L-C)8t zDM{h9yxG10d~JLan)P)iB@(0O%IJTHBf^D7M(%5GZ$Gc9%HF^9vB0=9`;t*^S6dV; zpiaSo6C3^=s7k{Y2Fo#o*0p&F%awdffSVd^@FM-RhNIs(oRY28v{Ybhz*cY=b#!Ad`^r4q&n8#h|*{+7LX#I&@$U!suHFTQ6G5?d2z^ zs5@XMgn;#umYEp=+LFc@?y!|fr}FU*_iF{7zg^*y77iwHJs7vfka=gMr{9xRHXp+> zbaD}Ve?@RzxuD8Se#~*QXRM{qpvV(|U)c8ih9)Ek;KN~fjf-iNO*P$U!Opa78o12@j_kIu&5uh|mCjYGc)QJaF=JN~AlUdwgmGIntwUmIV!Qv7qDn3&e9g|eyM zj$KnDvs8)v-~}_1Y~eu5F!NmSE8iN1cuMy6d1@LOr!6g?pC2U8&9&8qa`Q6_9`-f%6PFYD z^h>v6cRT%TZk=5lH&_dJ@?(D}YOkt}P9TuFa}F_OuHSRs5+@9ty0?;4w6yNPc7o7p zBT34vO)d#{P|i&~+Sid~K$tz@OG!;^>*Q4rfguk8g+1)-gZCWIHTCP{kbc?*BpE{b zj!+4Hb8j~_38wAd_&GXHJw(V4A3h|$u@~?x{#5cwTHuXQP9ky?ftJNe*t$wV1z}%p zV`JH4L?juHB@u|=--VmxWeB+tV%E@a2!MV+sjz(w-mOvvAR5Z=TvBGX+ONn3R4NGO_TnoYbbANlg-5h9X6mmk`86a$Nw zr*jj2Z+0zX#Zf(x)7v_`puTitl0@?yLj8}6i&E<~gDjW#eubq)HQXD;U-^DJdx(lb7EB3+{W^DOt4% zt{r`S_pnQ4<#6VX+?Z4@=MfNi3p?1`=cE_cvjkm9EIIMpU)WpUvM!XXv&>Bt#%_n~ zzp6O6!B2Y^EoIEZhkBpkF6x7|e|Tg*)u_btmCoHR%iDFv#r?pM4i67swY02;n%<+1 zar`}lsPtL1e31q-IlKu;@ipWP1J4Y8r`NxPqVh%u2M3b%TzTp%r>2aYoaW@-G5e&n zw6W7c&Btot0%}Tz=`5j$urfd?qhfkXO6t$D=rdw{kn@#y6}}*TH*bt|Bb8vChn6T*y>Md{QNlQy36sLK4dD}G3 zpJ&92Zks~DMIzOo^Sy~Mngx4;bPPui2PdcAl68)zin{tps24JhclGt{QJ80)P~8eo z;b(%Bw7ZVExs|Y7P*EY^+lKI9@QCQ|g`Md-?Tqn9=Tcjbt8OL5CwMvijvl|Yv_R_N zU4^jt4ML+3$FPIh!WTh|foapz({=Cp!R!=I>@^_XBxlD}eXGO-*+;7dNN!<#9tvNy zw5%)*15xW9mCGO=^57&6!==>Y3__%rhk0$=e5Q#ChHkttczFTW*&UM3Ho^6Y*l0>; z&jyvk4S}v&h(~H{b!#>eqTtF5lhYT7{g6)ZvFv_%c4cM77^#+d1Kz-1IJXi;$Z6^6 zp`em46pGX$Ts(RH{0!_mdEi+Jo^Vo8krtK&!3{sJr^h!{wh#~HN}69nhv3^31_=a* z9Z@SQ?whEo!?qnze<~RNpuF8Y9A*%(b5dOIz4A??JQV--^jzV*MkW+O??V!Fu|~j? z{qW(#FQ8xIsYCu{_D#j0O)WSG`*BK!BiD|+Y_zaXyZ6r_QrJ6jjx4ayX|;`59UG4Y zXJPDkI0YvmVY;|OWz^L&LKVeBv;o}ujTH3V<^iQiLh`a1$-gjf=Zy?2+c#{Wrq(vpzYKVQ+!HT5kG0>1W&f}L`G+~e*IOuU9{$j~hvS>Ze?AHEp2d0tXkKnan)l&@ z3eG0>efubpokI@A9W}{1IJ#ink}8nJ2*Owc0s@>k%b+Xz7G{I(za1KPLD`52Ck6uQ z8xV;GZH`iKALmFCI4<*HH-60muPsW-?Ffu;{IM}v)Sqp`S|?QgvCj>2G-2gVNdi&h zxSN{t^FPU z$pGZgE~QILu1DjP30pl7g-PDXB1ztL#y5Md6^)Ibu#^Lrz!Pv&W@V5L2{6eU3k|me z`-JUqWZFQ&3tE{8_VR&AJOt>c!xC23i;0P8!UsD~Ti=j9iIO>3MefTCTuJ<}WB^?G z++~_j16q0zagMSW;NXO602ny*81KWE{Fsu`#!sI<5rt^v@1@1XG?~;VPb#kD4nl6o zBZiKlx$N_1T(o4IG7&ykP-4HqX>qO;J}qF4Ywj*8t7gj+5y zk(2YdN8IcNycq<%%Rj!<5ve*5MmbRS!A&6XF04j~Vt|&FBFce=D|T7SaCw!pusDEB zoHT&dtbpnPx%!d_Ge?qJ&FKo<7k+OV)V9yaVxK;Hb^>y9{06V?62}9=x8lW%+srL2 zt`EQ4mRC>!13bbqB1+mr9Bx!Lu#hCo7=d~!v>CivY~WXQ1JEZ{q@Yz`zz}k3gn`q- z8bkBCU}10k6hJn#yN-IJRq@&=XU~x%Oyw)X0)#6jtYjYHbwC3ryUP;cy%Kdew18+0 z;qFJ&_0QE0U=N}O|0cv46%!LcqzLFFWOq*xOU!d+h>cD+NlwlnL_y9tQR2AV7i%<4 zf|1Fh#IQ&#k(K+IJR`p0|5t1|Z;JX;x7kqqUBCUmZm<7t&K0^=)2bc6)hh3j@YhLs LWw}gQ!<+vfSeJCp literal 0 HcmV?d00001 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..3ea9fca130fecfbb0c336d6083da2da0ae5f071b GIT binary patch literal 41354 zcmdqJcR1JY`#%1#WlPz+lFCl@3{gfjBzq(wdvB=_p+Z)oAtWo=v+S(0_l~TPo%OqJ z-k;Cs`~4or_xJn%_dbq}*E^oi$KxK?eO>2yUgzBtO$}vo5=IgX!^khIC|<)bTz?G1 z86_r!pIrDkI0^rea8}ZBzHWEd+0De!98))Ow!d%ZeBaWP-PPRD$^(`5W%9=7-$e znQnFyF1ej-dXX_$Y9{GAE>^yA>8R-oIpQ2DTvg*4s(;sHUbbGuoo|21*n7(4jOx8N z@9D0oaFo-f7S|3r*nQ?{*$B$aEjguEV7|R89g(DX!gA_kSoojvX?^))O4N$*zfQ9T ziBg<{|CdXT9i^tGUK1e0o&*O6d(sp7qn}HY;mL7wa=v8^#9@bDYf)BX&*0Z-K_-|A zdO)=Rb`pL+Yx4iAuV>&GdwWaSmM#=f*oMr#lJ=rKZ_)ZlH#3l(cylH-Nuu;}BIk({ zcmeFAN&7n%7NRK%L#_ih!4VOneO$!2hTh9cF)YB0xBc}!T18!5>Bfy?QCCixnwl=9%I+Sik6^P~uJ9~pZHnQqA1rpFW?~{`V`C#@ z7Ng28`%5xAJIicaosnG;QtUL%DJ3=dOZJ6CdzL}z(s(HU+fSdGGY5=!d}TcTo)!6Ro(qq%Z0hpz zG8O*US)(KwpZ5d2MitBf>{ah>W;&SV=jZp9dx^gGm|~Dxs=`Ygdc|kf5`STH^dV)* zJe{1Xm9_QQ*jP!Sm%ctT{Qoza-1-G=>+id)avdfv@!bCWJ}+TBsw?A`*;I3!!IXz< zU9MR@6+L}$5SuSO2M0$(TU)(Fl272zcgkiPe}?FV?gY1NcXxJ<^80O()`hc#dM%dI znV6WMF&Mg!5!@dxW2oF+XTq1imrLFzv&{oBE^xnB;FD41a5QCG&17SwmYyEz)vIl; zefwy2+Ew|-3I=Qy8k-NqHRQFJC?SK_juBue_H@{I_ z>U$X%_XK8vf-$N<{k0T>5$wSRDHa+Mf+cro;7)pWb#*N*FI#rV+&CK@s@tJLg&(un zK@GdTwzj6hw!Japr)+M13f8DYgV(7%KA!RZKmj?M&#D&0uz}S6&I;|opRyH=nwpx? zcM&q*>e>A6uT;E#{kpf<>2zHL8^2BITvjO+8yi2tgK+Q0_o=BTWMo)tYio(g$!&)4 z9Q@U|=Azm{FJ$hL<=@mBZhOo9HY9EQGI=2B~& z*R^HqNojeiLisE13AIeE57;SRxk7?sD-}-;0%m2ZC9ik=`?+ikv#r>=YpJ(5T>i7v z&8|MkpI^Voe=fU{RbF0xX??w}H`grJbImGrpnPN4i(bGOD=ROjh6s3mLU|lM_A)E` z)2Esf5mEu0o9^7vR~|inJodMVzy9k(BE9?7Zxn(Ath;$Ox3{Z%dgwY*ufoiCI1Kh7 z=sEfLD6pj(nyYeDk7)gF=XVY_H>(Z$?eD5+XavT@&;>kv=&-xili)nCI8w<5DPg5e zwy<~q-`X4pF|Kx|?kSt5#>TG;g(KlhOZM<$GI{|l$!9&T!^Wv@z^*c^s7TD+!^7lz z)ETa7HIyl2Hb?O=ff*r@&8Y-C0n-{{I7nYdM%W$_l7A~*teERuCXlbfqm=T8lt9wh z*r=I)gI>@Szrb$9*0Ge%$eS9*0P$Hg^C1D9fG0Gkuk`qB{fa>!Dl03aI(F<4lU*5g zUtb>u%Se%1hy5#vr_naQz4M(`7cMF&EDt)hL4mkqVKFhAksVlBDGR~B1Y?PxZ|i54 zt1c^f^@_PUL6owhq5}54b|ONSB2mM_aUWtiiC)%M3RYz7*RRLaCoYX{FIEx(VBjYRfW8VngfB5jNzMe=+OUoTb z%@Jan(O&|GT z)S0(~!1-}cIQD||%UYLpvF$!}*z z&Qca$uPK#dLZ zD>gY_yN@5ib%>ZZ!KuqesXhjv#PC)5+!p8D4un*Mez&wX1qq8uX{tfS5Z@Y1V`ag z1Z&uc-~Lc~z4f2{y~TYra@qZ@HtlRSLDNUI%yXlo#%l4xjbm*eatl_e{NUKAsHo(4 zR@kQzI#6ZH|w>gwdIp1(QpiHIiPAfXDiyt^@?dOGo{?Cvp2=~b$|)l}KBu^_ff z1_mtQtkNOUUhAVD5^Qm?LCCC|d)tf4a5$3v_Vv* zKeD&ChiVkm5EKAvi4w;L3T&!l17v^+ZgP*w*LfB#YO+E^F}GAMN& zt7EcLR8u1p5)xYasmz9gMhq$;?5?2Wq{`}_A*K=EZC+cb*pX(RbaHVYJa_;FIQn%H z6r(GrQ-1u={A5r{>o({>boT7orG^s`VuK>eyBn4JN|2Okd37I^lt`3q|0#_Xv0)5n zkqm@{0cTOVp{1p^E{tipmd+@4&~2hFOee<}=fl;c@E#*S7P*O-CfL5^rD{sau5g=y z0+ZU1aA8zEkMG#8Mi;DsBz`teaaq6}A-^<>cV?hUHTvs;)4H%A@r0@2E z=uE|k&jyQ?m6bjGmqSd9-fQDeEkXgIbo}Fx(4C6P&CSh!Smh&-BDq-hFeHRTElCDW z(fwyhNt77zkt3E}y9;M{d28qU@*JwkMUB4(5TIppfCql<=%9f&d1Y*P)E>Vz+Haj* zOD;O-52IwgD!q1$keu-(!5(iSX?FR#?kjO8!hJv;OYmLQh@qm?nw|5<*w_|3y}!Ow zjxcS1e_50JSP|?HH5*&R%5>WV1XwL>@vJ7((~&k57z&iqkNkG$j5Y^G_5!1#j$zOC z2>ic%5$4g!!8Qh+DF2kq@tr(<8V{5C_xl8@bg;OOgMx^Bch_#AwDMzat$n-qb&sJJ zxlkNggzTQ!_ogP;G`mr#FLke_y_kJv$o^FB`#uT`9N(HwQg(5jAgQz$eE)t$kb^`1X+#8_i$o%K$XoZdu3Mpagk(7ds!wz?Y3X=%?`(*4 zLt(tSy1II-ElHN&rm3an`0mb5O=IL~)q(bo4kc_{c5gX?c4J1x%i79{r(j{YTn}CV zDImH)dUv(s?Zr1#s#mVOw_yysL`FvFpLdS0yRj}BXr;N`I3;(XYKq-{=N%t@XrR+}5@5M!a5nAaXCHV$D2N~* zU+efLf7wz%t;3&+ZCgt{1%-$7G&Cnj0@y3o`>os8+uN`GY-(znJa6J|8^f!2*HIrE zZ{n}K?>KK#9ry&!kU-zvy^4?7*>pq{%-k9YA}w!+gyPhs9L66tu0se>`)@){jk}*D z>6S$g2y=GL({kW3DIE;@C9fV&|39DtR|Zx+%f&2OnSXY4T<1A^*70K|tWW`Q)uitf zB1c+Wk#Vn-6}67Z?roY;`xn6xk6bACTodwWDY)Nfy5_(j6;xQ=*q~EYRZT20($acfYf;l1b>=#Z6%yJRn~rX1y$rAse1v5{I?TQKh-01qhW;UteF^?2P5;)hkX>j_X=h#6x{9mY|e=}jMqHj8X=F4jy9E(^LOttaElw1k(D*Nkn(h<~ z%U>10u+dYwcN3o-;7?dT)*i-c(CM5R@y`ffFZAICQ9snm|5IOG~qN_p7t9OfRo< z;sdhDXHfF!{ri*n`1rA8dT`R%;QYaszx!fS9Z1f7?p*cG)(Vbr>jY$EHv6udw*bx_ zqoY&xOHWVd=I4)DpHUfLzF;>jrKqf|8lw!T1?rVC&`sW9Q7HdFqI!!Q%n}9!0VqDD z7ra<9m~o3=-)F5$r_#5)mTCqVhPC4sP=rT@+4YcZjsKP@DJmvkO_J&TY_6Ko?>wF8 zc4a@N=@-4Y<4HxW8gzbbD2a*X0J!q$7mfaqX&I0KP!+n(SeboARTUAx%aL5S6G;Tk zzaK+L_>LZ|kl@{BdT&TF?FBDh97V_hDrsILe+ST9g-q=PZ(s7KG$~}|?ZEANzgR|I zx4E!dvA46*21%B-9*5DZ`P#K>KeCNvBYW)ozgS|%e;HY2eVK3HzMUDL_q*9i(&wLZ zf=Wwi^Mm!JC4o5I(V3YiUVn;-9zT98Wlg2z4WR3xjXPDaSUCRG)1qc@j_4*Gi=90H6`_3(ARtwfnQ$y-HmiI3TxKF% zDm8O!Sk$f!(w>%%PB2v6o=L$G&}uX@x-?{m5ts(v*nThe9SJEZ3OCRSaIVU1z3-^E zAul(Ku&dZ2gpfKvb6aFe6N`N2_w?y`^oyt@-Tp+$_#vnR2h_Is7j5JKgAip)Ub4AR zT;r4D_q?UC*fLRO%Q-lkf+pYa2R)_}i76@TffWTq67N1qFXgq)qwz}2WceRMxaAIG zmSH&~Xg--|$0nPdtQXdZj0J`7^xE!7+%-2p&{{M#p9zmR0ddCISuzR=Xt%buxcsI7 zDF*>i7<3!)f#WJ&vGZ3|3M#9H^b{=TXrDz=EgVM^p)Sn7c3DqSS@7)H@21zvx3+8h z%xY_DbnvyNlIU)|BDL`eWH#CKwNhoN=`KsM$@iFUO$3TS=HEgWIIIP0L1whZouOrIkh2#}`c;{#ajD zQX+Wm`_EEpxrRoAoaNT7TN41S6@dfz?rlv|WK@;_37edpj6LZW>tso54}wU0dwUM7 z|0lgd3fY|{Jei#(1r$5(Gbu3`COT-3$E6eR7W68X{A{Ji zu3&Cs)Lnp+qY`fK>^J4Rzw6eVC`pGvB+4QU4JzEnNO<%Lnyp?$WHwhKMFP>Tihr}` zb{6dXs$nQkK)Q*4B>+Gv$7QZt+oD~fUz?ww|Nck?i(bL~uRuvqU?ZZ{@j)FX&cq(m z^rmM{@ZJyhvbAnK?Fe)4YX*6D$+L>aNm8eKK?LacEZ6m<{Ov5E@4NMj`^=fI*RM)H z_TByW74!yiaq*&sf`RvK|3Z$Rd(Y-JwVlHSJBuC-Wcimp&PEuMsOi|Z$Fb)v!n@u- zg&9OP0m-&5ytv=W+Vs3F{#@&0pz#I6dYLmmHkTonb@zMk?RWr|XW9L?R;_1O{GOxVW2zz`e~A~a+9DLcC!h*>BC?(lS*f7D~2kRSxG8uYXr52hePBrUy9Nx6Mr z*5}_B=kf;;5tLJHNfBEs)2ftvUAK&CJFd#|rVJ@xx-RyQJc)kw1xKRX4tNvMTZg%#Q2K#GSKN&;MnNvGJKjAPdLuA{Q zEnkYPADf;gO4OLvWGp(-mU4ou^83?7ai_XFzng}iGy-F}W#Do=t7P`U1Ap>+6z;OO9yPzPoayau>P_Y*(M!tXlo&%bdcGt3Vm(#!tz{6wBapyOo z!sgnKU9>FSy72@OO={{@sD~1rk$9Nb`ux`mJ^Bxk)}{qCky()n zeBdbr%miRidI@K4|GJeqAVRqwD|fvi?Am|JDWk^&I1+G~y`iY2)K`|Mpil+bs}7)7 zuGa>@dmvsiTaz?3_4TSw=Kycrag@D!^(tT!wzu3DGrBIOcHDl|`|aB|s^iDa^T{4P z(C4d%#aRZ;CV@=-__1T7K-24Lp3safE-p4G2>!h0MP4tmZ@vmGirc%}EU3GH!mliGA-BlV#N5}U~h&2O2R13hZzC5&Z z=iV?yoEq2KwSKRbPkrTHg^g05=I8J4z~;n?JKYVHwg8UFJLxEnqBFDykW8p+$@yY^ z3VABEVFfrvnYy2sUh3I?1-uPu_^KRVOv276#^}X*DV*8zeB?TuhL}D65YA!0eILkF zXGpt#7QBp#(wQHy=M*lxYFji$2q}6Lj!=G)jg3uaRK5kbv9*3tr&gA?t)2vhalbWZ z$Xg|k_mC%=V+E@x+p7A%JO&sHt%W|*v?Zv5Vt#vNjRi=af*^+GQT@l88TZ$HW7$T^ zVp_Gx$;q#sIhvg7W;WBF0_8S(YQ1Dur;b^iu3y5hkA{wps94`tD`d8sQdYQ_nUOIB zW-^i2CX>g#W4-fl!0KV4+8IA9{ru*K%b5~HY%bc{=XCkB z0z+t%-Iuy?_V}No35uCwL3dF4a?G1(pE8^$L>LmfR}lcRnWfyR;9D;$R9Gw%-o1G7 zLcn2M5g`dUE-155F*Dy6V(7BR^#g$obgr`mG;YHlr>|VOB4E=m3|$j;0EUa@8?1p8 zELKjY>AZME6pBF9E;G<$6yy^pxwytbi24dag9s7Q(1GfyT)kTVJ^Gvj=uZ>N^=xh* zr4!9zf!yaZ3+<7l1LEdIXqce30OZH9-#_1JES>`y2|=y9E%A?mWJb>Qe7bEWh;NyE zEjdoR6jw-m4{9mfL^2#a&mE9x*}8j z>!scj!0~Z7qP2h)1OTK#seC2l!w9lw=8cv6YqMRkVh*MmT$ZJOi}O>BDt#eI*C725 zIs(u;LHlcO#oCoD|4;zP1sRLPF_1vT0M;R0MlPdk975)NaV7`5s5iB?wl?R53&&GZ zQryR%oaV9m%*DlJI#}euTQCNCkKEn6Tp*cY7;uCblKNrCCSI<0lBg#DqVeh@lDmuTL$KZR5`ktbx2}F;${FlO7R)m znV%VB_veiEzd?o)^xm`|^w}7KQX_Zi5}|gsVOVt#<$w(wpcP1<95GGHkdNP$l`(^E z@Nv;%u~^_~1JY%oC-)6BgAmw&$zQ+3*UAcr;Cy+n{!}i4r4&Gq1(XFxR|yg`dh8Jz zE)`h`;MxQK%Cj}OmjLx0h*hAb3LvdDHPsBLx<(rgvy69y3q#C&RLDuQn$${C}^Xvq{PO?a?3y#H1+T(<_-xvvv*Z$ z`H>hLz@KexS67#pse{NEIDO&$6e+@NQtp~+>gvaRe0-Gb?Ch35H}TQ9E%ci^GFh~L z*0ajy7I}$nH5u)320nTe3>_j<2!Ho~tCksU&$&a!K~Yb6K_TfnpW`_g5P<9O^R1HU z{rjKNG`VYngNYrWGLN55@*cl2pliCLU$!-saPZq3XKR7+J%P$BSZo8j6Y2_wym|8m zRETSrV?pXP1ZDFBJuNMlr>AEK=siali>Fxy`T5(%T!T%`&IBSk3_?GpH}xu;R{D() z0E>Ct4~)6nOfz0JfJnv;v_lI`)W_Sq2Jqk$5O5W7xTwR3Qz`VZCt;Qsp-Ae)0?KO z&aK@3BWXDRg`pO}&B1P{s01VF2d!nUyHW4oZ>0RePyvns#Z#iBq(tvU0%I+}O5H&b z5d9_{82c^f+WLzd85PdZTKi3%(jz3JXa6%&X~ZmQ`zj5@bby(tN8|Qetl)c<&bdX? zy?6;vz8w&n`7S9dhYH0tzh@3DZ_@;w%zenE=Lz6DQ)A;~_crMbl_aK>o+S`##{oB; z@FC(leOfPSRlu$qgunY0NlKvP?XF(Wbz2;MtIsFu;X6J)UL)=_rBmL~9=!BUfF2!M`C`}gl( z-3udW=jVq?=|H1E%`{zuf`<(TOS@`+n#WQ`gBx}`ySm0eIjsR%#^UG>P8iJ!Eu)f>e)TDYgv6@w`=L`=|4N%b~ z*qI5)Qs1Eak@B9D^KG~NNjzz8?uPfBm)$gP+_+P?6Oo#l+V~;^wd(-wGBxj&Znb-(fHm}U z#CGHrI|(3UeJa=zey7~FY<&dTsAhixpL-q?6BHjG-)c)z z{H(B06rk_e%uHXOx`QfmUZvHy&#eI>2-4?M2IG#Dv~-_!XYG@>!63{YJARx)TAIZN zvSe=U>xBacmbA8yk2ElE81dM`!cboe2i@}DzX2EDpaKeSdvh%v+)B1V9D=Ya?(-b8 zzMJv=^%g<@f ze53j0dT3hSf$eK{N>S^PM(V@tY<{peC6?G}YHCX6he*%_`44p#f4id3{c(2wE*16j zgm%DB0GUuTI{M0E2zsOdWB!?Xg%_#tk+cc@0e?s((1+s*G-P6AT>7Cv$pKpI*RNmQ zXVX*d0SuWAmwB|?Gza-lOiUo@aS#s0(#ncLwqf~FFAM3ZuP?%LVIjhR$4nGY zzkaBlVbhlS@Kp`=gS^Yqr8mJaM3yf~E}KrWR2EKWi=2YxS{1??TI3LF>_7p%L#xbHyiodE1i^rLrR0~&y4kP*ScLWoIi z&*!z0;mw_cCDerciNetS}&>2ayWvfEteK1MI?DXftwaf-kn z3dfh?h`)awj~zQ^3|eQqNU?r6*goX43r9{^4kmCDk@k-B7991nLf|@|UAs zN$axmhIMv_KLaVeof5C`z=(LKMuBfGO4b4p_BW6hkQP~BnXfxPP)NDEz2SC`%_<*h ziPy%7F@UrH^M9ll#17o$x&I!~S7dn4_;LBA<>2(zQE`l;LGN^5h+&`%E z4anjf2vrZjH#O)`&m!hH8LY7FvGkQF_fF$cz~*lN5xWEY*VNS|k-xV)xB~hr`?+(Z zBi<|Im%rNZK*0Mu9rh}fo6|Dqvq|~NcBRB4lx;-vpXB= z;X;oR2@Vav91mcsy82^%K0BAvsn*0$2#RBrQcIXHBRQD59!|gZqCgotW8-qx>ENd# zx|tB0Fm({m`4R_pZ>QE~EC0pBVva-4t2I3%L&4Ki;^@(%z#1e<#S=BMdr$)$$%%W= zl5}%*>aQ8L9xxmkF*xFd4Qc9?3`62$js_PRIg^Mn2(O41fh6?&_3Q8i+miIWgi8xg z4PZ)&(ot$$)qj6A30ie>tHlc7pr$$$qh~K(wD+mMbFE{AZ1|S-tcXbW$;enwii73; zfAgLuCzw6I1hM3J;eSiIR-(u-1udiLzq??zWCni_--zASuLmC0ex5^%=F(Vo;?OZi z3{Ot9=2sldAWd2jb9Q=4@Fch)9`(t=SQP;76|gNq8;I=2ty`)JSGaF5+Sf!g82ejAVIIy zD3O7d0*1_k!~y=`_Kct3N6H@LeQIXrr+`f5GrF$ySPC;r?QKp3hldA(Ojq}m;UVP4 z_(4C&8o#?T=m7LPv|Q*J1!oVRCzlr&AHj4WzWncbE-LCZ6{vOE8FNu)US}pLlJ-~r z{>?G|ii=QO(Nw%OiYV~p7%#zc5gUl(M3Ua7mE2_|(8hh=eOpJ?#OJjly)5 z!Q%p5pyy!ZaG0nwft1P(U?S0N_z4lKI|jy4A`ID7&=Jf92MrmM2pK>x1{9Dqr!@}x zV9;hqZXopp5fh*=ORK9<{iVRS6)#;fg}fu?_Jl0Ws^Ku4}`+(RNt z)0$xFhK2??ef?LBE-17LY==Tf>G#DJFLj=A|8z`qUOzhAs@Bg(lFkUYw@I%V#Hwk0!MQ86)Q zz{}g$0AZd0s1@}2vk-8tEoY(NLJUaR@ z_(-Xhl$2C1U$*SpL#`)a7f7BF^W70csxuLrH&x-FBPoOl4u*1!q2I0x>S;j#Z4+S+ z&;sazq6%Q_JUnDzrA5I{LPDYms@{)WXvK$uxE{zCTR)Yns-((dp?R&|{Fa z1`FuAnCHFaNE~4NcbczqulEcpO$+JpX=M0&s}#BfJ$z$mh=KVERAVlPh=t)cu-bZf zc(fbcy?d7^zt;iPiQtB=l1o`$q|N6ayyQ0uhZiu5*<(P23Or=xsi>%_e-5`eF~$U( zfYGdX@_J$|4A9^OOd-O^wbG zD1~1q~Ed>AJ4C#99B}xEcXzlc@W9ItuG8v)6h(G`Wz=Afc{z_xVieuT9ABM z&+4UCqOYa(mHk<88fj$HFJiDafJ*6K7tru3Xeb{T3!tQ9w6x%-tTOd6vFhZ-mD4)5 z3`x%vl6x;)g8<<1YB)$EwHn-!0$%I)6AHD?Mr)ppwz2%E&}*Zv*y%vl=pV@btR%?a zA1hQ_b*3BQ#Pq-1qpsNdXN`n@1pa+}WiYu+ZREVgasVWOqr(iQiTOkSkO4Y2N?;9! zKF(5v-wqzQd63aja>0R$TC1K09|v?q@VNp9-$lB|=SIOkJ$sn*6|% zEQ?@}a3TMtv3VkGC2J!drY|YQc$VNQx-^sZE#JS?gjN6)aO%E#>_ck)8;+coTQwqeqQ6u2dphSNa!@fS19=@Ge zA-<)MZNW(l11zS63EyGHx*5ocP#2;Y7ybPxW+hI9t~Tk1!ZrwH?Wg{n<@n5sE5vJ(3CZgW*#pI=W_##hfkYLC{fdlh+eh%h-JpgMUm-c7I<$`b0 z!ES%RCw|t*h$nY|un#{{C+$xzarpN+0r-m0Z70fP?Y4tl>c|4TS zFOCHKq!KuMs|&pKjJ*H<_V@I6c-(4Um0?^fm%JczGcaA!^x(VnL*y z0*c?ug~V8)%>WX4$DjsQ0f>0hWhLk|r3O%bpsWp!Cj!I=jLrEs6*8E4tW1r8=*ZIR zxJ{-|^$PdkRGtie+q=LO3r-ylsI;g;z%7c*;k$b~TWP?*p*i>71~N)5q}F99`;yMD zz^Dss6cAYT$jHbk8&U4;pQb;1h;k?N1!G5?g;8vm4fhnUqCHfVM zAP6E-*ly(NTF?|>!k#RCBW(x> z2w+q>9&9Psj-*_hc~MZHC}2n$tNIlV%5zeeP*#-X!M<$_S$-23$Fpb8evJDdQ%*~Q zXu|!0B8L<3mmb3w=MYTA1rx_d$07^WOfb+s0+pLlc7NwQv;p6Hcms0=j2#1jr#blfua-#p?znqHGYR>r1fbX94bTQ%?wjWdcNR`3(mW79g7s7NRxycd z-|fNvMmvD+hUkNfe&NU40vdLEUKG<-bg-{El1Dih&2i2j0eljy9|XDKd?(Hw%7g52 z6tV}`sR#BH|D8)t(cr&)fMVbED;$sXV~$)8P(mv;#(vj%-w3LdRoIWP#@mE}@Pd9E^hg5#=KntCb+H6!9Ty(JnDofef=7(nnCfag=E@0|lF&)X_m!CdVyeXz&KYUmy+L`<{&oiX4=Emrl-h}6g zdvs9cXxAfV!+G$P@N;NmpJyPwq2=6zoWmUX5T-`DzILAv4|IC;QEGTP57&NHewro5dM=^Xdl@zR@?(j%Gm>z z0G5J}0D+4v_)1vZ-T!r!;`9FNacDJj4Rr2 zG2~a%Dfi5G-U0zuQBDqnJAx6-&CMG3g~P?A4%i|0^ToAg}(z-+SRVoWlkGl`*KWD(#Xr82}j57gBYP(0f+z>(@vZ@ za|Bxg96AOq&uZi^Mnn$)8^vO3NeQDuAlbo%JL~N7?y+s?FvImZqY`FX+NmXa+N!}O3-th`C5@5ncMXRpQ2$d0 zUKKDNX4L*i$QW*m7_ZOwVg7*d*=1xZh9YD^{{hI3F4lr1#|i4G@k#+>7$G?O5pe$5 znrY34-*Q+MqcxK7Hop?F8AJb@`|R0o!1w$|M~yLHw?t680$e3+@7y_wEE>ohgN!?H zn;{g$r18?80t5dUyO|rPukopjE%p@YD3}lO1bB_(Pk1cMlLvhO_5U z6_6;wok5AAS=;}LzGb>RUMuFllpIAh9{lm+*~sBiAianrgDVwDWyo2HoH?M)@MEJO zTFHar11(E`xLk#Ll67D;btRc{b7W)e?m|DO~&Im{4#}nQ;Z(Mv! z3icoL_-7{r61^agQs;l_@WP;bd-mKp-U1MS;OTW8V=K3F_xitvLzIndWK=N!mkjHTHbACQ531IPmV|9UE)#l{AM*a>$* z(REND-6~hEe1qFq9q^4C0Y7vI>!@(8oJ%xpC}AKjl!-D#0??j9SOo;5)6owuU%o64 ze*4pf&C*AaM#4{?uU&%BSA@{Yo%UE(|djramK0>%n0>ENZ0A zS1|YAKRAMHPZKx06Tkxq<%rbr4pRQ%_y3HY7$95DR=z$HBk^C~@jObL|0$P<>Hs2e z#vuGarZC#_p9-9|K##fNz-$js(>Vcb9`}DK{|t`F(*MI1jImw<_Xe@U3UvTrq8@0e zIQ<8p+0q)WyJob`YXaaDaHBRj0~1n4cSLjMb5f#Uq1>QoJ1dqyIkcbIW8yedAq zR5~2{!|*~ywONxm{&p2a!{ik*tQ1qaepH3!u;{^KOY%PO$x$9wEnpHkr1uO-!VjuX zC*0m0ihV;Rd^m(?Gz2K^|MSh#U(R2d#%EO?l0vjEYLA9%zE4bis+ zW@rmsF!KPc_-Xz@EkA%kI4#nC*EoZtyW~jD&DqbJim!U+R`){}M$sTf*+SHR{$BY0 zC`XKZN3wEaG;5XmX;K{Q>|-*YZ@Us7UC@h_a*2L~9l3;){7r*0SzgIN!Alh4J-7r1 zjevX4$^HV>K~QKl!LkX;3%tg=r$ZTgh=pO$L$O_~a*_&Yp%uentCfT|Fd;1lEAVe?vpJtPRv^(*%eRU94a@u{aszS4;t9>XV3i6JyZ-jPysBGuA{(AE`h(y>q3X+6(zH$SHq?fP|OhfUb{c~2N_Aqi|*+XY?|2ow@n5A;{c9ZEPLpmGu9 z*viTZH7l#_XI>D}fkCEaX6Ab)LAwI9DOJDR++3EQH?S6ksz3>Ug5xd?eCabe6G-#e z_-Te$)$I=bVCY@&?V4Ly6mAQWXFTAB<+Pe}>Gl3dRhh(lDgVQpp#eGAyCK!WI2a#z zrJ&^+d;c6aw?Ev^hfCt0hLzKATui^=s<^^K?}=qxAlLEWR_ZuQtqVbFi>vTcjazbOO%x<&})gLXVnPQ8@Uif3DgT-qx01D`cGk>;<)<*5!P&Qv{y2o> z{R=H71akE6-d@Wtsle%CWI1car1rfURZa#othbgd_!=JOEB=C1{bB25N!jO;)_Z6Q zE`sRsTh223AAXHu#%+&ZP}m|N&jrX}DZK)(942;Mr|(c0%nCtE(L(D^x&k6%rRI$= zEA@w428@fRMegFG6_0}iI0YUJGrV#E9uc_`?}D7~v%3W!j*oBS(eX|lK`%sXNDIf1 zD~3S6YBUy``CGW1mpPnW^pTa^lbTM|dLYjOr91-kx!0^h%?!+zpLqp4>n9x_zCLAy z*2)2;VamLCKUPNu!#GjRlAWfp!EoE7x0oO&&3=RB&=Pth&OMlPJ7lDd_6?|U9QDHv z1@sx=Q}XpEX&~+KJ%N2QW}ff+yzVZeJ)bAT_M_|b6)Vl#J^ni++mBP5tiSZa7k*t} zp@~KK)eKdbg7Hep(_FFo6`J}(vu4M%JI9Q*&oI8yxtNt=GOC_R^6cidhwvZWAwGnj zq$6J1UHtRWEsg&M3B&A{f`lHy-Y3?GcN9h%hir# z(XAMm-5{P9%+BjXRpj=gc>V5#weQDoT0OgE7}TPUW2ozUN{GMj&zVp5U!}egCbC+P z7F{_4<100#G^S&(a>b_~%@ios*GZRu6!yB}uP!geotL=Sc!1BR;W~zbOa2l1Jj%K< z2mhv%TP~CW}oua=J z<0FWL35)9aUcP&+&t!Na6+0UBsf;q%1DpCsSD z8{IkjE_3?R-QR{h>CgB4xHHI<`Cf`UKkfHqsgQO5X=^`|CSlWLO%u~2(yDO(p7{@t z#s}QZm-f8%bLFVR&(; zxE{P-u0F10UrPmdqwt*R_#IWToWrFec3sCCq{U&T8uiSNcZ9K?a#~gk5ws7yM2)kOIZs!dqM1# z!IU#DDP$96Q+S19+Ra#Lf%ZQ(Tx&-j2Wf5e(fQpOUr@J5&ZmB=LmKtTrinMNVsaq& zl_R*W>RxszrKQ{Dcrq!+-Qi}s@Qupe5PrHr#8x337>A3f0 zuXplob8)_X{Se1k^ynMyv8{94@7#hdEn`WQx=Ce8DLx6lx~_fhlzD7Mw=j_nDD=^A zk&5OW!-$dq6Rt@2&A^8STABm>GNfmPi*$6Y&!3abE~6c2J0>1=A1b6Y9c*rmm_Ypb z22Z=>ApJm)ts`xc7F(Cqnx)c|T_bZ^$j?u@Uq|D^=T=v{m80l%yjhR|z-dGxV#U0YXopmYtgN<|nWQ%;*y^Bx1=B1?X6q(#1 z`}8a5X)x)RUe}5=#+18Xqw*=LR>$jxzhzoJ7JGI(B;K>$>xI?a8ifUI0!?k5?n4M+ zs99@HB&8$@%Qnz#Q09L*krqm-GvGPYN_W$wXG?Xn_CbGyqJg}gb=1+d8~P+!p>f^( zWV(s0hOfhJS3;udv{c6Vb8~#ZaYME z_jcGWtEAVvRtwS$U7iaQwyB<4$KKH46ywG96P|WDXD?Ptw)<}9IrE>MvTRz`#ARyT zZK=taLO!{|)3B}j3{2PDW2=Z%o*$&$TfcwV-6=v z$sqsUAFg}0wX^n`2O{ZP|HRg&t6xEjB9k?RMGVe8GW@W_){HN|Q!d;c{5hu4C;&Uc zk@|&pTc`+^)a4V6dJ;$0p2DOQz-;|%vDqziWm3P~U54eP&P&#gmo%#uJbYDve|_Qk zj`974W$zvLrMI=~&8por24rXFzKFdr;5wyzkI`AjMY!jE(5k5=ug0Xd@is#fXFs9x z=cf1XN8z48#gA6V`BM~so^~##7n(O195d8 zzw&z>ONeDzW?wgQxahLDzT&;)Tf7$kvQtoq561S{+g4ybK`O+{UsaumIP?9A zzNpV|6gow$uQ**N{J0h4f5PQR7(*L@Fdu(MyDgOGx8Lr?4)pJovrONo#FLzOLF$Dw z@UdINrYeAB!_va5H%Tkx(+kEO*ZyTT@<~rm(fHCUogHxGOt{v_A6I@9yxyV85yTy` zIq~h(C)wu_(Ls*-x*;|3G$P$cWnypNeSYkxVMxZT`jr;%!u_{huirR5zDYrMqaZpe zw<6IKCy_x&H0RdCpU2zEGZ)t(%lpc_n)s|6l==0|!k?Ds|K2Lz34zo^J(_9%+f_)u zD!A+-KSC~Hh-z`6(pYdR`K-cj&JL_kQu~D|KRzT7G}4LyFYf!@^lY%g@%0{ zc+h+;sz>Z)l%v>yP^i6~=Mj-4$2Qh?@93HgwD&)id!32+74=T{-T64Xnc$AC<6_ST z1m(@V_Ln%U&S@Rf>TA91+6k`Ld9Dv@q)wX!dxixMmGp4h;>|pCa%GvU!+*WW`6fN3 zHKR0I|4@8$**Ls${WBx)`iB^H-pZpH5#kbUKRqcgdSudyoAdH#cmF)0XF>YWUSh=c zXoN$=9h-6rx{!~_c|RKn=?Gq=T+q#^p2<9OyCb<{!uT#j_?;NljCOQZs&UHCe^#|x z+I;BpN0DY{iJ~N@6X2pF(HAMMQq~)`j?$8Ri1&LU`g@g2+E2+= zIGg;QxWDCGNzp(%W6Uk5!e%AqrnUQGaS?E&*i*^ReS!<&J`AULX=g?5+-7oewLNXD zyP}y&=fIwK%tcpv{ccopg?vPp@{0bt=e&*XpCuhh3a#m=F4K)IRgJDt=P2#e`jQC` zG0)s)^X*RI-#J@1`W8NPNJ)2tgmkCU3JB6E-CY6#3epmS(t;?B zgh+>=(jC&NgdiQ#2=cD;+;QLg;XlT`9~_=BfHTh7zrEL9Yp%KGtQLPb7Z8c}FG;lU zyN@kwSa?`fCSfV23GhUh9T3Hd{hus+_%f%8GLCO&q?w#MX_AX(-wr9v5%^S6HnwbP zNhbQ1>l1d}QSS7HQtV(LcDX03@(1Tte#_cIl;60e@3->VN=rmsZ-hMW+TN(f(9`~8 z6I*M0X&m4dTPi&Ix3EgXxxO%WsYTzcwDZM0uC6on!>w;#5#>1Yj~q>mZ;r;6&e>*f zo#%2T>pFW%7qzire1G00k%K|@hTPn}YVWSyiwOoLo6U#%R9>piJChb{Q*h_UtE=M5 zMORq&GvUYRuT-R+Vjwtvq5hYQepI!VwEQyO_h~M*M&7aE?2gpv!OW1)rEe3_9r`{w z&lJpHlP7r4$nN+m`mXc$XCdSBXxcQBA9crJe0sH}I-hEpIyUp309iRy=Ls3P_n@SZ zs3`iy@44$3tpR~g+Fk9MXlFOIwA*No3W!U;w z)K9&z;rqvrr)7S-{4vK(`3m=egOq-BQr+=ZbK9pwc5+H|L!`R772gBev;&Q$aXnxc z_TGHf!(aNupY=2CD<{6pQ#FXs>F-K9uiT|{qGtpCwiIHBt|iTy=*-S~bHtRJ~62FlJGh|^zPk~v>RFCf%6E8-8F6$>L-N>cV z-iqUe@7czujgbxL)L(k$Jh|61SuyLxJ*Vcpxm0u%QeU<8WtD4-T0QiUwQ%k|-KJ7yd!WN?|B!cG7>!CO%X?1_F{X}gr(6X! zZ|80N@A{LDITT8ynfMjCqXyuIVAye!WGy!Tp?MZ}N!!sg%W0PymsE8` zK-3xIv>3L)^B>~rdQMfzCSDB;F{Q!{zwYw&I6G^axXk$$Vt+?5qogdaYtSg7I2bCd zMo^RJL+aokWG5J8%AaLwQW2)j6jqkRE+Zk>JVRWrp{QY0-s>Sdl5U|j7Kw+fPjA|< z+^EPmzVR&}Vg1Juaq`q;UTd#nsQZbn+O=Pu0&p@Anh9%&Zt?wz4aH14JbcNPL2JE9 z$I2^pfeV%M%%kGWQm35X%`zES!D|U@JYOsQJQfauid#t2B^YUOpUh!C>vOKz*LiZ+ z?$)*`~1uSz_fi^#fZg>(b6SXcX) z{P57UH3El23Tyk4$#wNFmo!Jrq74>QkkOWx_S<4h57718?u>4@t2^nqWY5xw;qmOl z=i2VW2LV3^rZVaZHA<_EnTvsz9MgoEHROD|wCe9A8^{d9#Sf)ztETM}>IJ|X%(4Zc zZ1LLktSN%potIRPMWRj6u_vp1pX*Rt#5enMIR70sJIZoyw`Vl%pIWMAa(Nmb>PB2q z6{NqWjq$)j6ENT^{|-y{V%Z5_J}B8Y%))t-=14qM=jEA&JNjq-Z!^ydu_arg`?Cy* z-iyF}rlmC(5;Kcwdb=EXH-2X8G|tl~?S_)sWAGDY+HTryU~^@5-HNmbvWlMAvH=Qv zZ{e-Xd$kngFXP*x2DLK&3euPXj7%Mk_0#ZYl`AflN5|Kl3{>Lgx~`s|S!QgfpM0eB z##BX>>-@Cvs|{x$H0!8&W+@t2im%|$GxIcu@_aQ_ zreRgY_=CgVzS(pAz|ubFS{`$oC>UFvV%NYe>cGAAj7*2<`e^JEAfC*i(u; zMvOJb^^Q*XmP%LMoTtmx-J9qZ?#ejG)#n_b$RlyVI-AViP%tWhZxmm1shWn-4JSi4 zxLS1#@smKk#<}9dT!6;AZK6l@{p~Txt|-VHBUM>p0{dil!TGQbDDzQ_*zCbvdfnH# z+in+17c1`7I?e*O?deEZl zwhBNKd>*+mVLU?sINipmI~ZIfqMlNvcDmN*lNpzB&QH+#R;Q@C2qeFCtVj_9=)qY~ zxP(M0>06OTso(v*;&OH8*7Y(c*A!|k(#i{m?7{pTSm3iAAC$U$vfPG}A68GgB+Ciz zFbp|P^fA2rch8|8yRu#*g7>L7VbqFObEiJ#9~E5|Lf+wr%Jr2P+yT#{Hs`t?1p>vvTP3iTT( zrZ5mhAGj?L1QcrILj8adXHb4+=HbD&w44mP3FKtt=v10iU~R~5Taeoi5}~An)ogXv zh>PXZLblcH%rOSDY_B*?yysu)qgh-HqeRNz6T6pb3eBAC(QP0?V80*X3cl?=nRDFA zCQx@H(9B9gSjm9%Sa8&9MHD(BZ~hyW&7tu>BvlJ-ScNTlE1dLe)# zH%dzC95=iWx%3<19&uziEP_vm#zUw^lS--HUmALh1~URT_+3AEdl75Tfl4Wnq4unOXkg2H{?lLdeT# zUB96hG9%q2gc@668=On)bemzlPvw_ZXQWj$*|Cvj_q>fgZ|}Ytioc);6$x?JTS?!p z0&15#&Vf&uRvI4xqtb(;l$5VR659T@;*V+VA)o@>kyEVAl0qx-%H;X=?ofyG)@>4- z0A~B&h`L?MA!LEO>A*`c)0-?-FH(+ef5Y^1_Qw|A`qe;hh^Q`-M$y~EV^7}Ya!Be@ z7a5O*#4S6eef3H!*{tkUZDcZ0>cD?#c3Wkw!0FP51FMMc_6PLz6E7;P;1KVYXLJ+s zY_n&<_9Y*ta`KI1)%1?ZOBsx;zl7(1mVUYW5x7{BAQ}p^2xQBoz!k)9gzN^s{YRdw z_+LzG7*n8>4H5^9(>PFGnQm@T&U>yrCjGS_H`ke`PlkyT9S{zx2dm zhhn%zun)Fos62-3h-hpiUe#3n4qe%r|L4VO(05W^T;B1?Y;wy8TFZ;F=ptvE**B7ob<_B*gP^e6 zmPDnV-F;G}*b#)&u=om7h$>=rr}=AnoufLa|G?ofSZ6G!+fF5p$;fG`@U!>ueU z^|v)(0xR#iae3-LE9*ITVz0pcqr@D@;7Gkr={CPSHf^5RiM0I%&=eAaLVVLx$lX6G zdv?|zzV>&T`~E(Jzm02H`O(DJY4LGO02}OIG>!Q`U2tq}S|hFE6&bp^sdO__lWZ~L z-OH86cL~@dr1sSx2TOQO~ta6>C0lz)4^_j)+WkS z|C&zy{j^8gVNaFZ-`vZ0=9>TE~V!MYF!D z_{D(*fj+&{5SRO~*|vGY8nWFnR};Crv@J;6)(U z8k%sR=#b^}1W7lzf4>!2`UfBdy0H25KY1|dlZ~#%#KiDYpuf3stkaZF{eI?U9t4nTHRwKa7qEBKl_yfBvi(VAzXi3Z^~w8twEdjj*C(PCFQobV802KL4TA<5 zW33AnFXmizl*ZDo*Ty2?5uoRrmxz9NWmr0~#q8vA6kBgP!aYs^l=GEV2K}|;Zv=&%Bq&)+`)`=Ha-4SlDQd3uIEIw z<}Pf!xcHC8Cm_q^;#CUi+SZ?d_^v@@ve6RSQvT71fHJ^zaCeV@;Whnx2pphC0U|$n z@4pFGSAf>^&6_-$cY$Xi{~Y8=mq2D()ICp^3)G6-|4flloifJxS!q1dPIvDY^LlP! z&bFI1O+%=z)`rglD$$?T$@dNyYghpSXpKwmGFz1U?$qpI8@zF$DCKIv`KzJ5_+JhW zpu79^Dc$>fKgW1JN@YEYtT?f;yy`{eJQLzKo!mS++r3-Y`E*cil+q?;{C(kM9?;SJGk;X|z`%oh3rk>eh|ilkJp>VJk@2UpKsK45{QC8w0%)RV0i z2X{B=u%l(Cpyq|jrk8Z(1@v47)wMA(EsF&LLRF8u$F`JchdEX5*X}U5yu{Rq#9M}6 z=G3%5)SeRPRuHr`$81 zidg{&V`n%+?r9jkgl+8z6@CPTzEeIrtv8f5`lS^7|F$TR9A_B7MUF)P(OLjBHW48p zk_oSoq+szH!nYbxK%%w;uH4SF_NhfsfcIKhDm=V-)^$b?5=Uvp{ufo&~ z+8%QK@^=Ve$l@z&>mkSeLp?RrJ2tTN-O zy@0R1+|e~tsAMcQ5oe&LeoAfpwYQR<_L7&%I-PXtxyR@7e6mC93fVVNOP(mvx`9UK z3DhskS6p^vEX!BxROP9*hi@y%Db1+}86Oh3I+7|W{zZSzHgSx-@3z}w;wQ~OFNh)* zS0y8u0&(RkWAM;CKDcLFHW+8(4W_y#j`M{5as5~XG|K85LlY%Te)FFoeB<11B2d3s zC&)xXPCy2;*2rnJgCVKC_dsz(rUwWFbmA3(29|o;2o-@8%NOw7Hbl-LmzCimm1PrQ zQWrQ9)j|x)QP}(XjPm6*pN+e=W5ujTUT}WDH9*0oWbOC{ zJD6+;3S!v~tZP@?F9S+g{;g_E-4ZlPQE1I%9L`)cmh_jMSb^=D1^gc({I(jzU8Hna zk2_G}+xyqH*ej%#rng>0w=%XsBpSy>nu?*Hr+%GHD)~{Qg8sATfYgNld-X5cl`E&Z z)g_uAsv3RzzSicP@1j$)gS;T@JU7&?xr%~vS2u)$d^UZ>F&<#WlDsGDKWqNPN%huO zYgD%g^1>bNSw(gzrKD9e6Kmxqm3KWQGQEo;YW}lP1d@IvKrR-)5kcoGxMO04Eri6# z-|9zga9Wn*rV6{J6WW)FU`&*!{1VVGoyjyqg@!nrng+Gl9w7fir0HLNx?J^)Y$=iI zIqrXcJJsj$3;_D@Vgz52tRH0lHC8L0pCtOj#!N6u7=yD>}rGSV3zh{0`@p}9E zLI*f>s%vUUk@`~bGo%*<%*IXaQ@GLdt@qjkjyNl@R5r3jX> zJZ{3s3v8VmYiC7l)9gmUXGPutjcr^-ss_`4mL?DV7pqul0iJszRcdW`_`U8B`{1}^ z_3A;9Rx~126dx`4&tHsY{|N-={og)j7#gbv%YCh{w5!w#m;yz=65|K7v{Q|J9wM_g zSH?N9OP$u)u(ZRz2RX(z1Ikzd;{lL;#LT=`=Eso$_Q*QjG|T@4Q8DIzem z&XU4t;^ljxMJ{gZ$99CNzW}f?VlhhX<`eY)A z5cK|wD47ahOzPc48ZUwOxB@hv-4OjoF+%Rcr{RS;O%SM|yToqpP>rd~SX4$n!zQ91 zWwNZ$EFdY1{d2ee?E}}vyrXXrmUPUoV#gB-v0q zmtizM<0350Xttd&E|y*eq#RGd8g;S!=-W&0PDAh8goF?F!tT0b0K)%@XgSO(&unbi zS9Iv}CA2u8lVG8&O4)X$H!2ts$(J)JuJaJd)ooK?UtBH4s&iu4&Ho}tSgx@*`aX}X zoaZg7@R0ejz9?3ZSH_|bc`*XzrM^`aSxC{ULDS=i@JW7Y{TA%ZRtAgU;(u`Mz%$J? z&u3^WOD>W$*Nw>Mzf&(8mGpRl3&Hvu+Tn3(c%r@Ug|kqxM0md2VI!2Z!V^Z(Ao)#n zwti7~@PdiFUF`V}+)T0~nO}n8jYjNMA^;VSRqHUuAnOU1(tK)?*YfX=5?A&N2s?(n zJCUUdpaED2hs*%;-OM-96{j{i?{f*3{&B(l(>JJCMxRBU;&#Y!M?}zZ&OY7wnmT7XsK9nH|lqE{C(`UY@dEf|D@OQ`4h&oZTf^WCYf)ds)tE+mp6018;&Kj9##z6 z7i_^r@O(^Jp&;8UV_e}CU_5-M>6itU&s3$QTN3UQ`PY3Sp2fU*l}DXadz2ep+x-V- zBv(pWUp*8(7Nm;YqdsE(u@HnQ_Hr|VkwKH=*P~l1yFSk_Y&k73?DlpTW z&u^X-=g_GB?bnwa1T|WvWI6pSlz1FYSnzkQ&~dl zB@G}L7G$n#{<6(7tHlESn{<@r%BuNyfZWIxLF<(f^2_5+_*p=1a5D9~e{M1+r+g&K zuP8Tflz%+<_3yWUXTw9&h%Zae)*-kUg)vtK88mf1vkAY1q0apl8_fE6&2I%<`2Rjjktpd=$Hfq~ z0oJ{La&%WK1rxxSO2aR`Z_Zh)EB`6V_LT+6g`aaCobSewpDuB{HA^P8%SC%5)OKCT zgfI3oCIv?D#j{PGLz5ppfZOF+Emhi~06I2W?l?Qi3ZYHva#6d)zFvUEQiC|Uf8ZON zWIyy3PR#4MOk2UUiRq>7sxGrBCN1r~B5v9i!?kgHae#cF-Ap+qHNDnYINI7ahpG1l zGmW**C*S?lUV#_;9-V{C^q`9Zqq$jL#8-p4pHcA7q(`5KgDQ+K88kXB^cH}rxh@kz{lje= zBa^X-&^zx1EQ?Tb2WKP7?~@A}KD|x&F}>A7c5{S0lC>sR75~c*>tM4=Ior3XC>Byy zhL^b7k=7EhV{9uY%n?UzExah%~X@DiS}R5WE|9_MnA)rg0hzYqaKKW2uUXR7&%7c4CevztTH&;6qm2IMmNV7NR*(6ADGeY=-$=WWd zzaCA}yZ;+~Gk&XMO~BoSEBuY;2kMQ^iF+@O#5fzR61Y_tpuyo(KV+o>qd(A6VsCHz zcv5szfPAitb=vHaGA~KKNP7Gr9dU%rMw-V|#}C6`L33l4^NX7TP?CBpKY=X(i2l76 zhe%u011Ez`MEm~hXE&c|Px`sMr+_Qp#`Ei{l#oj*t>DSu znKi`7JNASu zJ&_cZQ0kFcryh0l*HzlC4lK*mq?RwP2d$G`^j;Zw=`%kYpTrLlDczK8aX6Lmu!r0H ztUI!cAp$M;Qb2$c-Lq9zJjLLJrD$Q#!#U&wz1#3r^-a(=UTr<?^T0)yUFX^R=$S22!t#ULxTU@iG_FL}*B2!Q)yd`^Cd!Z)-X{thLiZz$tRVMHBi? z#qLNh{T;4#pcv*$Bxoqo{sv8L1u>%kkcR}_RD>1R0RKhXPks8Nx;wH$RM+z(^K*s| zb6U;$_MEj$1LenFgK5b<+h9zpavD0ibsqd2)Ng+tZItgm5c!et+_U%H_NXxu0q6Wx zJSx0sYRPt?QeuMlFDRM5*o{jkSx4Xb&$(yE)PvP$NTOnX9SeBq9pE|DN)FJIz92H! z?7$2#8IPFrtn0BdD?XUlG%5A5nKo?GEFH^at&u)G^jts6P5wZ?z4GEs03H09?vz;F zV3rxFmT5ujkarp|o4WUY$4Chygpu`BzfRknD_hpJNcsNZw;EYq^Qrn;B=%g>M9;Fa zQ_+SltXY7djx~ALQ~vE|1wA%oJlQwW_zUY-)KXusP$IN5G5&JA(sGfsd?K%n-36oh zn~F(%#0ElUn2x$Zi~$&dEc0DiH&s?74Z?$Y-mXP;V=P+OW!!i`h>z3G1@jteUw$M-%&~V^AH30dV6r_mf%oU2gU5O$A8(DY zE~}avTYZ-#bcdj9*3xuNKwuoJ_CcvjnE@%_)C=q=7LX6Y{{GBoS~Z=JLva(K3iJfdGzn8g zGn?^Sv%WsDYbGPz0=f6~w@&iY-kXQNQ(F5f=X*bXOwElthJR1iJdPVvl43Dce@dZr zYVIxJJoWX=ck!QM9R@g>gj5jwGvApzdsmPB;U2l88h@xXfqiN)W9k`goPfeuYxJ)0 zHOJDw!dt>}M2j0sv*yh}&tO+`g=)c6Mi|#C13}n0;8qdUE)%FMmI>WSR|3`yM0{rg z{voMJjLt>)`SCh}$YFcJmGcaZddf}%@Az@CC^KSSrcsPK?wS7p16gd^GZ~!s0&-}G z$Yb2QW>2i$Q&?Uw0&;!(iycvoq>R~FXU+a`mfWkeE0>#?-RO)1nvTSXAsTA@;1n!T zkp~rtH&A^DnhA%=^dv-rk8&KJ-tObf7=HIAn>cdgs!c$KjTSjw0q_tYVt6lb5+k|F zD{C<;a>P%O>3Vi=3!NMHh+&jhu-ymJ@zvx>tC^2@8yC>f*!HX9{njTT2^E?ItwCcgKL^%D$K}!wFp2SjfTQid3xhT#oVlrVZS_08f#N@W00RHo%7d zo}WL$U)?yi=OW2%yH+8Q8r7*RGtjloGhv5B{`#6Qx4(Gc6vb`YP8wx2Y3u7GKv#G< zlm@LBIC4Ye`pj&j43N4b+p6XIQGW~BB_^CrjUOc(D(O+uQ#c$1Q4SxAvR+*}D#lI3 zU`17_mTL?P5bO)_>IorSfZqRM&_*z)ebT8`-+A!6`Q8i5FUS_>!#ixs9T?1|`CU$h zZ?%~e6vo1B?KPrhRU1hWb;(+eu04T}8UV4}%1S$Wu+ikB)jHc2F1p+&4jt=+f$gZM zQ3baHTW*M0Gcoz75%XS;(|$0?Y8O^>#6bDrJeqg2!0j2~)zcq$k$1<-)#MepdXQOE z;McbbS}Cjh#kHtQU4`sXs#N>%us`4NNTwHzi@Wifz8(6#h>+xa*IlT4*l17g!256$~gCz+N4Yn?{@DWpdOQZgj`5gm8kW>U>d~Tsx z)Yib-8MR5EGy}^r>e5axe?iNB23x%#^pgGxstQQ9UNpa|JGvEl#^pKq$$>-0Cb1VC z{oLOeA93pxr!)JAICEyWj&kbE+m}ZN6y${j#)~|e{k9C;z3-`^EY9a>O{bYLr>tO~ z)V91V7P2G2?rt3z(8LL9Gh}?n0&XDGvON{W)#s}O2tN(gZ!fZN;~Z4SDak0%m2@0pjQdhp@Ht3wmY}Q9q})^j}j_Q?Un$4so=)<4Z6{+ahq()$B;hG!$p_! z*qFADtG$Wjyur@UCzfg>bfo#sgKG{ESeOTb8QmuYUCF=r<1}%f%kwILTv4|-H&KCp z#?ik=skD)cg=`ymZnP6OAdAx9YJ3Da2yE`!IZ^WhEXevA+!XPYyReXa1y~UUt%AbN z#~-6QF)f0$u{G9GSt^<}-rR6Z z`gA9c6dFYqL*V*x&*`IwMRw&*bo{Nu2e|L*m7bLAQ+L&_W)#worJ^>~51>9=xyb5L zGoe=FE>_fi?~U`|7nqg*@|E<$*H<9Am4@P#aU~Is@R(TS*vOyY?x89A^CtNG6!)ZS zHG7FYX%FA~89=SbK4o8V>Jqe4_jmqvnNNK59|*G^C*Y&Ao%ehVWTJ=WY`LhqkW+AE z1|+{`xr7OvTZ1j$cD-M?F_rIbFjb#F$i~g33a5z@ zIw0q&4O4`eK7!muLH-xu-cS^Dt+Lt}n;pG3wDKzHK(WIGf+tt$iAvsATZzW9q z2a5q-bjYmQb25Fw0u)BxX_YtqNRUqmgSj0k{!I zSy_9pbiWqv`${aIAD=kzw#cwi2IVLbX&Avng4bn7_@_$m`0r#6rz~N4yst-PSqaOJ zvs__5yk2bosO3)WgDFWGlE!JvoKs)7kqwlgmb#H ze{A?PnVz>_)s+V(F#2E9{T+QfD~1OgYa){M)g#5Hazy*V)fbkXUtHAh;k{Z5Ac+3c zU7_OrAV6xc*A|sL&W-oc@v#&d z&<967*`%93i6{=cR<#ee&dxN~@xc_b5bwmicgW$KWlEh$+m1*K5C$UM3h~ZV zI_{xPE})TTL}O&sv^chq-`{h%5M&@@2&c1;eMLMYT{+87F+8GUIT*rGtNRn*VK-uf zVku$!H8m%~CqK#!p4|9&W~y0by=16o!lMdCcL*TP2b7qA6asDvY5ihJs}=!%ELyw> zYb}rjYYXE$qx>_I83i@oQ_+%Gc+0-dOQs(>{cE#Jjv>O%%VeX0l?+|eX87xSC-2iRlvNH64g6z zbhx+&NbcEjv7_c(`O0zudW%U{Crp8b0f>sEg(-~vV|e?$VSZ(Ygm|$ak!3(LBC7CI z?!i+uV7ft4h$I-?P(FPfU>;AEW+a#{!jJl7^|AJ-3V zTS(UKQ*^by^{fc>9H7{(5Ttrtr@T|}shH57r+SteCRRRDk;gA{#V=@+b*bSDeX3gj z+#>#1)r$jm###Fi7flx=YpN)V=|AMKj~t)5l<&AC%Yjz zbX;w5>u>Ws7`?VeZP|aS6WZD4>1i0u%m9qa4|uAd5^nvI6ruuFKfvS~zfxI7`w}l2 zI*6cO(7L9B0YTPMn_+AzcxCt>U`3EzMx=jRW2n7v*M1tgJx7{%-0Uht4m2vh>i}Zr zkXv@_CRxK^B1T8@AndaLb^CrB->pQ_HRs0>R4WXHnVJ;KGu(gYT+nJ^Hh)ulK=U@8 z+y;}F(V&f!7YEESk4SYC=_x&n@3je?=oaQMcKvf>Mx3#7!kU6?F~aex4X`&*iEVEp z$2}aPrhFbF$Da#Sr-Y(*nFyha zsEnf#P8-1_rnN}|(P3)R7bJ~?1;TcOG2h-;mveh=0p%R-LK@A zrxKz&JMxV?sLOj@&Mu=aIG0vAyI*Y!0$8pHG)UM(rjvXFPJodjF}3(Yl*jl1N3r`V z0a~@aC7hAW^O1`5foeRcfElIU38oj!P`W!wu1Y9jmL-KirPZ|2>9G}cW}>E4Hpdj# z!i+(ps!gPjw7{pWEg6U@h1gl!0IHT$!R}PuDv=UVksbK$f`KpXq&{=K7tL;;_VWC_ zHpd=$@rG4G{0|1caO#K3)Q8J4RFO`67+o}j_i^`6l2fUK?PZ7g*+=@VB!r#U@LqPb z4rqTE&`{Y=ywTvPdz-*Ejahyt0>CsFjbiMWB5^z)Dwh`cpotB=D(&HvyuuUwS|h{R zN6;)QBX~>=M0+)7t>)s_k##XSPL=9o^Kx@udx zkhy(J9$RGH-T|4H(O81~P_F~1ZoO!Vu51Yt80gbh<%(!--^4N=i!p9yKNb1%x#hNH zd)nZL8H%X;m^Psmpw~B)#js3qm}JwPg&XH57vj-|parq7_rwBXaO`=~P+2rf63)I3 zCoD^PHnCmxS!14Zr`T69Ol1>-j2T|^o#}rnx6myiaT>UXlax)kVyXSD2Fo`zRSE_xd#lf<8GjdLv8tR|ss4O; zgzF`JPsq$Y5W=Q%m&w0HVR9#Hhm%5qmNGYYp041XFcd~D8n}Iw01L=Kqty)ZP_!TN z^hu5Gn&7mN_vT+LZXj|8P}Bl={Ov2K`RGJ&%*3ei9Zgf?G|9Z|mWA;fRsgN-W{2xMV>`BN~*1g7S=($r|xh1B}V!vTGZK* z$^5V62}zL}C!${~FIokh&NyTs0)JDPxil?lA$v$e)=qF}V7Z3a>c5_0!A6CK!gCEi zV>&C5BmI{#?pOP6E{XLM0pZBGhcPMaK;C2vMbr1a&4yYoE}1RV37GJmw1^6yt1^Wl zP@c9u6<{op>l^S5XWHPiH2QCr&M#!`{1pzthwPaKLs?#L}o()V8+kE5X* zk|g5)8!tu?pc5cDTa_Q3aSmkPSW*1g|2Hage8D-MkpY8UbnwL`%xRXMz!XvZfLfum zPm$u8$@mN!bucAl09NizSH@}dUWeV~RT)PsMcRw76WQM#FjbjRag{mL^`j5d#Lm@%*^b?2;o3HW3E9l#nLT1uf;elcLKn!N(GYXK{S!TUtX&fI zSR^eD8?0JZy-~L+=7Yf!Lv-WFivDmShOD0QPg1nEu|t*iG@MC9BD*o#eyS~bXnwM= z4!tfb49F172q}$H79n>@#YvwRLkF_AwSUFkBiujATDEVKAAda2zl$G{jN^V~7{B(0 z?Lo$^ikdD5?o}gzvzTGJNWlvA(^j)1|F51RTL3a`1Y4aD$)uD1&(go&!Gm!y*VM@k zy5+C|H4%;ym*yYMVuM8YXaY>iucjXc^f1Nb_Wm9A&vt|Z*!)K_PQziBE#bz=^LMJ2 zS}*&Ww{?R)iNS~y6VF#|Tm%Xu+o)z=v;Cu?6Oj5Z#!81?NI?N(%5 zFh-wcdTQ*&lcZEyo|9veJKpZv;0U`aCQznS_QtNX~THiMXUgyVjat0W| zi2P$~l)FStR?|}hXHxuZqr=P|Hx6}{E^hJ=*3tw#0RT`BhX@}1vsceXMAzxK8dtF0 zx&Pw)IY@S5!j-rF+#)UI=G4^E82^!+ZZYCce6#=+{Y(5mT>~gfqmn;Seq@Kt5b-Zg z(>A~s6JA#$HE%D1aWI>~wcQYwBljC8qTuoB2%p@_KX+m*NHeUJ84>dTp;_5x}aV zx|A6ufF*#OFeF{O(Si-l>d2f*6kZ88X9!rGb%lEV#0pPmbsJ`l_gqx-88lY)REOQ8 zD~QqZuC(-~LZAtyZ{Ymm#!1)r*wxWh1os#-y4>BL9(RsvZ(hdl(1pjTO{_Dl{#<_j z>MfE6ck7VL*p&Pw$a*qriA^Sje%&n;IVXWm;X|#W+8+H~^^-z+7$l1mR!tP(D_eL4 zpHT(odLV8A!+~%8nAt`U1$tCDj-Sl1=Tl*wIm(I^vQn~7En83p7*$;b_dGtuTm1Z% zn3a>*>+zxKi=M({i^9_adQG@wV8Ed0^m{g`7u834Cz-qkbM&-OXJnQUJ+El;S0Vtz zZy~_%?V$8Sk)xW~8y#d`D4~po$;pl%PHRN&--`T&XMlc6@;BV#_3yt$lV?9Vpj8-BN%3CvoxZsPF^1o9Lw!Je8)?5N#(O9{Cpk3 zr)ZL~WlxiIcUECaN`d(1%uTCXd2mckz_;tV6(LTCIB_hgr=S|i zdPTQb?x5pH$Lu%qIQR?E&BqF9kFA)|ji`xM6ZnJKLYrj{cK*d88ib8hRt^_r|Fe*p zVT91;&VWX|N${V_*)MdJv|BFt?GLLhUnu3CwEd_5g_REv$MHc+5)R%_MmlNeP9OM=C_G_bFVf}<6==h}YiuL;U%_#lagf&iVVxAVkE?HPSAlOA_^jC8odw$daBp1TL$E@EO7 zU^hvG-fH`|ytw$Ti_7h*>T0AvYt>Z~jqK2F!7b^b?0rfUcv*N$TEm_pk0YkItV|Jn zsBEgTC1^l;%@wSG!9!%s?eFR7>0TX!zI9O6f|{m(mdug>D!hCEFNdVNvS$zNM}*qV z;G^Wy$Nm3W;v@K+DXPu;Z8t#W{p~`sZ)f*R%Taju3<*8Ux9*vR|MfDjRNy}qU0ACB zblCoHOV^b1Kc=nbW*$^If&x6!wE$Fv!Eq`Q)Sj7mcm{uof&+*l=*NT6%hJ|X?AFK6 z1fXWjUJqQ&HZTxe1luT3_D8yMJ2*J-HxB+!eX^_lvHkh~ zMsIN-*XjS{iRH+9x#_RLUwv9QV*WFoprXP7%W5!yMYA8y7MW4Ge;)^F0s-QfEFd4~ zH}8p6Qc|Ms=6$rTrT#`32P}@^AtEj0C4U{DfXhfb_)JEDMF_k%Qdlwg`tQ#Nlju7! z3Ep7+(6Cuj3jVtofNg;mYKH00R|y;cQ!!hCzDU2c6X|pfj=M;wYj8y~Z17+L8;{T9 zD=r(g!oFa(0X|&KheJ~Aa|>4js4r!PkhZkD^NqlT+dnu!IxGCAc=_|uP|I6+*7`qS z6cL@BoxKcWN$lC;p7PDGnjN6iU-|z1`|3yLd`sVrH<1 z7#np32{Oijz`)o9nUi$?KfmrDfEpRes#jx!+d>n;(`Il8CIcI*PH^yw1qIblH7!!o zUEt~}z{{)A4A$NqGJz-26uhQHQx)cX*;0NQO@p8i%U&|!bO3tT#0j(#%%J6HpnmTj zk+ZXN!JMgQC)qL>r*wdTq{1cm=ym4jvwDI-`sgueXIuIEHz0M&V!IO)8g7&=!!9d2|U*V2Yp?dH_mjz0;^2v3m8 zW}osKx1MeEF5*b*?*uCs1?eY$^3TAMBL+7@UK(iKpYIHTvx_IF(k+9Gb;nqdX6$Fb zJ#v) zF-`(X{%^kw!E2sEM@MJwFVlU9R-NFD6%7_l89uvU9kwW^a`>eF8$8e5s=<`2(PF()(~TNtF~?ujPPS?| z;?H%#1oVEC?q!vn%?m7&)g!Z?cc|YTfTEFsctfD9sA$V+KVil1C(V1c!w2UM!hgUu z1i~-u87@!-R^5HIClmbd@-v;II#`bUfoE1i-#;*r44$|*J>#>)zGP_F6@m=veKj?m zOf*fuM=C_ShEt~Bu zq_(*oGzK$)ZzC8`G?~Cyu(+Z^#mlSePdd0E!-$qA`0m{D_5BTI-RKB#4?{i ztm+_~ft<08M+7hBY^KvoRAuW%8&_Vk0xf^?@#C^T~CspVE^JQ@{2hyL(d9n-yhEYJY zzkKshzhd~u53}&d$dbugV;)pAwCgc%fmdeF&TXY(6C+K$K<*mW46N^>VH@ZXj>-IX zNXl?*rncXB=jXo86C z=_^IjwdC>5M$i(J>k%Q~(OOeeV*$F&J^lUOUVeVV;E<@LtsPqN7AvC9)w}!oO8GqilS$;c&XRnLOC8@ev!hNK1$IS9=W4LNML{6_ zj=qh5%hOjUHJ_FHh_s_Q)+kR}_rbQGH!EDpnnLi|G90?M% z1n~bi7g4Fo>Vp^9F#l2M{!><0U*8I>y-SYa*WgC~AXr;hms?t@)@vi03FgO+6OkfS zjtQ6hnS*u{0?JP8D^Ko&3Nh$xexIH$4g$Y>5VqzB&fcX$VH6NJC^R|ClJX+}YXd|G znBsnO5H1O*nQUOAr-3{bu>_*qDyQ$|BeQK931_!C-5TG(;dGCCxW}w=3c8?frdzzHBxDHVIB_ zXDFy{HYzGF{yc3553Bvuz}MW(U&T|ZE99AKM@O{}PEJxGxM;)IWr69KW=cHF2{Lt^ z-Zvu7HgJhq)fyKR6{(yz1)W!ER|gzBflg*(U!ST3Tzlz*b^@7$eHT5p7oeD&)YYYY z4v(9SmDOgT|A>b?HcKW@A3VQFip&b{TyR<3oa*sk?!fu(R9giObB@l=NpR;M93CdY zbZo?J%v@Yx3oezH7gkhM=*E;F*gPm3m1{H_r@rL8S5@v^C`rILe0^{xWf7e1yJ(m$ zkN&CSot(D!p#hQY$-l1a9;h{Aq%NPv^uZLQ zuh;JgyIKpnjd33qV0NrWqnEh%QqkqW^MgTP*!3 zY;U<(nF`StEPfWYwrOlXX5FgaQymSv;sghq;uJ5SeS|O45Zf0`O<$vP%xP(H9ZB#s z-@4V1!Z$`LMn5kpStr+?2g07;&glYToMcC}Sd1j5C=iK}30cGO=md-oOF4P9we|_x zFI82?3x$bAhW2L4E{VjJ+nHtpM;0W0>nXGe>nKQlkN@^0#NtB0wM@X&lUdfJBds0e zQ!4NJGC$U{HRILzB4o&eqh}A+L$vq4$&43Csw&1xCI+hq|Vscg(R5IA|PT zZoq^S3i?CZ?1MZ6hv>*djV2f8VT*q3gej!3m`ICzd;Pdd!D~H#z7RjTg`xGkhw2!2 znvv1q__)uNbfNICqk94Oc16+JjJ#uIEIE;w1WZ11sNox6D|d!g@H9KFG6Oguf+XqVQHBS z$1yd5X=Yxp@B2(Y?sX-M9K{OM7~F{Pa2q0ugbxnId}DNU^zTn)J;XWmy~M{#tyc4o z9aF(!!^2f7EzLn0x!TxR-_Sstot?!QMC_AggyoM=BZ#y3PL+EsEDyfC=a1F>_wP?X zTQwUj>vKQrKef1&;R8nS-#swe)U`L8`OGNiMoi2gMjR!XF_fIJROn4wMTgdMSQwkhkE-$wN z+Z?Od=f705aAL#1{zUWJc&}dLu&^cO;~oM+RXn*SPnn8g5$S`Ol0pApZ$Cbx5l=W; SuGa-p*n-|;`?u`lW&aJoZwvqc literal 0 HcmV?d00001