From 16b7f3df298744ca8f0ee880d3b30b3d506c0a5a Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Thu, 4 Jun 2026 16:21:17 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: rbelanec/train_record_42_1779354540 Source: Original Platform --- .gitattributes | 36 + README.md | 81 + all_results.json | 13 + config.json | 39 + eval_results.json | 8 + generation_config.json | 12 + model.safetensors | 3 + special_tokens_map.json | 26 + tokenizer.json | 3 + tokenizer_config.json | 2069 ++++ train.yaml | 65 + train_results.json | 9 + trainer_log.jsonl | 3144 +++++ trainer_state.json | 25207 ++++++++++++++++++++++++++++++++++++++ training_args.bin | 3 + training_eval_loss.png | Bin 0 -> 39826 bytes training_loss.png | Bin 0 -> 41519 bytes 17 files changed, 30718 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 eval_results.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train.yaml create mode 100644 train_results.json create mode 100644 trainer_log.jsonl create mode 100644 trainer_state.json create mode 100644 training_args.bin create mode 100644 training_eval_loss.png create mode 100644 training_loss.png diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..09bfa69 --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +--- +library_name: transformers +license: llama3.2 +base_model: meta-llama/Llama-3.2-1B-Instruct +tags: +- peft-factory +- freeze +- llama-factory +- generated_from_trainer +model-index: +- name: train_record_42_1779354540 + results: [] +--- + + + +# train_record_42_1779354540 + +This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) on the record dataset. +It achieves the following results on the evaluation set: +- Loss: 0.3537 +- Num Input Tokens Seen: 49166912 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-06 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Input Tokens Seen | +|:-------------:|:------:|:-----:|:---------------:|:-----------------:| +| 0.764 | 0.0501 | 782 | 0.6362 | 2474432 | +| 0.6569 | 0.1001 | 1564 | 0.5395 | 4931328 | +| 0.5165 | 0.1502 | 2346 | 0.5076 | 7397056 | +| 0.5774 | 0.2002 | 3128 | 0.4884 | 9832064 | +| 0.4226 | 0.2503 | 3910 | 0.4644 | 12304064 | +| 0.3784 | 0.3004 | 4692 | 0.4629 | 14775488 | +| 0.579 | 0.3504 | 5474 | 0.4391 | 17259840 | +| 0.3547 | 0.4005 | 6256 | 0.4233 | 19707456 | +| 0.3794 | 0.4505 | 7038 | 0.4316 | 22178432 | +| 0.3939 | 0.5006 | 7820 | 0.4134 | 24646208 | +| 0.3382 | 0.5507 | 8602 | 0.3985 | 27101056 | +| 0.4 | 0.6007 | 9384 | 0.3917 | 29544576 | +| 0.3188 | 0.6508 | 10166 | 0.3784 | 32010176 | +| 0.4233 | 0.7009 | 10948 | 0.3722 | 34475136 | +| 0.2995 | 0.7509 | 11730 | 0.3648 | 36931648 | +| 0.3367 | 0.8010 | 12512 | 0.3636 | 39382144 | +| 0.272 | 0.8510 | 13294 | 0.3571 | 41847872 | +| 0.3451 | 0.9011 | 14076 | 0.3544 | 44318848 | +| 0.2502 | 0.9512 | 14858 | 0.3537 | 46767552 | + + +### Framework versions + +- Transformers 4.51.3 +- Pytorch 2.10.0+cu128 +- Datasets 4.0.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..7694afe --- /dev/null +++ b/all_results.json @@ -0,0 +1,13 @@ +{ + "epoch": 1.0, + "eval_loss": 0.3537425398826599, + "eval_runtime": 49.0759, + "eval_samples_per_second": 282.95, + "eval_steps_per_second": 35.374, + "num_input_tokens_seen": 49166912, + "total_flos": 2.8707953551107686e+17, + "train_loss": 0.44386771268258823, + "train_runtime": 3548.0201, + "train_samples_per_second": 35.222, + "train_steps_per_second": 4.403 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..08bd85b --- /dev/null +++ b/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.3", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..735d68d --- /dev/null +++ b/eval_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.0, + "eval_loss": 0.3537425398826599, + "eval_runtime": 49.0759, + "eval_samples_per_second": 282.95, + "eval_steps_per_second": 35.374, + "num_input_tokens_seen": 49166912 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..2b8ae57 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.3" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..08aaf35 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1afd656b448aedcea4db951fa0a8b8e5db03b4c8e36507be2a6b65e76a70ab53 +size 4417933576 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..14daf45 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,26 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1c1d8d5 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ddc3ce0 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2069 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.yaml b/train.yaml new file mode 100644 index 0000000..637b48d --- /dev/null +++ b/train.yaml @@ -0,0 +1,65 @@ +seed: 42 + +### model +model_name_or_path: meta-llama/Llama-3.2-1B-Instruct +trust_remote_code: true +flash_attn: auto +use_cache: false + +### method +# Full fine-tune of every decoder block, but with the (tied) embeddings frozen. +# `finetuning_type: freeze` only trains modules whose name matches a trainable layer; +# embed_tokens / lm_head / final model.norm are "extra" modules and stay frozen unless +# listed in freeze_extra_modules. Setting freeze_trainable_layers = num_hidden_layers (16 +# for Llama-3.2-1B) makes ALL decoder blocks trainable, so this == "full FT minus +# embeddings". Because tie_word_embeddings=true, freezing embed_tokens also freezes lm_head. +# This is lever B of the embedding-amplification fix (see figures/amplification/README.md). +stage: sft +do_train: true +finetuning_type: freeze +freeze_trainable_layers: 16 +freeze_trainable_modules: all +# freeze_extra_modules: left unset -> embed_tokens, lm_head (tied), final norm stay frozen + +### dataset +dataset: record +template: llama3 +cutoff_len: 2048 +overwrite_cache: true +preprocessing_num_workers: 4 +dataloader_num_workers: 4 +packing: false + +### output +output_dir: saves_bts_preliminary/freeze/llama-3.2-1b-instruct/train_record_42_1779354540 +logging_steps: 5 +save_steps: 0.05 +overwrite_output_dir: true +save_only_model: false +plot_loss: true +include_num_input_tokens_seen: true +push_to_hub: true +push_to_hub_organization: rbelanec +load_best_model_at_end: true +save_total_limit: 1 + +### train +per_device_train_batch_size: 8 +learning_rate: 2.0e-6 +num_train_epochs: 1 +weight_decay: 1.0e-2 +lr_scheduler_type: cosine +bf16: true +ddp_timeout: 180000000 +resume_from_checkpoint: null +warmup_ratio: 0.1 +optim: adamw_torch +report_to: +- wandb +run_name: freeze_llama-3.2-1b-instruct_train_record_42_1779354540 + +### eval +per_device_eval_batch_size: 8 +eval_strategy: steps +eval_steps: 0.05 +val_size: 0.1 diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..2a268c5 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "num_input_tokens_seen": 49166912, + "total_flos": 2.8707953551107686e+17, + "train_loss": 0.44386771268258823, + "train_runtime": 3548.0201, + "train_samples_per_second": 35.222, + "train_steps_per_second": 4.403 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..75719df --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,3144 @@ +{"current_steps": 5, "total_steps": 15621, "loss": 2.1603, "lr": 5.118362124120281e-09, "epoch": 0.0003200819409768901, "percentage": 0.03, "elapsed_time": "0:00:00", "remaining_time": "0:48:22", "throughput": 16527.87, "total_tokens": 15360} +{"current_steps": 10, "total_steps": 15621, "loss": 2.344, "lr": 1.1516314779270634e-08, "epoch": 0.0006401638819537802, "percentage": 0.06, "elapsed_time": "0:00:01", "remaining_time": "0:40:30", "throughput": 19981.57, "total_tokens": 31104} +{"current_steps": 15, "total_steps": 15621, "loss": 2.115, "lr": 1.7914267434420987e-08, "epoch": 0.0009602458229306702, "percentage": 0.1, "elapsed_time": "0:00:02", "remaining_time": "0:37:17", "throughput": 21482.87, "total_tokens": 46208} +{"current_steps": 20, "total_steps": 15621, "loss": 2.741, "lr": 2.431222008957134e-08, "epoch": 0.0012803277639075604, "percentage": 0.13, "elapsed_time": "0:00:02", "remaining_time": "0:36:09", "throughput": 22461.03, "total_tokens": 62464} +{"current_steps": 25, "total_steps": 15621, "loss": 2.0952, "lr": 3.071017274472169e-08, "epoch": 0.0016004097048844504, "percentage": 0.16, "elapsed_time": "0:00:03", "remaining_time": "0:35:34", "throughput": 23114.25, "total_tokens": 79104} +{"current_steps": 30, "total_steps": 15621, "loss": 2.1934, "lr": 3.710812539987204e-08, "epoch": 0.0019204916458613404, "percentage": 0.19, "elapsed_time": "0:00:04", "remaining_time": "0:34:58", "throughput": 23510.42, "total_tokens": 94912} +{"current_steps": 35, "total_steps": 15621, "loss": 2.3371, "lr": 4.350607805502239e-08, "epoch": 0.0022405735868382304, "percentage": 0.22, "elapsed_time": "0:00:04", "remaining_time": "0:34:33", "throughput": 23790.4, "total_tokens": 110784} +{"current_steps": 40, "total_steps": 15621, "loss": 2.1424, "lr": 4.990403071017274e-08, "epoch": 0.002560655527815121, "percentage": 0.26, "elapsed_time": "0:00:05", "remaining_time": "0:34:02", "throughput": 23968.62, "total_tokens": 125696} +{"current_steps": 45, "total_steps": 15621, "loss": 2.0945, "lr": 5.6301983365323095e-08, "epoch": 0.002880737468792011, "percentage": 0.29, "elapsed_time": "0:00:05", "remaining_time": "0:33:38", "throughput": 24122.75, "total_tokens": 140672} +{"current_steps": 50, "total_steps": 15621, "loss": 2.0027, "lr": 6.269993602047345e-08, "epoch": 0.003200819409768901, "percentage": 0.32, "elapsed_time": "0:00:06", "remaining_time": "0:33:19", "throughput": 24218.12, "total_tokens": 155456} +{"current_steps": 55, "total_steps": 15621, "loss": 1.915, "lr": 6.90978886756238e-08, "epoch": 0.003520901350745791, "percentage": 0.35, "elapsed_time": "0:00:07", "remaining_time": "0:33:09", "throughput": 24304.82, "total_tokens": 170816} +{"current_steps": 60, "total_steps": 15621, "loss": 2.0244, "lr": 7.549584133077414e-08, "epoch": 0.003840983291722681, "percentage": 0.38, "elapsed_time": "0:00:07", "remaining_time": "0:32:50", "throughput": 24355.24, "total_tokens": 185088} +{"current_steps": 65, "total_steps": 15621, "loss": 1.6385, "lr": 8.18937939859245e-08, "epoch": 0.004161065232699571, "percentage": 0.42, "elapsed_time": "0:00:08", "remaining_time": "0:32:42", "throughput": 24432.92, "total_tokens": 200384} +{"current_steps": 70, "total_steps": 15621, "loss": 1.6591, "lr": 8.829174664107485e-08, "epoch": 0.004481147173676461, "percentage": 0.45, "elapsed_time": "0:00:08", "remaining_time": "0:32:36", "throughput": 24496.91, "total_tokens": 215744} +{"current_steps": 75, "total_steps": 15621, "loss": 1.6555, "lr": 9.468969929622521e-08, "epoch": 0.004801229114653352, "percentage": 0.48, "elapsed_time": "0:00:09", "remaining_time": "0:32:27", "throughput": 24521.03, "total_tokens": 230400} +{"current_steps": 80, "total_steps": 15621, "loss": 1.3232, "lr": 1.0108765195137556e-07, "epoch": 0.005121311055630242, "percentage": 0.51, "elapsed_time": "0:00:10", "remaining_time": "0:32:26", "throughput": 24611.68, "total_tokens": 246592} +{"current_steps": 85, "total_steps": 15621, "loss": 1.1532, "lr": 1.074856046065259e-07, "epoch": 0.005441392996607132, "percentage": 0.54, "elapsed_time": "0:00:10", "remaining_time": "0:32:23", "throughput": 24668.25, "total_tokens": 262272} +{"current_steps": 90, "total_steps": 15621, "loss": 1.0452, "lr": 1.1388355726167625e-07, "epoch": 0.005761474937584022, "percentage": 0.58, "elapsed_time": "0:00:11", "remaining_time": "0:32:19", "throughput": 24709.55, "total_tokens": 277760} +{"current_steps": 95, "total_steps": 15621, "loss": 1.2493, "lr": 1.202815099168266e-07, "epoch": 0.006081556878560912, "percentage": 0.61, "elapsed_time": "0:00:11", "remaining_time": "0:32:14", "throughput": 24747.28, "total_tokens": 292992} +{"current_steps": 100, "total_steps": 15621, "loss": 1.1191, "lr": 1.2667946257197694e-07, "epoch": 0.006401638819537802, "percentage": 0.64, "elapsed_time": "0:00:12", "remaining_time": "0:32:07", "throughput": 24788.85, "total_tokens": 307840} +{"current_steps": 105, "total_steps": 15621, "loss": 1.0359, "lr": 1.3307741522712732e-07, "epoch": 0.006721720760514692, "percentage": 0.67, "elapsed_time": "0:00:13", "remaining_time": "0:32:02", "throughput": 24826.2, "total_tokens": 323008} +{"current_steps": 110, "total_steps": 15621, "loss": 1.0546, "lr": 1.3947536788227767e-07, "epoch": 0.007041802701491582, "percentage": 0.7, "elapsed_time": "0:00:13", "remaining_time": "0:32:03", "throughput": 24882.46, "total_tokens": 339456} +{"current_steps": 115, "total_steps": 15621, "loss": 1.1286, "lr": 1.45873320537428e-07, "epoch": 0.007361884642468472, "percentage": 0.74, "elapsed_time": "0:00:14", "remaining_time": "0:32:00", "throughput": 24906.28, "total_tokens": 354816} +{"current_steps": 120, "total_steps": 15621, "loss": 0.8243, "lr": 1.5227127319257838e-07, "epoch": 0.007681966583445362, "percentage": 0.77, "elapsed_time": "0:00:14", "remaining_time": "0:31:54", "throughput": 24922.91, "total_tokens": 369472} +{"current_steps": 125, "total_steps": 15621, "loss": 0.9582, "lr": 1.586692258477287e-07, "epoch": 0.008002048524422252, "percentage": 0.8, "elapsed_time": "0:00:15", "remaining_time": "0:31:52", "throughput": 24944.57, "total_tokens": 384768} +{"current_steps": 130, "total_steps": 15621, "loss": 1.0307, "lr": 1.6506717850287908e-07, "epoch": 0.008322130465399142, "percentage": 0.83, "elapsed_time": "0:00:16", "remaining_time": "0:31:49", "throughput": 24970.17, "total_tokens": 400192} +{"current_steps": 135, "total_steps": 15621, "loss": 0.8953, "lr": 1.7146513115802943e-07, "epoch": 0.008642212406376032, "percentage": 0.86, "elapsed_time": "0:00:16", "remaining_time": "0:31:50", "throughput": 25010.75, "total_tokens": 416640} +{"current_steps": 140, "total_steps": 15621, "loss": 0.8263, "lr": 1.7786308381317976e-07, "epoch": 0.008962294347352922, "percentage": 0.9, "elapsed_time": "0:00:17", "remaining_time": "0:31:50", "throughput": 25041.03, "total_tokens": 432640} +{"current_steps": 145, "total_steps": 15621, "loss": 0.8971, "lr": 1.8426103646833014e-07, "epoch": 0.009282376288329812, "percentage": 0.93, "elapsed_time": "0:00:17", "remaining_time": "0:31:50", "throughput": 25057.87, "total_tokens": 448640} +{"current_steps": 150, "total_steps": 15621, "loss": 0.9544, "lr": 1.9065898912348046e-07, "epoch": 0.009602458229306703, "percentage": 0.96, "elapsed_time": "0:00:18", "remaining_time": "0:31:50", "throughput": 25076.32, "total_tokens": 464448} +{"current_steps": 155, "total_steps": 15621, "loss": 0.8598, "lr": 1.9705694177863084e-07, "epoch": 0.009922540170283593, "percentage": 0.99, "elapsed_time": "0:00:19", "remaining_time": "0:31:47", "throughput": 25083.95, "total_tokens": 479488} +{"current_steps": 160, "total_steps": 15621, "loss": 0.7343, "lr": 2.034548944337812e-07, "epoch": 0.010242622111260483, "percentage": 1.02, "elapsed_time": "0:00:19", "remaining_time": "0:31:46", "throughput": 25105.08, "total_tokens": 495296} +{"current_steps": 165, "total_steps": 15621, "loss": 0.7845, "lr": 2.0985284708893152e-07, "epoch": 0.010562704052237373, "percentage": 1.06, "elapsed_time": "0:00:20", "remaining_time": "0:31:42", "throughput": 25118.24, "total_tokens": 510144} +{"current_steps": 170, "total_steps": 15621, "loss": 0.8491, "lr": 2.162507997440819e-07, "epoch": 0.010882785993214263, "percentage": 1.09, "elapsed_time": "0:00:20", "remaining_time": "0:31:39", "throughput": 25118.75, "total_tokens": 524928} +{"current_steps": 175, "total_steps": 15621, "loss": 0.7122, "lr": 2.2264875239923222e-07, "epoch": 0.011202867934191153, "percentage": 1.12, "elapsed_time": "0:00:21", "remaining_time": "0:31:41", "throughput": 25139.89, "total_tokens": 541504} +{"current_steps": 180, "total_steps": 15621, "loss": 0.7354, "lr": 2.290467050543826e-07, "epoch": 0.011522949875168043, "percentage": 1.15, "elapsed_time": "0:00:22", "remaining_time": "0:31:37", "throughput": 25143.86, "total_tokens": 556096} +{"current_steps": 185, "total_steps": 15621, "loss": 0.734, "lr": 2.3544465770953295e-07, "epoch": 0.011843031816144933, "percentage": 1.18, "elapsed_time": "0:00:22", "remaining_time": "0:31:38", "throughput": 25167.09, "total_tokens": 572736} +{"current_steps": 190, "total_steps": 15621, "loss": 0.8565, "lr": 2.418426103646833e-07, "epoch": 0.012163113757121823, "percentage": 1.22, "elapsed_time": "0:00:23", "remaining_time": "0:31:37", "throughput": 25176.28, "total_tokens": 588352} +{"current_steps": 195, "total_steps": 15621, "loss": 0.9816, "lr": 2.4824056301983363e-07, "epoch": 0.012483195698098713, "percentage": 1.25, "elapsed_time": "0:00:23", "remaining_time": "0:31:36", "throughput": 25180.76, "total_tokens": 603520} +{"current_steps": 200, "total_steps": 15621, "loss": 0.8158, "lr": 2.54638515674984e-07, "epoch": 0.012803277639075603, "percentage": 1.28, "elapsed_time": "0:00:24", "remaining_time": "0:31:35", "throughput": 25190.16, "total_tokens": 619392} +{"current_steps": 205, "total_steps": 15621, "loss": 0.8032, "lr": 2.6103646833013433e-07, "epoch": 0.013123359580052493, "percentage": 1.31, "elapsed_time": "0:00:25", "remaining_time": "0:31:35", "throughput": 25209.08, "total_tokens": 635456} +{"current_steps": 210, "total_steps": 15621, "loss": 0.8716, "lr": 2.6743442098528466e-07, "epoch": 0.013443441521029383, "percentage": 1.34, "elapsed_time": "0:00:25", "remaining_time": "0:31:34", "throughput": 25215.54, "total_tokens": 650880} +{"current_steps": 215, "total_steps": 15621, "loss": 0.8278, "lr": 2.7383237364043504e-07, "epoch": 0.013763523462006273, "percentage": 1.38, "elapsed_time": "0:00:26", "remaining_time": "0:31:34", "throughput": 25220.34, "total_tokens": 666688} +{"current_steps": 220, "total_steps": 15621, "loss": 0.7898, "lr": 2.802303262955854e-07, "epoch": 0.014083605402983163, "percentage": 1.41, "elapsed_time": "0:00:27", "remaining_time": "0:31:32", "throughput": 25228.53, "total_tokens": 682112} +{"current_steps": 225, "total_steps": 15621, "loss": 0.8381, "lr": 2.866282789507358e-07, "epoch": 0.014403687343960053, "percentage": 1.44, "elapsed_time": "0:00:27", "remaining_time": "0:31:31", "throughput": 25239.98, "total_tokens": 697728} +{"current_steps": 230, "total_steps": 15621, "loss": 0.6829, "lr": 2.9302623160588607e-07, "epoch": 0.014723769284936943, "percentage": 1.47, "elapsed_time": "0:00:28", "remaining_time": "0:31:28", "throughput": 25250.23, "total_tokens": 712704} +{"current_steps": 235, "total_steps": 15621, "loss": 0.9619, "lr": 2.9942418426103644e-07, "epoch": 0.015043851225913833, "percentage": 1.5, "elapsed_time": "0:00:28", "remaining_time": "0:31:30", "throughput": 25263.62, "total_tokens": 729408} +{"current_steps": 240, "total_steps": 15621, "loss": 0.7854, "lr": 3.058221369161868e-07, "epoch": 0.015363933166890723, "percentage": 1.54, "elapsed_time": "0:00:29", "remaining_time": "0:31:29", "throughput": 25273.94, "total_tokens": 745344} +{"current_steps": 245, "total_steps": 15621, "loss": 0.6965, "lr": 3.1222008957133715e-07, "epoch": 0.015684015107867613, "percentage": 1.57, "elapsed_time": "0:00:30", "remaining_time": "0:31:31", "throughput": 25301.1, "total_tokens": 762688} +{"current_steps": 250, "total_steps": 15621, "loss": 0.7105, "lr": 3.186180422264875e-07, "epoch": 0.016004097048844503, "percentage": 1.6, "elapsed_time": "0:00:30", "remaining_time": "0:31:33", "throughput": 25314.36, "total_tokens": 779392} +{"current_steps": 255, "total_steps": 15621, "loss": 0.7964, "lr": 3.2501599488163785e-07, "epoch": 0.016324178989821393, "percentage": 1.63, "elapsed_time": "0:00:31", "remaining_time": "0:31:30", "throughput": 25315.93, "total_tokens": 794112} +{"current_steps": 260, "total_steps": 15621, "loss": 0.8427, "lr": 3.314139475367882e-07, "epoch": 0.016644260930798283, "percentage": 1.66, "elapsed_time": "0:00:31", "remaining_time": "0:31:29", "throughput": 25328.06, "total_tokens": 810112} +{"current_steps": 265, "total_steps": 15621, "loss": 0.8614, "lr": 3.3781190019193855e-07, "epoch": 0.016964342871775173, "percentage": 1.7, "elapsed_time": "0:00:32", "remaining_time": "0:31:28", "throughput": 25332.87, "total_tokens": 825472} +{"current_steps": 270, "total_steps": 15621, "loss": 0.9819, "lr": 3.4420985284708893e-07, "epoch": 0.017284424812752063, "percentage": 1.73, "elapsed_time": "0:00:33", "remaining_time": "0:31:25", "throughput": 25331.03, "total_tokens": 840128} +{"current_steps": 275, "total_steps": 15621, "loss": 0.7825, "lr": 3.5060780550223926e-07, "epoch": 0.017604506753728953, "percentage": 1.76, "elapsed_time": "0:00:33", "remaining_time": "0:31:23", "throughput": 25332.49, "total_tokens": 855104} +{"current_steps": 280, "total_steps": 15621, "loss": 0.8069, "lr": 3.570057581573896e-07, "epoch": 0.017924588694705843, "percentage": 1.79, "elapsed_time": "0:00:34", "remaining_time": "0:31:23", "throughput": 25332.9, "total_tokens": 870848} +{"current_steps": 285, "total_steps": 15621, "loss": 0.7403, "lr": 3.6340371081253996e-07, "epoch": 0.018244670635682733, "percentage": 1.82, "elapsed_time": "0:00:34", "remaining_time": "0:31:21", "throughput": 25335.84, "total_tokens": 885760} +{"current_steps": 290, "total_steps": 15621, "loss": 0.7078, "lr": 3.6980166346769034e-07, "epoch": 0.018564752576659623, "percentage": 1.86, "elapsed_time": "0:00:35", "remaining_time": "0:31:19", "throughput": 25337.39, "total_tokens": 900928} +{"current_steps": 295, "total_steps": 15621, "loss": 0.793, "lr": 3.7619961612284067e-07, "epoch": 0.018884834517636517, "percentage": 1.89, "elapsed_time": "0:00:36", "remaining_time": "0:31:17", "throughput": 25344.99, "total_tokens": 915968} +{"current_steps": 300, "total_steps": 15621, "loss": 0.9919, "lr": 3.8259756877799104e-07, "epoch": 0.019204916458613407, "percentage": 1.92, "elapsed_time": "0:00:36", "remaining_time": "0:31:18", "throughput": 25366.47, "total_tokens": 933056} +{"current_steps": 305, "total_steps": 15621, "loss": 0.7373, "lr": 3.889955214331414e-07, "epoch": 0.019524998399590297, "percentage": 1.95, "elapsed_time": "0:00:37", "remaining_time": "0:31:17", "throughput": 25367.65, "total_tokens": 948416} +{"current_steps": 310, "total_steps": 15621, "loss": 0.7694, "lr": 3.953934740882917e-07, "epoch": 0.019845080340567187, "percentage": 1.98, "elapsed_time": "0:00:37", "remaining_time": "0:31:14", "throughput": 25365.33, "total_tokens": 962880} +{"current_steps": 315, "total_steps": 15621, "loss": 0.8088, "lr": 4.0179142674344207e-07, "epoch": 0.020165162281544077, "percentage": 2.02, "elapsed_time": "0:00:38", "remaining_time": "0:31:16", "throughput": 25379.41, "total_tokens": 979904} +{"current_steps": 320, "total_steps": 15621, "loss": 0.8251, "lr": 4.0818937939859245e-07, "epoch": 0.020485244222520967, "percentage": 2.05, "elapsed_time": "0:00:39", "remaining_time": "0:31:14", "throughput": 25378.55, "total_tokens": 995136} +{"current_steps": 325, "total_steps": 15621, "loss": 0.7695, "lr": 4.145873320537428e-07, "epoch": 0.020805326163497857, "percentage": 2.08, "elapsed_time": "0:00:39", "remaining_time": "0:31:14", "throughput": 25384.44, "total_tokens": 1011008} +{"current_steps": 330, "total_steps": 15621, "loss": 0.8335, "lr": 4.2098528470889315e-07, "epoch": 0.021125408104474747, "percentage": 2.11, "elapsed_time": "0:00:40", "remaining_time": "0:31:12", "throughput": 25378.34, "total_tokens": 1025792} +{"current_steps": 335, "total_steps": 15621, "loss": 0.6901, "lr": 4.273832373640435e-07, "epoch": 0.021445490045451637, "percentage": 2.14, "elapsed_time": "0:00:41", "remaining_time": "0:31:14", "throughput": 25391.22, "total_tokens": 1042944} +{"current_steps": 340, "total_steps": 15621, "loss": 0.8267, "lr": 4.3378119001919386e-07, "epoch": 0.021765571986428527, "percentage": 2.18, "elapsed_time": "0:00:41", "remaining_time": "0:31:13", "throughput": 25394.56, "total_tokens": 1058688} +{"current_steps": 345, "total_steps": 15621, "loss": 0.7233, "lr": 4.401791426743442e-07, "epoch": 0.022085653927405417, "percentage": 2.21, "elapsed_time": "0:00:42", "remaining_time": "0:31:13", "throughput": 25398.58, "total_tokens": 1074560} +{"current_steps": 350, "total_steps": 15621, "loss": 0.6991, "lr": 4.4657709532949456e-07, "epoch": 0.022405735868382307, "percentage": 2.24, "elapsed_time": "0:00:42", "remaining_time": "0:31:11", "throughput": 25399.25, "total_tokens": 1089728} +{"current_steps": 355, "total_steps": 15621, "loss": 0.9114, "lr": 4.5297504798464494e-07, "epoch": 0.022725817809359197, "percentage": 2.27, "elapsed_time": "0:00:43", "remaining_time": "0:31:10", "throughput": 25400.13, "total_tokens": 1105024} +{"current_steps": 360, "total_steps": 15621, "loss": 0.7824, "lr": 4.593730006397952e-07, "epoch": 0.023045899750336087, "percentage": 2.3, "elapsed_time": "0:00:44", "remaining_time": "0:31:10", "throughput": 25403.56, "total_tokens": 1121088} +{"current_steps": 365, "total_steps": 15621, "loss": 0.7048, "lr": 4.657709532949456e-07, "epoch": 0.023365981691312977, "percentage": 2.34, "elapsed_time": "0:00:44", "remaining_time": "0:31:10", "throughput": 25409.38, "total_tokens": 1136896} +{"current_steps": 370, "total_steps": 15621, "loss": 0.7082, "lr": 4.7216890595009597e-07, "epoch": 0.023686063632289867, "percentage": 2.37, "elapsed_time": "0:00:45", "remaining_time": "0:31:10", "throughput": 25416.44, "total_tokens": 1153280} +{"current_steps": 375, "total_steps": 15621, "loss": 0.8338, "lr": 4.785668586052463e-07, "epoch": 0.024006145573266757, "percentage": 2.4, "elapsed_time": "0:00:46", "remaining_time": "0:31:10", "throughput": 25423.62, "total_tokens": 1169536} +{"current_steps": 380, "total_steps": 15621, "loss": 0.7577, "lr": 4.849648112603967e-07, "epoch": 0.024326227514243647, "percentage": 2.43, "elapsed_time": "0:00:46", "remaining_time": "0:31:09", "throughput": 25424.37, "total_tokens": 1185088} +{"current_steps": 385, "total_steps": 15621, "loss": 0.6664, "lr": 4.91362763915547e-07, "epoch": 0.024646309455220537, "percentage": 2.46, "elapsed_time": "0:00:47", "remaining_time": "0:31:09", "throughput": 25422.81, "total_tokens": 1200832} +{"current_steps": 390, "total_steps": 15621, "loss": 0.6605, "lr": 4.977607165706974e-07, "epoch": 0.024966391396197427, "percentage": 2.5, "elapsed_time": "0:00:47", "remaining_time": "0:31:08", "throughput": 25426.8, "total_tokens": 1216320} +{"current_steps": 395, "total_steps": 15621, "loss": 0.7361, "lr": 5.041586692258478e-07, "epoch": 0.025286473337174317, "percentage": 2.53, "elapsed_time": "0:00:48", "remaining_time": "0:31:08", "throughput": 25429.12, "total_tokens": 1232832} +{"current_steps": 400, "total_steps": 15621, "loss": 0.7037, "lr": 5.10556621880998e-07, "epoch": 0.025606555278151207, "percentage": 2.56, "elapsed_time": "0:00:49", "remaining_time": "0:31:07", "throughput": 25431.99, "total_tokens": 1248384} +{"current_steps": 405, "total_steps": 15621, "loss": 0.6727, "lr": 5.169545745361484e-07, "epoch": 0.025926637219128097, "percentage": 2.59, "elapsed_time": "0:00:49", "remaining_time": "0:31:06", "throughput": 25436.01, "total_tokens": 1263936} +{"current_steps": 410, "total_steps": 15621, "loss": 1.118, "lr": 5.233525271912988e-07, "epoch": 0.026246719160104987, "percentage": 2.62, "elapsed_time": "0:00:50", "remaining_time": "0:31:25", "throughput": 25464.26, "total_tokens": 1294208} +{"current_steps": 415, "total_steps": 15621, "loss": 0.7921, "lr": 5.297504798464492e-07, "epoch": 0.026566801101081877, "percentage": 2.66, "elapsed_time": "0:00:51", "remaining_time": "0:31:23", "throughput": 25464.91, "total_tokens": 1309120} +{"current_steps": 420, "total_steps": 15621, "loss": 0.8592, "lr": 5.361484325015994e-07, "epoch": 0.026886883042058767, "percentage": 2.69, "elapsed_time": "0:00:52", "remaining_time": "0:31:22", "throughput": 25465.61, "total_tokens": 1324224} +{"current_steps": 425, "total_steps": 15621, "loss": 0.6829, "lr": 5.425463851567498e-07, "epoch": 0.027206964983035656, "percentage": 2.72, "elapsed_time": "0:00:52", "remaining_time": "0:31:22", "throughput": 25470.88, "total_tokens": 1341056} +{"current_steps": 430, "total_steps": 15621, "loss": 0.7533, "lr": 5.489443378119002e-07, "epoch": 0.027527046924012546, "percentage": 2.75, "elapsed_time": "0:00:53", "remaining_time": "0:31:21", "throughput": 25471.1, "total_tokens": 1356544} +{"current_steps": 435, "total_steps": 15621, "loss": 0.6696, "lr": 5.553422904670505e-07, "epoch": 0.027847128864989436, "percentage": 2.78, "elapsed_time": "0:00:53", "remaining_time": "0:31:20", "throughput": 25471.16, "total_tokens": 1371840} +{"current_steps": 440, "total_steps": 15621, "loss": 0.6825, "lr": 5.61740243122201e-07, "epoch": 0.028167210805966326, "percentage": 2.82, "elapsed_time": "0:00:54", "remaining_time": "0:31:18", "throughput": 25470.53, "total_tokens": 1386816} +{"current_steps": 445, "total_steps": 15621, "loss": 0.7438, "lr": 5.681381957773512e-07, "epoch": 0.028487292746943216, "percentage": 2.85, "elapsed_time": "0:00:55", "remaining_time": "0:31:16", "throughput": 25470.73, "total_tokens": 1401792} +{"current_steps": 450, "total_steps": 15621, "loss": 0.6214, "lr": 5.745361484325015e-07, "epoch": 0.028807374687920106, "percentage": 2.88, "elapsed_time": "0:00:55", "remaining_time": "0:31:15", "throughput": 25470.34, "total_tokens": 1416896} +{"current_steps": 455, "total_steps": 15621, "loss": 0.7517, "lr": 5.80934101087652e-07, "epoch": 0.029127456628896996, "percentage": 2.91, "elapsed_time": "0:00:56", "remaining_time": "0:31:14", "throughput": 25470.16, "total_tokens": 1432704} +{"current_steps": 460, "total_steps": 15621, "loss": 0.7009, "lr": 5.873320537428022e-07, "epoch": 0.029447538569873886, "percentage": 2.94, "elapsed_time": "0:00:56", "remaining_time": "0:31:14", "throughput": 25469.75, "total_tokens": 1448384} +{"current_steps": 465, "total_steps": 15621, "loss": 0.7179, "lr": 5.937300063979526e-07, "epoch": 0.029767620510850776, "percentage": 2.98, "elapsed_time": "0:00:57", "remaining_time": "0:31:14", "throughput": 25476.67, "total_tokens": 1464832} +{"current_steps": 470, "total_steps": 15621, "loss": 0.6785, "lr": 6.00127959053103e-07, "epoch": 0.030087702451827666, "percentage": 3.01, "elapsed_time": "0:00:58", "remaining_time": "0:31:12", "throughput": 25472.49, "total_tokens": 1479424} +{"current_steps": 475, "total_steps": 15621, "loss": 0.7292, "lr": 6.065259117082533e-07, "epoch": 0.030407784392804556, "percentage": 3.04, "elapsed_time": "0:00:58", "remaining_time": "0:31:10", "throughput": 25469.78, "total_tokens": 1494336} +{"current_steps": 480, "total_steps": 15621, "loss": 0.6741, "lr": 6.129238643634037e-07, "epoch": 0.030727866333781446, "percentage": 3.07, "elapsed_time": "0:00:59", "remaining_time": "0:31:09", "throughput": 25468.0, "total_tokens": 1509184} +{"current_steps": 485, "total_steps": 15621, "loss": 0.8032, "lr": 6.19321817018554e-07, "epoch": 0.031047948274758336, "percentage": 3.1, "elapsed_time": "0:00:59", "remaining_time": "0:31:09", "throughput": 25471.4, "total_tokens": 1525504} +{"current_steps": 490, "total_steps": 15621, "loss": 0.5911, "lr": 6.257197696737044e-07, "epoch": 0.031368030215735226, "percentage": 3.14, "elapsed_time": "0:01:00", "remaining_time": "0:31:08", "throughput": 25475.97, "total_tokens": 1541504} +{"current_steps": 495, "total_steps": 15621, "loss": 0.6188, "lr": 6.321177223288548e-07, "epoch": 0.03168811215671212, "percentage": 3.17, "elapsed_time": "0:01:01", "remaining_time": "0:31:07", "throughput": 25478.98, "total_tokens": 1557184} +{"current_steps": 500, "total_steps": 15621, "loss": 0.7662, "lr": 6.385156749840051e-07, "epoch": 0.032008194097689006, "percentage": 3.2, "elapsed_time": "0:01:01", "remaining_time": "0:31:07", "throughput": 25483.55, "total_tokens": 1573440} +{"current_steps": 505, "total_steps": 15621, "loss": 0.8712, "lr": 6.449136276391554e-07, "epoch": 0.0323282760386659, "percentage": 3.23, "elapsed_time": "0:01:02", "remaining_time": "0:31:06", "throughput": 25484.69, "total_tokens": 1588736} +{"current_steps": 510, "total_steps": 15621, "loss": 0.6979, "lr": 6.513115802943058e-07, "epoch": 0.032648357979642786, "percentage": 3.26, "elapsed_time": "0:01:02", "remaining_time": "0:31:05", "throughput": 25486.61, "total_tokens": 1604352} +{"current_steps": 515, "total_steps": 15621, "loss": 0.6574, "lr": 6.577095329494562e-07, "epoch": 0.03296843992061968, "percentage": 3.3, "elapsed_time": "0:01:03", "remaining_time": "0:31:03", "throughput": 25484.33, "total_tokens": 1618816} +{"current_steps": 520, "total_steps": 15621, "loss": 0.7462, "lr": 6.641074856046065e-07, "epoch": 0.033288521861596566, "percentage": 3.33, "elapsed_time": "0:01:04", "remaining_time": "0:31:03", "throughput": 25494.18, "total_tokens": 1635648} +{"current_steps": 525, "total_steps": 15621, "loss": 0.719, "lr": 6.705054382597568e-07, "epoch": 0.03360860380257346, "percentage": 3.36, "elapsed_time": "0:01:04", "remaining_time": "0:31:02", "throughput": 25493.63, "total_tokens": 1651328} +{"current_steps": 530, "total_steps": 15621, "loss": 0.7345, "lr": 6.769033909149072e-07, "epoch": 0.033928685743550346, "percentage": 3.39, "elapsed_time": "0:01:05", "remaining_time": "0:31:03", "throughput": 25501.39, "total_tokens": 1668928} +{"current_steps": 535, "total_steps": 15621, "loss": 0.6202, "lr": 6.833013435700575e-07, "epoch": 0.03424876768452724, "percentage": 3.42, "elapsed_time": "0:01:06", "remaining_time": "0:31:03", "throughput": 25505.18, "total_tokens": 1685504} +{"current_steps": 540, "total_steps": 15621, "loss": 0.7053, "lr": 6.89699296225208e-07, "epoch": 0.034568849625504126, "percentage": 3.46, "elapsed_time": "0:01:06", "remaining_time": "0:31:03", "throughput": 25508.8, "total_tokens": 1701952} +{"current_steps": 545, "total_steps": 15621, "loss": 0.7308, "lr": 6.960972488803583e-07, "epoch": 0.03488893156648102, "percentage": 3.49, "elapsed_time": "0:01:07", "remaining_time": "0:31:02", "throughput": 25507.95, "total_tokens": 1716992} +{"current_steps": 550, "total_steps": 15621, "loss": 0.5835, "lr": 7.024952015355085e-07, "epoch": 0.035209013507457906, "percentage": 3.52, "elapsed_time": "0:01:07", "remaining_time": "0:31:01", "throughput": 25504.54, "total_tokens": 1732160} +{"current_steps": 555, "total_steps": 15621, "loss": 0.6553, "lr": 7.08893154190659e-07, "epoch": 0.0355290954484348, "percentage": 3.55, "elapsed_time": "0:01:08", "remaining_time": "0:31:00", "throughput": 25506.91, "total_tokens": 1748416} +{"current_steps": 560, "total_steps": 15621, "loss": 0.7096, "lr": 7.152911068458093e-07, "epoch": 0.035849177389411686, "percentage": 3.58, "elapsed_time": "0:01:09", "remaining_time": "0:30:59", "throughput": 25506.73, "total_tokens": 1763776} +{"current_steps": 565, "total_steps": 15621, "loss": 0.6985, "lr": 7.216890595009597e-07, "epoch": 0.03616925933038858, "percentage": 3.62, "elapsed_time": "0:01:09", "remaining_time": "0:30:59", "throughput": 25511.52, "total_tokens": 1780160} +{"current_steps": 570, "total_steps": 15621, "loss": 0.6057, "lr": 7.2808701215611e-07, "epoch": 0.036489341271365466, "percentage": 3.65, "elapsed_time": "0:01:10", "remaining_time": "0:30:58", "throughput": 25514.51, "total_tokens": 1795968} +{"current_steps": 575, "total_steps": 15621, "loss": 0.6327, "lr": 7.344849648112603e-07, "epoch": 0.03680942321234236, "percentage": 3.68, "elapsed_time": "0:01:11", "remaining_time": "0:31:00", "throughput": 25527.74, "total_tokens": 1815424} +{"current_steps": 580, "total_steps": 15621, "loss": 0.8275, "lr": 7.408829174664107e-07, "epoch": 0.037129505153319246, "percentage": 3.71, "elapsed_time": "0:01:11", "remaining_time": "0:31:00", "throughput": 25530.08, "total_tokens": 1831936} +{"current_steps": 585, "total_steps": 15621, "loss": 0.6155, "lr": 7.472808701215611e-07, "epoch": 0.03744958709429614, "percentage": 3.74, "elapsed_time": "0:01:12", "remaining_time": "0:31:00", "throughput": 25528.16, "total_tokens": 1847424} +{"current_steps": 590, "total_steps": 15621, "loss": 0.7381, "lr": 7.536788227767114e-07, "epoch": 0.03776966903527303, "percentage": 3.78, "elapsed_time": "0:01:12", "remaining_time": "0:30:58", "throughput": 25525.86, "total_tokens": 1862400} +{"current_steps": 595, "total_steps": 15621, "loss": 0.7694, "lr": 7.600767754318617e-07, "epoch": 0.03808975097624992, "percentage": 3.81, "elapsed_time": "0:01:13", "remaining_time": "0:30:57", "throughput": 25524.54, "total_tokens": 1876928} +{"current_steps": 600, "total_steps": 15621, "loss": 0.6363, "lr": 7.664747280870121e-07, "epoch": 0.03840983291722681, "percentage": 3.84, "elapsed_time": "0:01:14", "remaining_time": "0:30:56", "throughput": 25523.44, "total_tokens": 1892608} +{"current_steps": 605, "total_steps": 15621, "loss": 0.7292, "lr": 7.728726807421625e-07, "epoch": 0.0387299148582037, "percentage": 3.87, "elapsed_time": "0:01:14", "remaining_time": "0:30:56", "throughput": 25528.38, "total_tokens": 1909696} +{"current_steps": 610, "total_steps": 15621, "loss": 0.7601, "lr": 7.792706333973129e-07, "epoch": 0.03904999679918059, "percentage": 3.9, "elapsed_time": "0:01:15", "remaining_time": "0:30:55", "throughput": 25527.25, "total_tokens": 1924864} +{"current_steps": 615, "total_steps": 15621, "loss": 0.5592, "lr": 7.856685860524632e-07, "epoch": 0.03937007874015748, "percentage": 3.94, "elapsed_time": "0:01:15", "remaining_time": "0:30:54", "throughput": 25528.5, "total_tokens": 1939968} +{"current_steps": 620, "total_steps": 15621, "loss": 0.7152, "lr": 7.920665387076135e-07, "epoch": 0.03969016068113437, "percentage": 3.97, "elapsed_time": "0:01:16", "remaining_time": "0:30:53", "throughput": 25526.84, "total_tokens": 1955136} +{"current_steps": 625, "total_steps": 15621, "loss": 0.7036, "lr": 7.984644913627639e-07, "epoch": 0.04001024262211126, "percentage": 4.0, "elapsed_time": "0:01:17", "remaining_time": "0:30:52", "throughput": 25526.59, "total_tokens": 1970880} +{"current_steps": 630, "total_steps": 15621, "loss": 0.5794, "lr": 8.048624440179143e-07, "epoch": 0.04033032456308815, "percentage": 4.03, "elapsed_time": "0:01:17", "remaining_time": "0:30:51", "throughput": 25527.05, "total_tokens": 1986752} +{"current_steps": 635, "total_steps": 15621, "loss": 0.5994, "lr": 8.112603966730645e-07, "epoch": 0.04065040650406504, "percentage": 4.07, "elapsed_time": "0:01:18", "remaining_time": "0:30:50", "throughput": 25525.7, "total_tokens": 2001856} +{"current_steps": 640, "total_steps": 15621, "loss": 0.6586, "lr": 8.17658349328215e-07, "epoch": 0.04097048844504193, "percentage": 4.1, "elapsed_time": "0:01:19", "remaining_time": "0:30:51", "throughput": 25534.64, "total_tokens": 2019968} +{"current_steps": 645, "total_steps": 15621, "loss": 0.7047, "lr": 8.240563019833653e-07, "epoch": 0.04129057038601882, "percentage": 4.13, "elapsed_time": "0:01:19", "remaining_time": "0:30:50", "throughput": 25534.91, "total_tokens": 2035328} +{"current_steps": 650, "total_steps": 15621, "loss": 0.6282, "lr": 8.304542546385156e-07, "epoch": 0.04161065232699571, "percentage": 4.16, "elapsed_time": "0:01:20", "remaining_time": "0:30:53", "throughput": 25544.33, "total_tokens": 2055168} +{"current_steps": 655, "total_steps": 15621, "loss": 0.7521, "lr": 8.36852207293666e-07, "epoch": 0.0419307342679726, "percentage": 4.19, "elapsed_time": "0:01:21", "remaining_time": "0:30:52", "throughput": 25547.0, "total_tokens": 2071808} +{"current_steps": 660, "total_steps": 15621, "loss": 0.6527, "lr": 8.432501599488163e-07, "epoch": 0.04225081620894949, "percentage": 4.23, "elapsed_time": "0:01:21", "remaining_time": "0:30:52", "throughput": 25548.33, "total_tokens": 2087424} +{"current_steps": 665, "total_steps": 15621, "loss": 0.7682, "lr": 8.496481126039667e-07, "epoch": 0.04257089814992638, "percentage": 4.26, "elapsed_time": "0:01:22", "remaining_time": "0:30:50", "throughput": 25547.76, "total_tokens": 2102592} +{"current_steps": 670, "total_steps": 15621, "loss": 0.6517, "lr": 8.560460652591171e-07, "epoch": 0.04289098009090327, "percentage": 4.29, "elapsed_time": "0:01:22", "remaining_time": "0:30:50", "throughput": 25552.57, "total_tokens": 2119488} +{"current_steps": 675, "total_steps": 15621, "loss": 0.6454, "lr": 8.624440179142674e-07, "epoch": 0.04321106203188016, "percentage": 4.32, "elapsed_time": "0:01:23", "remaining_time": "0:30:50", "throughput": 25557.13, "total_tokens": 2136000} +{"current_steps": 680, "total_steps": 15621, "loss": 0.7404, "lr": 8.688419705694177e-07, "epoch": 0.04353114397285705, "percentage": 4.35, "elapsed_time": "0:01:24", "remaining_time": "0:30:50", "throughput": 25560.91, "total_tokens": 2152448} +{"current_steps": 685, "total_steps": 15621, "loss": 0.6177, "lr": 8.752399232245681e-07, "epoch": 0.04385122591383394, "percentage": 4.39, "elapsed_time": "0:01:24", "remaining_time": "0:30:49", "throughput": 25562.65, "total_tokens": 2168000} +{"current_steps": 690, "total_steps": 15621, "loss": 0.5953, "lr": 8.816378758797185e-07, "epoch": 0.04417130785481083, "percentage": 4.42, "elapsed_time": "0:01:25", "remaining_time": "0:30:48", "throughput": 25560.85, "total_tokens": 2183552} +{"current_steps": 695, "total_steps": 15621, "loss": 0.7135, "lr": 8.880358285348688e-07, "epoch": 0.04449138979578772, "percentage": 4.45, "elapsed_time": "0:01:26", "remaining_time": "0:30:47", "throughput": 25563.86, "total_tokens": 2199488} +{"current_steps": 700, "total_steps": 15621, "loss": 0.6167, "lr": 8.944337811900191e-07, "epoch": 0.04481147173676461, "percentage": 4.48, "elapsed_time": "0:01:26", "remaining_time": "0:30:47", "throughput": 25564.84, "total_tokens": 2215296} +{"current_steps": 705, "total_steps": 15621, "loss": 0.7051, "lr": 9.008317338451695e-07, "epoch": 0.0451315536777415, "percentage": 4.51, "elapsed_time": "0:01:27", "remaining_time": "0:30:45", "throughput": 25564.86, "total_tokens": 2230016} +{"current_steps": 710, "total_steps": 15621, "loss": 0.6629, "lr": 9.072296865003198e-07, "epoch": 0.04545163561871839, "percentage": 4.55, "elapsed_time": "0:01:27", "remaining_time": "0:30:44", "throughput": 25562.61, "total_tokens": 2245056} +{"current_steps": 715, "total_steps": 15621, "loss": 0.6166, "lr": 9.136276391554703e-07, "epoch": 0.04577171755969528, "percentage": 4.58, "elapsed_time": "0:01:28", "remaining_time": "0:30:44", "throughput": 25564.5, "total_tokens": 2261248} +{"current_steps": 720, "total_steps": 15621, "loss": 0.6516, "lr": 9.200255918106205e-07, "epoch": 0.04609179950067217, "percentage": 4.61, "elapsed_time": "0:01:29", "remaining_time": "0:30:43", "throughput": 25567.01, "total_tokens": 2278016} +{"current_steps": 725, "total_steps": 15621, "loss": 0.5696, "lr": 9.264235444657708e-07, "epoch": 0.04641188144164906, "percentage": 4.64, "elapsed_time": "0:01:29", "remaining_time": "0:30:42", "throughput": 25564.24, "total_tokens": 2292800} +{"current_steps": 730, "total_steps": 15621, "loss": 0.6049, "lr": 9.328214971209213e-07, "epoch": 0.04673196338262595, "percentage": 4.67, "elapsed_time": "0:01:30", "remaining_time": "0:30:41", "throughput": 25564.31, "total_tokens": 2308224} +{"current_steps": 735, "total_steps": 15621, "loss": 0.7005, "lr": 9.392194497760716e-07, "epoch": 0.04705204532360284, "percentage": 4.71, "elapsed_time": "0:01:30", "remaining_time": "0:30:42", "throughput": 25569.77, "total_tokens": 2325760} +{"current_steps": 740, "total_steps": 15621, "loss": 0.6971, "lr": 9.456174024312221e-07, "epoch": 0.04737212726457973, "percentage": 4.74, "elapsed_time": "0:01:31", "remaining_time": "0:30:41", "throughput": 25571.82, "total_tokens": 2341632} +{"current_steps": 745, "total_steps": 15621, "loss": 0.7066, "lr": 9.520153550863723e-07, "epoch": 0.04769220920555662, "percentage": 4.77, "elapsed_time": "0:01:32", "remaining_time": "0:30:40", "throughput": 25573.63, "total_tokens": 2357504} +{"current_steps": 750, "total_steps": 15621, "loss": 0.7294, "lr": 9.584133077415226e-07, "epoch": 0.04801229114653351, "percentage": 4.8, "elapsed_time": "0:01:32", "remaining_time": "0:30:39", "throughput": 25570.4, "total_tokens": 2372608} +{"current_steps": 755, "total_steps": 15621, "loss": 0.587, "lr": 9.64811260396673e-07, "epoch": 0.0483323730875104, "percentage": 4.83, "elapsed_time": "0:01:33", "remaining_time": "0:30:38", "throughput": 25572.36, "total_tokens": 2388352} +{"current_steps": 760, "total_steps": 15621, "loss": 0.6934, "lr": 9.712092130518234e-07, "epoch": 0.04865245502848729, "percentage": 4.87, "elapsed_time": "0:01:34", "remaining_time": "0:30:38", "throughput": 25573.11, "total_tokens": 2404480} +{"current_steps": 765, "total_steps": 15621, "loss": 0.518, "lr": 9.776071657069737e-07, "epoch": 0.04897253696946418, "percentage": 4.9, "elapsed_time": "0:01:34", "remaining_time": "0:30:37", "throughput": 25572.1, "total_tokens": 2419648} +{"current_steps": 770, "total_steps": 15621, "loss": 0.7121, "lr": 9.840051183621241e-07, "epoch": 0.04929261891044107, "percentage": 4.93, "elapsed_time": "0:01:35", "remaining_time": "0:30:37", "throughput": 25571.5, "total_tokens": 2435584} +{"current_steps": 775, "total_steps": 15621, "loss": 0.6265, "lr": 9.904030710172743e-07, "epoch": 0.04961270085141796, "percentage": 4.96, "elapsed_time": "0:01:35", "remaining_time": "0:30:36", "throughput": 25571.4, "total_tokens": 2451072} +{"current_steps": 780, "total_steps": 15621, "loss": 0.764, "lr": 9.968010236724249e-07, "epoch": 0.04993278279239485, "percentage": 4.99, "elapsed_time": "0:01:36", "remaining_time": "0:30:36", "throughput": 25575.18, "total_tokens": 2467968} +{"current_steps": 782, "total_steps": 15621, "eval_loss": 0.6362079381942749, "epoch": 0.05006081556878561, "percentage": 5.01, "elapsed_time": "0:02:25", "remaining_time": "0:46:08", "throughput": 16960.16, "total_tokens": 2474432} +{"current_steps": 785, "total_steps": 15621, "loss": 0.669, "lr": 1.0031989763275752e-06, "epoch": 0.05025286473337175, "percentage": 5.03, "elapsed_time": "0:02:59", "remaining_time": "0:56:40", "throughput": 13812.8, "total_tokens": 2484928} +{"current_steps": 790, "total_steps": 15621, "loss": 0.6777, "lr": 1.0095969289827256e-06, "epoch": 0.05057294667434863, "percentage": 5.06, "elapsed_time": "0:03:00", "remaining_time": "0:56:29", "throughput": 13856.13, "total_tokens": 2501504} +{"current_steps": 795, "total_steps": 15621, "loss": 0.5188, "lr": 1.0159948816378758e-06, "epoch": 0.050893028615325527, "percentage": 5.09, "elapsed_time": "0:03:01", "remaining_time": "0:56:18", "throughput": 13902.01, "total_tokens": 2518848} +{"current_steps": 800, "total_steps": 15621, "loss": 0.5482, "lr": 1.0223928342930262e-06, "epoch": 0.05121311055630241, "percentage": 5.12, "elapsed_time": "0:03:01", "remaining_time": "0:56:08", "throughput": 13945.58, "total_tokens": 2535680} +{"current_steps": 805, "total_steps": 15621, "loss": 0.676, "lr": 1.0287907869481766e-06, "epoch": 0.051533192497279307, "percentage": 5.15, "elapsed_time": "0:03:02", "remaining_time": "0:55:57", "throughput": 13983.26, "total_tokens": 2550976} +{"current_steps": 810, "total_steps": 15621, "loss": 0.5562, "lr": 1.035188739603327e-06, "epoch": 0.05185327443825619, "percentage": 5.19, "elapsed_time": "0:03:03", "remaining_time": "0:55:46", "throughput": 14022.65, "total_tokens": 2566656} +{"current_steps": 815, "total_steps": 15621, "loss": 0.6315, "lr": 1.0415866922584773e-06, "epoch": 0.052173356379233086, "percentage": 5.22, "elapsed_time": "0:03:03", "remaining_time": "0:55:35", "throughput": 14058.79, "total_tokens": 2581568} +{"current_steps": 820, "total_steps": 15621, "loss": 0.6426, "lr": 1.0479846449136277e-06, "epoch": 0.05249343832020997, "percentage": 5.25, "elapsed_time": "0:03:04", "remaining_time": "0:55:25", "throughput": 14095.35, "total_tokens": 2596608} +{"current_steps": 825, "total_steps": 15621, "loss": 0.6719, "lr": 1.0543825975687779e-06, "epoch": 0.052813520261186866, "percentage": 5.28, "elapsed_time": "0:03:04", "remaining_time": "0:55:14", "throughput": 14132.64, "total_tokens": 2612032} +{"current_steps": 830, "total_steps": 15621, "loss": 0.7313, "lr": 1.0607805502239282e-06, "epoch": 0.05313360220216375, "percentage": 5.31, "elapsed_time": "0:03:05", "remaining_time": "0:55:04", "throughput": 14169.83, "total_tokens": 2627264} +{"current_steps": 835, "total_steps": 15621, "loss": 0.548, "lr": 1.0671785028790788e-06, "epoch": 0.053453684143140646, "percentage": 5.35, "elapsed_time": "0:03:06", "remaining_time": "0:54:54", "throughput": 14208.81, "total_tokens": 2643264} +{"current_steps": 840, "total_steps": 15621, "loss": 0.5474, "lr": 1.073576455534229e-06, "epoch": 0.05377376608411753, "percentage": 5.38, "elapsed_time": "0:03:06", "remaining_time": "0:54:44", "throughput": 14247.58, "total_tokens": 2659264} +{"current_steps": 845, "total_steps": 15621, "loss": 0.5737, "lr": 1.0799744081893794e-06, "epoch": 0.054093848025094426, "percentage": 5.41, "elapsed_time": "0:03:07", "remaining_time": "0:54:33", "throughput": 14281.71, "total_tokens": 2673856} +{"current_steps": 850, "total_steps": 15621, "loss": 0.4779, "lr": 1.0863723608445297e-06, "epoch": 0.05441392996607131, "percentage": 5.44, "elapsed_time": "0:03:07", "remaining_time": "0:54:23", "throughput": 14315.13, "total_tokens": 2688448} +{"current_steps": 855, "total_steps": 15621, "loss": 0.6201, "lr": 1.09277031349968e-06, "epoch": 0.054734011907048206, "percentage": 5.47, "elapsed_time": "0:03:08", "remaining_time": "0:54:13", "throughput": 14351.12, "total_tokens": 2703872} +{"current_steps": 860, "total_steps": 15621, "loss": 0.6104, "lr": 1.0991682661548305e-06, "epoch": 0.05505409384802509, "percentage": 5.51, "elapsed_time": "0:03:09", "remaining_time": "0:54:04", "throughput": 14385.77, "total_tokens": 2719040} +{"current_steps": 865, "total_steps": 15621, "loss": 0.6205, "lr": 1.1055662188099809e-06, "epoch": 0.055374175789001986, "percentage": 5.54, "elapsed_time": "0:03:09", "remaining_time": "0:53:54", "throughput": 14423.91, "total_tokens": 2735168} +{"current_steps": 870, "total_steps": 15621, "loss": 0.5224, "lr": 1.111964171465131e-06, "epoch": 0.05569425772997887, "percentage": 5.57, "elapsed_time": "0:03:10", "remaining_time": "0:53:45", "throughput": 14459.56, "total_tokens": 2750592} +{"current_steps": 875, "total_steps": 15621, "loss": 0.6572, "lr": 1.1183621241202814e-06, "epoch": 0.056014339670955766, "percentage": 5.6, "elapsed_time": "0:03:10", "remaining_time": "0:53:36", "throughput": 14498.34, "total_tokens": 2767232} +{"current_steps": 880, "total_steps": 15621, "loss": 0.665, "lr": 1.1247600767754318e-06, "epoch": 0.05633442161193265, "percentage": 5.63, "elapsed_time": "0:03:11", "remaining_time": "0:53:28", "throughput": 14539.57, "total_tokens": 2784768} +{"current_steps": 885, "total_steps": 15621, "loss": 0.5809, "lr": 1.1311580294305822e-06, "epoch": 0.056654503552909546, "percentage": 5.67, "elapsed_time": "0:03:12", "remaining_time": "0:53:19", "throughput": 14573.29, "total_tokens": 2799872} +{"current_steps": 890, "total_steps": 15621, "loss": 0.6481, "lr": 1.1375559820857326e-06, "epoch": 0.05697458549388643, "percentage": 5.7, "elapsed_time": "0:03:12", "remaining_time": "0:53:10", "throughput": 14609.65, "total_tokens": 2816000} +{"current_steps": 895, "total_steps": 15621, "loss": 0.5859, "lr": 1.143953934740883e-06, "epoch": 0.057294667434863326, "percentage": 5.73, "elapsed_time": "0:03:13", "remaining_time": "0:53:01", "throughput": 14644.53, "total_tokens": 2831744} +{"current_steps": 900, "total_steps": 15621, "loss": 0.6183, "lr": 1.150351887396033e-06, "epoch": 0.05761474937584021, "percentage": 5.76, "elapsed_time": "0:03:13", "remaining_time": "0:52:52", "throughput": 14679.09, "total_tokens": 2847424} +{"current_steps": 905, "total_steps": 15621, "loss": 0.616, "lr": 1.1567498400511835e-06, "epoch": 0.057934831316817106, "percentage": 5.79, "elapsed_time": "0:03:14", "remaining_time": "0:52:43", "throughput": 14711.76, "total_tokens": 2862272} +{"current_steps": 910, "total_steps": 15621, "loss": 0.4927, "lr": 1.163147792706334e-06, "epoch": 0.05825491325779399, "percentage": 5.83, "elapsed_time": "0:03:15", "remaining_time": "0:52:34", "throughput": 14743.56, "total_tokens": 2877120} +{"current_steps": 915, "total_steps": 15621, "loss": 0.5249, "lr": 1.1695457453614842e-06, "epoch": 0.058574995198770886, "percentage": 5.86, "elapsed_time": "0:03:15", "remaining_time": "0:52:26", "throughput": 14783.18, "total_tokens": 2894592} +{"current_steps": 920, "total_steps": 15621, "loss": 0.6159, "lr": 1.1759436980166346e-06, "epoch": 0.05889507713974777, "percentage": 5.89, "elapsed_time": "0:03:16", "remaining_time": "0:52:18", "throughput": 14815.93, "total_tokens": 2909888} +{"current_steps": 925, "total_steps": 15621, "loss": 0.6195, "lr": 1.182341650671785e-06, "epoch": 0.059215159080724666, "percentage": 5.92, "elapsed_time": "0:03:17", "remaining_time": "0:52:10", "throughput": 14849.12, "total_tokens": 2925632} +{"current_steps": 930, "total_steps": 15621, "loss": 0.6153, "lr": 1.1887396033269352e-06, "epoch": 0.05953524102170155, "percentage": 5.95, "elapsed_time": "0:03:17", "remaining_time": "0:52:02", "throughput": 14883.96, "total_tokens": 2941760} +{"current_steps": 935, "total_steps": 15621, "loss": 0.7076, "lr": 1.1951375559820858e-06, "epoch": 0.059855322962678446, "percentage": 5.99, "elapsed_time": "0:03:18", "remaining_time": "0:51:53", "throughput": 14917.7, "total_tokens": 2957376} +{"current_steps": 940, "total_steps": 15621, "loss": 0.5704, "lr": 1.2015355086372361e-06, "epoch": 0.06017540490365533, "percentage": 6.02, "elapsed_time": "0:03:18", "remaining_time": "0:51:45", "throughput": 14949.93, "total_tokens": 2972800} +{"current_steps": 945, "total_steps": 15621, "loss": 0.7172, "lr": 1.2079334612923863e-06, "epoch": 0.060495486844632226, "percentage": 6.05, "elapsed_time": "0:03:19", "remaining_time": "0:51:37", "throughput": 14982.99, "total_tokens": 2988480} +{"current_steps": 950, "total_steps": 15621, "loss": 0.6613, "lr": 1.2143314139475367e-06, "epoch": 0.06081556878560911, "percentage": 6.08, "elapsed_time": "0:03:20", "remaining_time": "0:51:29", "throughput": 15016.44, "total_tokens": 3004480} +{"current_steps": 955, "total_steps": 15621, "loss": 0.444, "lr": 1.220729366602687e-06, "epoch": 0.061135650726586006, "percentage": 6.11, "elapsed_time": "0:03:20", "remaining_time": "0:51:21", "throughput": 15049.57, "total_tokens": 3020288} +{"current_steps": 960, "total_steps": 15621, "loss": 0.6011, "lr": 1.2271273192578374e-06, "epoch": 0.06145573266756289, "percentage": 6.15, "elapsed_time": "0:03:21", "remaining_time": "0:51:14", "throughput": 15081.19, "total_tokens": 3035968} +{"current_steps": 965, "total_steps": 15621, "loss": 0.7411, "lr": 1.2335252719129878e-06, "epoch": 0.061775814608539786, "percentage": 6.18, "elapsed_time": "0:03:21", "remaining_time": "0:51:06", "throughput": 15113.62, "total_tokens": 3051776} +{"current_steps": 970, "total_steps": 15621, "loss": 0.5575, "lr": 1.2399232245681382e-06, "epoch": 0.06209589654951667, "percentage": 6.21, "elapsed_time": "0:03:22", "remaining_time": "0:50:58", "throughput": 15142.94, "total_tokens": 3066560} +{"current_steps": 975, "total_steps": 15621, "loss": 0.6357, "lr": 1.2463211772232884e-06, "epoch": 0.062415978490493566, "percentage": 6.24, "elapsed_time": "0:03:23", "remaining_time": "0:50:51", "throughput": 15175.87, "total_tokens": 3082496} +{"current_steps": 980, "total_steps": 15621, "loss": 0.6233, "lr": 1.2527191298784387e-06, "epoch": 0.06273606043147045, "percentage": 6.27, "elapsed_time": "0:03:23", "remaining_time": "0:50:43", "throughput": 15206.37, "total_tokens": 3097856} +{"current_steps": 985, "total_steps": 15621, "loss": 0.5062, "lr": 1.2591170825335893e-06, "epoch": 0.06305614237244735, "percentage": 6.31, "elapsed_time": "0:03:24", "remaining_time": "0:50:36", "throughput": 15237.9, "total_tokens": 3113664} +{"current_steps": 990, "total_steps": 15621, "loss": 0.6242, "lr": 1.2655150351887395e-06, "epoch": 0.06337622431342424, "percentage": 6.34, "elapsed_time": "0:03:24", "remaining_time": "0:50:29", "throughput": 15270.48, "total_tokens": 3129792} +{"current_steps": 995, "total_steps": 15621, "loss": 0.5901, "lr": 1.2719129878438899e-06, "epoch": 0.06369630625440113, "percentage": 6.37, "elapsed_time": "0:03:25", "remaining_time": "0:50:21", "throughput": 15300.5, "total_tokens": 3145024} +{"current_steps": 1000, "total_steps": 15621, "loss": 0.7747, "lr": 1.2783109404990402e-06, "epoch": 0.06401638819537801, "percentage": 6.4, "elapsed_time": "0:03:26", "remaining_time": "0:50:14", "throughput": 15332.27, "total_tokens": 3161216} +{"current_steps": 1005, "total_steps": 15621, "loss": 0.4118, "lr": 1.2847088931541904e-06, "epoch": 0.0643364701363549, "percentage": 6.43, "elapsed_time": "0:03:26", "remaining_time": "0:50:07", "throughput": 15363.17, "total_tokens": 3176960} +{"current_steps": 1010, "total_steps": 15621, "loss": 0.607, "lr": 1.291106845809341e-06, "epoch": 0.0646565520773318, "percentage": 6.47, "elapsed_time": "0:03:27", "remaining_time": "0:50:00", "throughput": 15395.02, "total_tokens": 3193088} +{"current_steps": 1015, "total_steps": 15621, "loss": 0.6808, "lr": 1.2975047984644914e-06, "epoch": 0.0649766340183087, "percentage": 6.5, "elapsed_time": "0:03:28", "remaining_time": "0:49:54", "throughput": 15428.57, "total_tokens": 3210112} +{"current_steps": 1020, "total_steps": 15621, "loss": 0.5044, "lr": 1.3039027511196418e-06, "epoch": 0.06529671595928557, "percentage": 6.53, "elapsed_time": "0:03:28", "remaining_time": "0:49:46", "throughput": 15456.25, "total_tokens": 3224768} +{"current_steps": 1025, "total_steps": 15621, "loss": 0.6235, "lr": 1.310300703774792e-06, "epoch": 0.06561679790026247, "percentage": 6.56, "elapsed_time": "0:03:29", "remaining_time": "0:49:39", "throughput": 15485.4, "total_tokens": 3240128} +{"current_steps": 1030, "total_steps": 15621, "loss": 0.5605, "lr": 1.3166986564299423e-06, "epoch": 0.06593687984123936, "percentage": 6.59, "elapsed_time": "0:03:29", "remaining_time": "0:49:32", "throughput": 15517.48, "total_tokens": 3256576} +{"current_steps": 1035, "total_steps": 15621, "loss": 0.5942, "lr": 1.3230966090850929e-06, "epoch": 0.06625696178221625, "percentage": 6.63, "elapsed_time": "0:03:30", "remaining_time": "0:49:26", "throughput": 15547.03, "total_tokens": 3272384} +{"current_steps": 1040, "total_steps": 15621, "loss": 0.4108, "lr": 1.329494561740243e-06, "epoch": 0.06657704372319313, "percentage": 6.66, "elapsed_time": "0:03:31", "remaining_time": "0:49:19", "throughput": 15577.4, "total_tokens": 3288512} +{"current_steps": 1045, "total_steps": 15621, "loss": 0.4897, "lr": 1.3358925143953934e-06, "epoch": 0.06689712566417003, "percentage": 6.69, "elapsed_time": "0:03:31", "remaining_time": "0:49:13", "throughput": 15612.09, "total_tokens": 3306304} +{"current_steps": 1050, "total_steps": 15621, "loss": 0.4785, "lr": 1.3422904670505438e-06, "epoch": 0.06721720760514692, "percentage": 6.72, "elapsed_time": "0:03:32", "remaining_time": "0:49:07", "throughput": 15639.31, "total_tokens": 3321344} +{"current_steps": 1055, "total_steps": 15621, "loss": 0.6127, "lr": 1.348688419705694e-06, "epoch": 0.06753728954612381, "percentage": 6.75, "elapsed_time": "0:03:33", "remaining_time": "0:49:01", "throughput": 15671.92, "total_tokens": 3338560} +{"current_steps": 1060, "total_steps": 15621, "loss": 0.5135, "lr": 1.3550863723608446e-06, "epoch": 0.06785737148710069, "percentage": 6.79, "elapsed_time": "0:03:33", "remaining_time": "0:48:54", "throughput": 15697.36, "total_tokens": 3353152} +{"current_steps": 1065, "total_steps": 15621, "loss": 0.5401, "lr": 1.361484325015995e-06, "epoch": 0.06817745342807759, "percentage": 6.82, "elapsed_time": "0:03:34", "remaining_time": "0:48:48", "throughput": 15727.25, "total_tokens": 3369536} +{"current_steps": 1070, "total_steps": 15621, "loss": 0.6023, "lr": 1.3678822776711451e-06, "epoch": 0.06849753536905448, "percentage": 6.85, "elapsed_time": "0:03:34", "remaining_time": "0:48:41", "throughput": 15754.24, "total_tokens": 3384832} +{"current_steps": 1075, "total_steps": 15621, "loss": 0.4881, "lr": 1.3742802303262955e-06, "epoch": 0.06881761731003137, "percentage": 6.88, "elapsed_time": "0:03:35", "remaining_time": "0:48:35", "throughput": 15779.5, "total_tokens": 3399424} +{"current_steps": 1080, "total_steps": 15621, "loss": 0.6565, "lr": 1.3806781829814459e-06, "epoch": 0.06913769925100825, "percentage": 6.91, "elapsed_time": "0:03:36", "remaining_time": "0:48:29", "throughput": 15811.29, "total_tokens": 3416704} +{"current_steps": 1085, "total_steps": 15621, "loss": 0.5553, "lr": 1.3870761356365963e-06, "epoch": 0.06945778119198515, "percentage": 6.95, "elapsed_time": "0:03:36", "remaining_time": "0:48:22", "throughput": 15837.48, "total_tokens": 3431552} +{"current_steps": 1090, "total_steps": 15621, "loss": 0.6472, "lr": 1.3934740882917466e-06, "epoch": 0.06977786313296204, "percentage": 6.98, "elapsed_time": "0:03:37", "remaining_time": "0:48:16", "throughput": 15865.44, "total_tokens": 3447488} +{"current_steps": 1095, "total_steps": 15621, "loss": 0.5137, "lr": 1.399872040946897e-06, "epoch": 0.07009794507393893, "percentage": 7.01, "elapsed_time": "0:03:37", "remaining_time": "0:48:10", "throughput": 15893.59, "total_tokens": 3463424} +{"current_steps": 1100, "total_steps": 15621, "loss": 0.6527, "lr": 1.4062699936020472e-06, "epoch": 0.07041802701491581, "percentage": 7.04, "elapsed_time": "0:03:38", "remaining_time": "0:48:04", "throughput": 15922.07, "total_tokens": 3479680} +{"current_steps": 1105, "total_steps": 15621, "loss": 0.5117, "lr": 1.4126679462571976e-06, "epoch": 0.0707381089558927, "percentage": 7.07, "elapsed_time": "0:03:39", "remaining_time": "0:47:58", "throughput": 15950.11, "total_tokens": 3495552} +{"current_steps": 1110, "total_steps": 15621, "loss": 0.4748, "lr": 1.4190658989123481e-06, "epoch": 0.0710581908968696, "percentage": 7.11, "elapsed_time": "0:03:39", "remaining_time": "0:47:52", "throughput": 15976.14, "total_tokens": 3510976} +{"current_steps": 1115, "total_steps": 15621, "loss": 0.6499, "lr": 1.4254638515674983e-06, "epoch": 0.0713782728378465, "percentage": 7.14, "elapsed_time": "0:03:40", "remaining_time": "0:47:46", "throughput": 16001.01, "total_tokens": 3526016} +{"current_steps": 1120, "total_steps": 15621, "loss": 0.5645, "lr": 1.4318618042226487e-06, "epoch": 0.07169835477882337, "percentage": 7.17, "elapsed_time": "0:03:40", "remaining_time": "0:47:40", "throughput": 16025.08, "total_tokens": 3540544} +{"current_steps": 1125, "total_steps": 15621, "loss": 0.6069, "lr": 1.438259756877799e-06, "epoch": 0.07201843671980027, "percentage": 7.2, "elapsed_time": "0:03:41", "remaining_time": "0:47:34", "throughput": 16051.95, "total_tokens": 3556416} +{"current_steps": 1130, "total_steps": 15621, "loss": 0.5077, "lr": 1.4446577095329492e-06, "epoch": 0.07233851866077716, "percentage": 7.23, "elapsed_time": "0:03:42", "remaining_time": "0:47:29", "throughput": 16078.66, "total_tokens": 3572096} +{"current_steps": 1135, "total_steps": 15621, "loss": 0.4993, "lr": 1.4510556621880998e-06, "epoch": 0.07265860060175405, "percentage": 7.27, "elapsed_time": "0:03:42", "remaining_time": "0:47:23", "throughput": 16104.42, "total_tokens": 3587712} +{"current_steps": 1140, "total_steps": 15621, "loss": 0.5417, "lr": 1.4574536148432502e-06, "epoch": 0.07297868254273093, "percentage": 7.3, "elapsed_time": "0:03:43", "remaining_time": "0:47:18", "throughput": 16134.67, "total_tokens": 3605056} +{"current_steps": 1145, "total_steps": 15621, "loss": 0.6805, "lr": 1.4638515674984004e-06, "epoch": 0.07329876448370783, "percentage": 7.33, "elapsed_time": "0:03:44", "remaining_time": "0:47:12", "throughput": 16161.75, "total_tokens": 3621184} +{"current_steps": 1150, "total_steps": 15621, "loss": 0.5834, "lr": 1.4702495201535507e-06, "epoch": 0.07361884642468472, "percentage": 7.36, "elapsed_time": "0:03:44", "remaining_time": "0:47:06", "throughput": 16183.71, "total_tokens": 3635392} +{"current_steps": 1155, "total_steps": 15621, "loss": 0.5049, "lr": 1.4766474728087011e-06, "epoch": 0.07393892836566161, "percentage": 7.39, "elapsed_time": "0:03:45", "remaining_time": "0:47:00", "throughput": 16206.96, "total_tokens": 3649984} +{"current_steps": 1160, "total_steps": 15621, "loss": 0.5276, "lr": 1.4830454254638515e-06, "epoch": 0.07425901030663849, "percentage": 7.43, "elapsed_time": "0:03:45", "remaining_time": "0:46:55", "throughput": 16233.21, "total_tokens": 3665920} +{"current_steps": 1165, "total_steps": 15621, "loss": 0.4587, "lr": 1.4894433781190019e-06, "epoch": 0.07457909224761539, "percentage": 7.46, "elapsed_time": "0:03:46", "remaining_time": "0:46:49", "throughput": 16255.21, "total_tokens": 3680256} +{"current_steps": 1170, "total_steps": 15621, "loss": 0.5255, "lr": 1.4958413307741523e-06, "epoch": 0.07489917418859228, "percentage": 7.49, "elapsed_time": "0:03:47", "remaining_time": "0:46:44", "throughput": 16284.04, "total_tokens": 3697536} +{"current_steps": 1175, "total_steps": 15621, "loss": 0.6111, "lr": 1.5022392834293024e-06, "epoch": 0.07521925612956917, "percentage": 7.52, "elapsed_time": "0:03:47", "remaining_time": "0:46:39", "throughput": 16307.83, "total_tokens": 3713088} +{"current_steps": 1180, "total_steps": 15621, "loss": 0.6712, "lr": 1.5086372360844528e-06, "epoch": 0.07553933807054607, "percentage": 7.55, "elapsed_time": "0:03:48", "remaining_time": "0:46:34", "throughput": 16335.65, "total_tokens": 3729920} +{"current_steps": 1185, "total_steps": 15621, "loss": 0.5489, "lr": 1.5150351887396034e-06, "epoch": 0.07585942001152295, "percentage": 7.59, "elapsed_time": "0:03:48", "remaining_time": "0:46:29", "throughput": 16360.75, "total_tokens": 3745664} +{"current_steps": 1190, "total_steps": 15621, "loss": 0.5258, "lr": 1.5214331413947536e-06, "epoch": 0.07617950195249984, "percentage": 7.62, "elapsed_time": "0:03:49", "remaining_time": "0:46:23", "throughput": 16383.57, "total_tokens": 3760576} +{"current_steps": 1195, "total_steps": 15621, "loss": 0.5085, "lr": 1.527831094049904e-06, "epoch": 0.07649958389347673, "percentage": 7.65, "elapsed_time": "0:03:50", "remaining_time": "0:46:18", "throughput": 16408.2, "total_tokens": 3776576} +{"current_steps": 1200, "total_steps": 15621, "loss": 0.5857, "lr": 1.5342290467050543e-06, "epoch": 0.07681966583445363, "percentage": 7.68, "elapsed_time": "0:03:50", "remaining_time": "0:46:13", "throughput": 16432.55, "total_tokens": 3792384} +{"current_steps": 1205, "total_steps": 15621, "loss": 0.6438, "lr": 1.5406269993602045e-06, "epoch": 0.0771397477754305, "percentage": 7.71, "elapsed_time": "0:03:51", "remaining_time": "0:46:07", "throughput": 16452.89, "total_tokens": 3806592} +{"current_steps": 1210, "total_steps": 15621, "loss": 0.5775, "lr": 1.547024952015355e-06, "epoch": 0.0774598297164074, "percentage": 7.75, "elapsed_time": "0:03:51", "remaining_time": "0:46:02", "throughput": 16476.79, "total_tokens": 3822080} +{"current_steps": 1215, "total_steps": 15621, "loss": 0.5269, "lr": 1.5534229046705055e-06, "epoch": 0.07777991165738429, "percentage": 7.78, "elapsed_time": "0:03:52", "remaining_time": "0:45:57", "throughput": 16496.07, "total_tokens": 3837120} +{"current_steps": 1220, "total_steps": 15621, "loss": 0.6994, "lr": 1.5598208573256556e-06, "epoch": 0.07809999359836119, "percentage": 7.81, "elapsed_time": "0:03:53", "remaining_time": "0:45:52", "throughput": 16520.24, "total_tokens": 3852864} +{"current_steps": 1225, "total_steps": 15621, "loss": 0.515, "lr": 1.566218809980806e-06, "epoch": 0.07842007553933807, "percentage": 7.84, "elapsed_time": "0:03:53", "remaining_time": "0:45:48", "throughput": 16545.72, "total_tokens": 3869184} +{"current_steps": 1230, "total_steps": 15621, "loss": 0.5388, "lr": 1.5726167626359564e-06, "epoch": 0.07874015748031496, "percentage": 7.87, "elapsed_time": "0:03:54", "remaining_time": "0:45:43", "throughput": 16570.47, "total_tokens": 3885248} +{"current_steps": 1235, "total_steps": 15621, "loss": 0.4306, "lr": 1.5790147152911068e-06, "epoch": 0.07906023942129185, "percentage": 7.91, "elapsed_time": "0:03:55", "remaining_time": "0:45:38", "throughput": 16593.06, "total_tokens": 3900416} +{"current_steps": 1240, "total_steps": 15621, "loss": 0.5503, "lr": 1.5854126679462571e-06, "epoch": 0.07938032136226875, "percentage": 7.94, "elapsed_time": "0:03:55", "remaining_time": "0:45:33", "throughput": 16616.15, "total_tokens": 3916096} +{"current_steps": 1245, "total_steps": 15621, "loss": 0.6993, "lr": 1.5918106206014075e-06, "epoch": 0.07970040330324563, "percentage": 7.97, "elapsed_time": "0:03:56", "remaining_time": "0:45:28", "throughput": 16642.98, "total_tokens": 3933312} +{"current_steps": 1250, "total_steps": 15621, "loss": 0.6197, "lr": 1.5982085732565577e-06, "epoch": 0.08002048524422252, "percentage": 8.0, "elapsed_time": "0:03:56", "remaining_time": "0:45:24", "throughput": 16667.27, "total_tokens": 3949440} +{"current_steps": 1255, "total_steps": 15621, "loss": 0.6799, "lr": 1.604606525911708e-06, "epoch": 0.08034056718519941, "percentage": 8.03, "elapsed_time": "0:03:57", "remaining_time": "0:45:19", "throughput": 16690.38, "total_tokens": 3964992} +{"current_steps": 1260, "total_steps": 15621, "loss": 0.7324, "lr": 1.6110044785668586e-06, "epoch": 0.0806606491261763, "percentage": 8.07, "elapsed_time": "0:03:58", "remaining_time": "0:45:15", "throughput": 16711.69, "total_tokens": 3981696} +{"current_steps": 1265, "total_steps": 15621, "loss": 0.6136, "lr": 1.617402431222009e-06, "epoch": 0.08098073106715319, "percentage": 8.1, "elapsed_time": "0:03:58", "remaining_time": "0:45:10", "throughput": 16734.6, "total_tokens": 3997248} +{"current_steps": 1270, "total_steps": 15621, "loss": 0.6689, "lr": 1.6238003838771592e-06, "epoch": 0.08130081300813008, "percentage": 8.13, "elapsed_time": "0:03:59", "remaining_time": "0:45:05", "throughput": 16754.57, "total_tokens": 4011648} +{"current_steps": 1275, "total_steps": 15621, "loss": 0.5254, "lr": 1.6301983365323096e-06, "epoch": 0.08162089494910697, "percentage": 8.16, "elapsed_time": "0:04:00", "remaining_time": "0:45:01", "throughput": 16778.8, "total_tokens": 4028160} +{"current_steps": 1280, "total_steps": 15621, "loss": 0.5398, "lr": 1.63659628918746e-06, "epoch": 0.08194097689008387, "percentage": 8.19, "elapsed_time": "0:04:00", "remaining_time": "0:44:56", "throughput": 16800.68, "total_tokens": 4043584} +{"current_steps": 1285, "total_steps": 15621, "loss": 0.7076, "lr": 1.6429942418426103e-06, "epoch": 0.08226105883106075, "percentage": 8.23, "elapsed_time": "0:04:01", "remaining_time": "0:44:52", "throughput": 16823.34, "total_tokens": 4059456} +{"current_steps": 1290, "total_steps": 15621, "loss": 0.6103, "lr": 1.6493921944977607e-06, "epoch": 0.08258114077203764, "percentage": 8.26, "elapsed_time": "0:04:01", "remaining_time": "0:44:47", "throughput": 16847.93, "total_tokens": 4076096} +{"current_steps": 1295, "total_steps": 15621, "loss": 0.6111, "lr": 1.655790147152911e-06, "epoch": 0.08290122271301453, "percentage": 8.29, "elapsed_time": "0:04:02", "remaining_time": "0:44:43", "throughput": 16874.19, "total_tokens": 4093568} +{"current_steps": 1300, "total_steps": 15621, "loss": 0.6676, "lr": 1.6621880998080612e-06, "epoch": 0.08322130465399143, "percentage": 8.32, "elapsed_time": "0:04:03", "remaining_time": "0:44:38", "throughput": 16895.9, "total_tokens": 4108864} +{"current_steps": 1305, "total_steps": 15621, "loss": 0.6425, "lr": 1.6685860524632116e-06, "epoch": 0.0835413865949683, "percentage": 8.35, "elapsed_time": "0:04:03", "remaining_time": "0:44:34", "throughput": 16917.1, "total_tokens": 4124224} +{"current_steps": 1310, "total_steps": 15621, "loss": 0.5516, "lr": 1.6749840051183622e-06, "epoch": 0.0838614685359452, "percentage": 8.39, "elapsed_time": "0:04:04", "remaining_time": "0:44:29", "throughput": 16937.39, "total_tokens": 4139008} +{"current_steps": 1315, "total_steps": 15621, "loss": 0.5551, "lr": 1.6813819577735124e-06, "epoch": 0.08418155047692209, "percentage": 8.42, "elapsed_time": "0:04:04", "remaining_time": "0:44:25", "throughput": 16959.91, "total_tokens": 4155008} +{"current_steps": 1320, "total_steps": 15621, "loss": 0.4792, "lr": 1.6877799104286628e-06, "epoch": 0.08450163241789899, "percentage": 8.45, "elapsed_time": "0:04:05", "remaining_time": "0:44:21", "throughput": 16985.7, "total_tokens": 4172544} +{"current_steps": 1325, "total_steps": 15621, "loss": 0.6306, "lr": 1.6941778630838131e-06, "epoch": 0.08482171435887587, "percentage": 8.48, "elapsed_time": "0:04:06", "remaining_time": "0:44:17", "throughput": 17007.8, "total_tokens": 4188416} +{"current_steps": 1330, "total_steps": 15621, "loss": 0.5031, "lr": 1.7005758157389633e-06, "epoch": 0.08514179629985276, "percentage": 8.51, "elapsed_time": "0:04:06", "remaining_time": "0:44:12", "throughput": 17026.04, "total_tokens": 4202560} +{"current_steps": 1335, "total_steps": 15621, "loss": 0.5574, "lr": 1.706973768394114e-06, "epoch": 0.08546187824082965, "percentage": 8.55, "elapsed_time": "0:04:07", "remaining_time": "0:44:08", "throughput": 17049.47, "total_tokens": 4219392} +{"current_steps": 1340, "total_steps": 15621, "loss": 0.4844, "lr": 1.7133717210492643e-06, "epoch": 0.08578196018180655, "percentage": 8.58, "elapsed_time": "0:04:08", "remaining_time": "0:44:04", "throughput": 17071.02, "total_tokens": 4235328} +{"current_steps": 1345, "total_steps": 15621, "loss": 0.5778, "lr": 1.7197696737044144e-06, "epoch": 0.08610204212278343, "percentage": 8.61, "elapsed_time": "0:04:08", "remaining_time": "0:43:59", "throughput": 17091.04, "total_tokens": 4250368} +{"current_steps": 1350, "total_steps": 15621, "loss": 0.4549, "lr": 1.7261676263595648e-06, "epoch": 0.08642212406376032, "percentage": 8.64, "elapsed_time": "0:04:09", "remaining_time": "0:43:55", "throughput": 17111.84, "total_tokens": 4265856} +{"current_steps": 1355, "total_steps": 15621, "loss": 0.6627, "lr": 1.7325655790147152e-06, "epoch": 0.08674220600473721, "percentage": 8.67, "elapsed_time": "0:04:09", "remaining_time": "0:43:51", "throughput": 17133.31, "total_tokens": 4281792} +{"current_steps": 1360, "total_steps": 15621, "loss": 0.5873, "lr": 1.7389635316698656e-06, "epoch": 0.0870622879457141, "percentage": 8.71, "elapsed_time": "0:04:10", "remaining_time": "0:43:46", "throughput": 17153.49, "total_tokens": 4297088} +{"current_steps": 1365, "total_steps": 15621, "loss": 0.5028, "lr": 1.745361484325016e-06, "epoch": 0.087382369886691, "percentage": 8.74, "elapsed_time": "0:04:11", "remaining_time": "0:43:42", "throughput": 17172.61, "total_tokens": 4312192} +{"current_steps": 1370, "total_steps": 15621, "loss": 0.4819, "lr": 1.7517594369801663e-06, "epoch": 0.08770245182766788, "percentage": 8.77, "elapsed_time": "0:04:11", "remaining_time": "0:43:38", "throughput": 17191.03, "total_tokens": 4326720} +{"current_steps": 1375, "total_steps": 15621, "loss": 0.7894, "lr": 1.7581573896353165e-06, "epoch": 0.08802253376864477, "percentage": 8.8, "elapsed_time": "0:04:12", "remaining_time": "0:43:33", "throughput": 17210.28, "total_tokens": 4341760} +{"current_steps": 1380, "total_steps": 15621, "loss": 0.6215, "lr": 1.7645553422904669e-06, "epoch": 0.08834261570962167, "percentage": 8.83, "elapsed_time": "0:04:12", "remaining_time": "0:43:29", "throughput": 17231.69, "total_tokens": 4357760} +{"current_steps": 1385, "total_steps": 15621, "loss": 0.6267, "lr": 1.7709532949456175e-06, "epoch": 0.08866269765059856, "percentage": 8.87, "elapsed_time": "0:04:13", "remaining_time": "0:43:25", "throughput": 17252.74, "total_tokens": 4373824} +{"current_steps": 1390, "total_steps": 15621, "loss": 0.4739, "lr": 1.7773512476007676e-06, "epoch": 0.08898277959157544, "percentage": 8.9, "elapsed_time": "0:04:14", "remaining_time": "0:43:21", "throughput": 17272.25, "total_tokens": 4388992} +{"current_steps": 1395, "total_steps": 15621, "loss": 0.5295, "lr": 1.783749200255918e-06, "epoch": 0.08930286153255233, "percentage": 8.93, "elapsed_time": "0:04:14", "remaining_time": "0:43:17", "throughput": 17291.3, "total_tokens": 4404288} +{"current_steps": 1400, "total_steps": 15621, "loss": 0.5366, "lr": 1.7901471529110684e-06, "epoch": 0.08962294347352923, "percentage": 8.96, "elapsed_time": "0:04:15", "remaining_time": "0:43:13", "throughput": 17311.67, "total_tokens": 4419840} +{"current_steps": 1405, "total_steps": 15621, "loss": 0.5109, "lr": 1.7965451055662186e-06, "epoch": 0.08994302541450612, "percentage": 8.99, "elapsed_time": "0:04:15", "remaining_time": "0:43:09", "throughput": 17331.11, "total_tokens": 4435200} +{"current_steps": 1410, "total_steps": 15621, "loss": 0.6082, "lr": 1.8029430582213691e-06, "epoch": 0.090263107355483, "percentage": 9.03, "elapsed_time": "0:04:16", "remaining_time": "0:43:05", "throughput": 17349.74, "total_tokens": 4450368} +{"current_steps": 1415, "total_steps": 15621, "loss": 0.4889, "lr": 1.8093410108765195e-06, "epoch": 0.09058318929645989, "percentage": 9.06, "elapsed_time": "0:04:17", "remaining_time": "0:43:01", "throughput": 17369.76, "total_tokens": 4466048} +{"current_steps": 1420, "total_steps": 15621, "loss": 0.5985, "lr": 1.8157389635316697e-06, "epoch": 0.09090327123743679, "percentage": 9.09, "elapsed_time": "0:04:17", "remaining_time": "0:42:57", "throughput": 17390.02, "total_tokens": 4481920} +{"current_steps": 1425, "total_steps": 15621, "loss": 0.5671, "lr": 1.82213691618682e-06, "epoch": 0.09122335317841368, "percentage": 9.12, "elapsed_time": "0:04:18", "remaining_time": "0:42:53", "throughput": 17411.01, "total_tokens": 4498112} +{"current_steps": 1430, "total_steps": 15621, "loss": 0.4306, "lr": 1.8285348688419704e-06, "epoch": 0.09154343511939056, "percentage": 9.15, "elapsed_time": "0:04:19", "remaining_time": "0:42:50", "throughput": 17434.17, "total_tokens": 4515648} +{"current_steps": 1435, "total_steps": 15621, "loss": 0.5719, "lr": 1.8349328214971208e-06, "epoch": 0.09186351706036745, "percentage": 9.19, "elapsed_time": "0:04:19", "remaining_time": "0:42:46", "throughput": 17454.49, "total_tokens": 4531840} +{"current_steps": 1440, "total_steps": 15621, "loss": 0.5478, "lr": 1.8413307741522712e-06, "epoch": 0.09218359900134435, "percentage": 9.22, "elapsed_time": "0:04:20", "remaining_time": "0:42:42", "throughput": 17473.75, "total_tokens": 4547456} +{"current_steps": 1445, "total_steps": 15621, "loss": 0.557, "lr": 1.8477287268074216e-06, "epoch": 0.09250368094232124, "percentage": 9.25, "elapsed_time": "0:04:20", "remaining_time": "0:42:39", "throughput": 17493.48, "total_tokens": 4563328} +{"current_steps": 1450, "total_steps": 15621, "loss": 0.5856, "lr": 1.8541266794625718e-06, "epoch": 0.09282376288329812, "percentage": 9.28, "elapsed_time": "0:04:21", "remaining_time": "0:42:35", "throughput": 17512.95, "total_tokens": 4579392} +{"current_steps": 1455, "total_steps": 15621, "loss": 0.6149, "lr": 1.8605246321177221e-06, "epoch": 0.09314384482427501, "percentage": 9.31, "elapsed_time": "0:04:22", "remaining_time": "0:42:31", "throughput": 17533.03, "total_tokens": 4595584} +{"current_steps": 1460, "total_steps": 15621, "loss": 0.5711, "lr": 1.8669225847728727e-06, "epoch": 0.0934639267652519, "percentage": 9.35, "elapsed_time": "0:04:22", "remaining_time": "0:42:27", "throughput": 17549.28, "total_tokens": 4610112} +{"current_steps": 1465, "total_steps": 15621, "loss": 0.6948, "lr": 1.8733205374280229e-06, "epoch": 0.0937840087062288, "percentage": 9.38, "elapsed_time": "0:04:23", "remaining_time": "0:42:24", "throughput": 17569.55, "total_tokens": 4626432} +{"current_steps": 1470, "total_steps": 15621, "loss": 0.5771, "lr": 1.8797184900831733e-06, "epoch": 0.09410409064720568, "percentage": 9.41, "elapsed_time": "0:04:23", "remaining_time": "0:42:20", "throughput": 17587.71, "total_tokens": 4641792} +{"current_steps": 1475, "total_steps": 15621, "loss": 0.4046, "lr": 1.8861164427383236e-06, "epoch": 0.09442417258818257, "percentage": 9.44, "elapsed_time": "0:04:24", "remaining_time": "0:42:16", "throughput": 17605.07, "total_tokens": 4656896} +{"current_steps": 1480, "total_steps": 15621, "loss": 0.605, "lr": 1.8925143953934738e-06, "epoch": 0.09474425452915947, "percentage": 9.47, "elapsed_time": "0:04:25", "remaining_time": "0:42:13", "throughput": 17624.7, "total_tokens": 4673472} +{"current_steps": 1485, "total_steps": 15621, "loss": 0.426, "lr": 1.8989123480486244e-06, "epoch": 0.09506433647013636, "percentage": 9.51, "elapsed_time": "0:04:25", "remaining_time": "0:42:09", "throughput": 17642.54, "total_tokens": 4688896} +{"current_steps": 1490, "total_steps": 15621, "loss": 0.6785, "lr": 1.9053103007037748e-06, "epoch": 0.09538441841111324, "percentage": 9.54, "elapsed_time": "0:04:26", "remaining_time": "0:42:06", "throughput": 17660.7, "total_tokens": 4704576} +{"current_steps": 1495, "total_steps": 15621, "loss": 0.6069, "lr": 1.911708253358925e-06, "epoch": 0.09570450035209013, "percentage": 9.57, "elapsed_time": "0:04:26", "remaining_time": "0:42:02", "throughput": 17676.89, "total_tokens": 4719040} +{"current_steps": 1500, "total_steps": 15621, "loss": 0.4831, "lr": 1.9181062060140753e-06, "epoch": 0.09602458229306703, "percentage": 9.6, "elapsed_time": "0:04:27", "remaining_time": "0:41:58", "throughput": 17693.63, "total_tokens": 4733696} +{"current_steps": 1505, "total_steps": 15621, "loss": 0.6045, "lr": 1.9245041586692255e-06, "epoch": 0.09634466423404392, "percentage": 9.63, "elapsed_time": "0:04:28", "remaining_time": "0:41:54", "throughput": 17711.34, "total_tokens": 4748992} +{"current_steps": 1510, "total_steps": 15621, "loss": 0.6876, "lr": 1.930902111324376e-06, "epoch": 0.0966647461750208, "percentage": 9.67, "elapsed_time": "0:04:28", "remaining_time": "0:41:51", "throughput": 17730.21, "total_tokens": 4764992} +{"current_steps": 1515, "total_steps": 15621, "loss": 0.6773, "lr": 1.9373000639795267e-06, "epoch": 0.09698482811599769, "percentage": 9.7, "elapsed_time": "0:04:29", "remaining_time": "0:41:47", "throughput": 17747.51, "total_tokens": 4780352} +{"current_steps": 1520, "total_steps": 15621, "loss": 0.5393, "lr": 1.943698016634677e-06, "epoch": 0.09730491005697459, "percentage": 9.73, "elapsed_time": "0:04:29", "remaining_time": "0:41:44", "throughput": 17765.38, "total_tokens": 4796224} +{"current_steps": 1525, "total_steps": 15621, "loss": 0.5401, "lr": 1.950095969289827e-06, "epoch": 0.09762499199795148, "percentage": 9.76, "elapsed_time": "0:04:30", "remaining_time": "0:41:41", "throughput": 17782.85, "total_tokens": 4811840} +{"current_steps": 1530, "total_steps": 15621, "loss": 0.5811, "lr": 1.9564939219449776e-06, "epoch": 0.09794507393892836, "percentage": 9.79, "elapsed_time": "0:04:31", "remaining_time": "0:41:37", "throughput": 17798.53, "total_tokens": 4826432} +{"current_steps": 1535, "total_steps": 15621, "loss": 0.393, "lr": 1.9628918746001278e-06, "epoch": 0.09826515587990525, "percentage": 9.83, "elapsed_time": "0:04:31", "remaining_time": "0:41:33", "throughput": 17815.96, "total_tokens": 4841920} +{"current_steps": 1540, "total_steps": 15621, "loss": 0.5971, "lr": 1.9692898272552783e-06, "epoch": 0.09858523782088215, "percentage": 9.86, "elapsed_time": "0:04:32", "remaining_time": "0:41:30", "throughput": 17833.68, "total_tokens": 4857536} +{"current_steps": 1545, "total_steps": 15621, "loss": 0.6844, "lr": 1.9756877799104285e-06, "epoch": 0.09890531976185904, "percentage": 9.89, "elapsed_time": "0:04:32", "remaining_time": "0:41:27", "throughput": 17851.69, "total_tokens": 4873408} +{"current_steps": 1550, "total_steps": 15621, "loss": 0.5973, "lr": 1.9820857325655787e-06, "epoch": 0.09922540170283592, "percentage": 9.92, "elapsed_time": "0:04:33", "remaining_time": "0:41:23", "throughput": 17869.92, "total_tokens": 4889536} +{"current_steps": 1555, "total_steps": 15621, "loss": 0.627, "lr": 1.9884836852207293e-06, "epoch": 0.09954548364381281, "percentage": 9.95, "elapsed_time": "0:04:34", "remaining_time": "0:41:20", "throughput": 17885.91, "total_tokens": 4904448} +{"current_steps": 1560, "total_steps": 15621, "loss": 0.6569, "lr": 1.99488163787588e-06, "epoch": 0.0998655655847897, "percentage": 9.99, "elapsed_time": "0:04:34", "remaining_time": "0:41:16", "throughput": 17902.01, "total_tokens": 4919616} +{"current_steps": 1564, "total_steps": 15621, "eval_loss": 0.5394634008407593, "epoch": 0.10012163113757122, "percentage": 10.01, "elapsed_time": "0:05:24", "remaining_time": "0:48:36", "throughput": 15199.04, "total_tokens": 4931328} +{"current_steps": 1565, "total_steps": 15621, "loss": 0.516, "lr": 1.9999999750297625e-06, "epoch": 0.1001856475257666, "percentage": 10.02, "elapsed_time": "0:06:07", "remaining_time": "0:54:58", "throughput": 13437.08, "total_tokens": 4934144} +{"current_steps": 1570, "total_steps": 15621, "loss": 0.523, "lr": 1.9999991010715873e-06, "epoch": 0.1005057294667435, "percentage": 10.05, "elapsed_time": "0:06:07", "remaining_time": "0:54:51", "throughput": 13458.09, "total_tokens": 4950272} +{"current_steps": 1575, "total_steps": 15621, "loss": 0.5346, "lr": 1.999996978602793e-06, "epoch": 0.10082581140772037, "percentage": 10.08, "elapsed_time": "0:06:08", "remaining_time": "0:54:45", "throughput": 13476.85, "total_tokens": 4965056} +{"current_steps": 1580, "total_steps": 15621, "loss": 0.5489, "lr": 1.99999360762603e-06, "epoch": 0.10114589334869727, "percentage": 10.11, "elapsed_time": "0:06:09", "remaining_time": "0:54:39", "throughput": 13495.84, "total_tokens": 4980160} +{"current_steps": 1585, "total_steps": 15621, "loss": 0.453, "lr": 1.9999889881455065e-06, "epoch": 0.10146597528967416, "percentage": 10.15, "elapsed_time": "0:06:09", "remaining_time": "0:54:33", "throughput": 13517.74, "total_tokens": 4996992} +{"current_steps": 1590, "total_steps": 15621, "loss": 0.5146, "lr": 1.9999831201669897e-06, "epoch": 0.10178605723065105, "percentage": 10.18, "elapsed_time": "0:06:10", "remaining_time": "0:54:27", "throughput": 13537.87, "total_tokens": 5012608} +{"current_steps": 1595, "total_steps": 15621, "loss": 0.4848, "lr": 1.9999760036978067e-06, "epoch": 0.10210613917162793, "percentage": 10.21, "elapsed_time": "0:06:10", "remaining_time": "0:54:21", "throughput": 13557.1, "total_tokens": 5027840} +{"current_steps": 1600, "total_steps": 15621, "loss": 0.5746, "lr": 1.9999676387468417e-06, "epoch": 0.10242622111260483, "percentage": 10.24, "elapsed_time": "0:06:11", "remaining_time": "0:54:15", "throughput": 13575.9, "total_tokens": 5042752} +{"current_steps": 1605, "total_steps": 15621, "loss": 0.5487, "lr": 1.999958025324539e-06, "epoch": 0.10274630305358172, "percentage": 10.27, "elapsed_time": "0:06:12", "remaining_time": "0:54:09", "throughput": 13596.1, "total_tokens": 5058624} +{"current_steps": 1610, "total_steps": 15621, "loss": 0.6233, "lr": 1.999947163442901e-06, "epoch": 0.10306638499455861, "percentage": 10.31, "elapsed_time": "0:06:12", "remaining_time": "0:54:03", "throughput": 13617.23, "total_tokens": 5075008} +{"current_steps": 1615, "total_steps": 15621, "loss": 0.5332, "lr": 1.9999350531154884e-06, "epoch": 0.10338646693553549, "percentage": 10.34, "elapsed_time": "0:06:13", "remaining_time": "0:53:57", "throughput": 13637.33, "total_tokens": 5090880} +{"current_steps": 1620, "total_steps": 15621, "loss": 0.5713, "lr": 1.9999216943574223e-06, "epoch": 0.10370654887651239, "percentage": 10.37, "elapsed_time": "0:06:13", "remaining_time": "0:53:51", "throughput": 13657.18, "total_tokens": 5106816} +{"current_steps": 1625, "total_steps": 15621, "loss": 0.4563, "lr": 1.9999070871853796e-06, "epoch": 0.10402663081748928, "percentage": 10.4, "elapsed_time": "0:06:14", "remaining_time": "0:53:46", "throughput": 13678.99, "total_tokens": 5123904} +{"current_steps": 1630, "total_steps": 15621, "loss": 0.4954, "lr": 1.9998912316175986e-06, "epoch": 0.10434671275846617, "percentage": 10.43, "elapsed_time": "0:06:15", "remaining_time": "0:53:40", "throughput": 13699.5, "total_tokens": 5140160} +{"current_steps": 1635, "total_steps": 15621, "loss": 0.5159, "lr": 1.9998741276738752e-06, "epoch": 0.10466679469944305, "percentage": 10.47, "elapsed_time": "0:06:15", "remaining_time": "0:53:34", "throughput": 13719.42, "total_tokens": 5156288} +{"current_steps": 1640, "total_steps": 15621, "loss": 0.5823, "lr": 1.999855775375563e-06, "epoch": 0.10498687664041995, "percentage": 10.5, "elapsed_time": "0:06:16", "remaining_time": "0:53:29", "throughput": 13738.6, "total_tokens": 5171776} +{"current_steps": 1645, "total_steps": 15621, "loss": 0.683, "lr": 1.999836174745576e-06, "epoch": 0.10530695858139684, "percentage": 10.53, "elapsed_time": "0:06:17", "remaining_time": "0:53:23", "throughput": 13761.13, "total_tokens": 5189504} +{"current_steps": 1650, "total_steps": 15621, "loss": 0.5783, "lr": 1.9998153258083853e-06, "epoch": 0.10562704052237373, "percentage": 10.56, "elapsed_time": "0:06:17", "remaining_time": "0:53:18", "throughput": 13780.38, "total_tokens": 5205056} +{"current_steps": 1655, "total_steps": 15621, "loss": 0.586, "lr": 1.9997932285900214e-06, "epoch": 0.10594712246335061, "percentage": 10.59, "elapsed_time": "0:06:18", "remaining_time": "0:53:13", "throughput": 13802.75, "total_tokens": 5222656} +{"current_steps": 1660, "total_steps": 15621, "loss": 0.6272, "lr": 1.9997698831180726e-06, "epoch": 0.1062672044043275, "percentage": 10.63, "elapsed_time": "0:06:19", "remaining_time": "0:53:07", "throughput": 13822.59, "total_tokens": 5238848} +{"current_steps": 1665, "total_steps": 15621, "loss": 0.5203, "lr": 1.999745289421686e-06, "epoch": 0.1065872863453044, "percentage": 10.66, "elapsed_time": "0:06:19", "remaining_time": "0:53:02", "throughput": 13843.03, "total_tokens": 5255296} +{"current_steps": 1670, "total_steps": 15621, "loss": 0.7716, "lr": 1.9997194475315674e-06, "epoch": 0.10690736828628129, "percentage": 10.69, "elapsed_time": "0:06:20", "remaining_time": "0:52:56", "throughput": 13861.05, "total_tokens": 5270336} +{"current_steps": 1675, "total_steps": 15621, "loss": 0.4842, "lr": 1.9996923574799808e-06, "epoch": 0.10722745022725817, "percentage": 10.72, "elapsed_time": "0:06:20", "remaining_time": "0:52:50", "throughput": 13881.16, "total_tokens": 5286720} +{"current_steps": 1680, "total_steps": 15621, "loss": 0.6428, "lr": 1.9996640193007476e-06, "epoch": 0.10754753216823507, "percentage": 10.75, "elapsed_time": "0:06:21", "remaining_time": "0:52:45", "throughput": 13898.87, "total_tokens": 5301632} +{"current_steps": 1685, "total_steps": 15621, "loss": 0.403, "lr": 1.9996344330292495e-06, "epoch": 0.10786761410921196, "percentage": 10.79, "elapsed_time": "0:06:22", "remaining_time": "0:52:39", "throughput": 13916.51, "total_tokens": 5316544} +{"current_steps": 1690, "total_steps": 15621, "loss": 0.5503, "lr": 1.9996035987024245e-06, "epoch": 0.10818769605018885, "percentage": 10.82, "elapsed_time": "0:06:22", "remaining_time": "0:52:34", "throughput": 13935.64, "total_tokens": 5332544} +{"current_steps": 1695, "total_steps": 15621, "loss": 0.5388, "lr": 1.99957151635877e-06, "epoch": 0.10850777799116573, "percentage": 10.85, "elapsed_time": "0:06:23", "remaining_time": "0:52:28", "throughput": 13954.12, "total_tokens": 5348096} +{"current_steps": 1700, "total_steps": 15621, "loss": 0.6275, "lr": 1.999538186038341e-06, "epoch": 0.10882785993214263, "percentage": 10.88, "elapsed_time": "0:06:23", "remaining_time": "0:52:23", "throughput": 13970.51, "total_tokens": 5362368} +{"current_steps": 1705, "total_steps": 15621, "loss": 0.5426, "lr": 1.999503607782751e-06, "epoch": 0.10914794187311952, "percentage": 10.91, "elapsed_time": "0:06:24", "remaining_time": "0:52:17", "throughput": 13989.29, "total_tokens": 5378176} +{"current_steps": 1710, "total_steps": 15621, "loss": 0.5163, "lr": 1.999467781635171e-06, "epoch": 0.10946802381409641, "percentage": 10.95, "elapsed_time": "0:06:25", "remaining_time": "0:52:12", "throughput": 14009.26, "total_tokens": 5394752} +{"current_steps": 1715, "total_steps": 15621, "loss": 0.6991, "lr": 1.9994307076403306e-06, "epoch": 0.10978810575507329, "percentage": 10.98, "elapsed_time": "0:06:25", "remaining_time": "0:52:07", "throughput": 14030.5, "total_tokens": 5412160} +{"current_steps": 1720, "total_steps": 15621, "loss": 0.5245, "lr": 1.999392385844517e-06, "epoch": 0.11010818769605019, "percentage": 11.01, "elapsed_time": "0:06:26", "remaining_time": "0:52:02", "throughput": 14048.81, "total_tokens": 5427840} +{"current_steps": 1725, "total_steps": 15621, "loss": 0.4035, "lr": 1.9993528162955753e-06, "epoch": 0.11042826963702708, "percentage": 11.04, "elapsed_time": "0:06:26", "remaining_time": "0:51:57", "throughput": 14068.19, "total_tokens": 5444224} +{"current_steps": 1730, "total_steps": 15621, "loss": 0.5767, "lr": 1.9993119990429095e-06, "epoch": 0.11074835157800397, "percentage": 11.07, "elapsed_time": "0:06:27", "remaining_time": "0:51:52", "throughput": 14085.98, "total_tokens": 5459648} +{"current_steps": 1735, "total_steps": 15621, "loss": 0.7821, "lr": 1.9992699341374794e-06, "epoch": 0.11106843351898085, "percentage": 11.11, "elapsed_time": "0:06:28", "remaining_time": "0:51:46", "throughput": 14103.62, "total_tokens": 5475008} +{"current_steps": 1740, "total_steps": 15621, "loss": 0.5285, "lr": 1.9992266216318033e-06, "epoch": 0.11138851545995775, "percentage": 11.14, "elapsed_time": "0:06:28", "remaining_time": "0:51:41", "throughput": 14122.77, "total_tokens": 5491456} +{"current_steps": 1745, "total_steps": 15621, "loss": 0.5674, "lr": 1.9991820615799583e-06, "epoch": 0.11170859740093464, "percentage": 11.17, "elapsed_time": "0:06:29", "remaining_time": "0:51:36", "throughput": 14141.45, "total_tokens": 5507520} +{"current_steps": 1750, "total_steps": 15621, "loss": 0.6917, "lr": 1.999136254037578e-06, "epoch": 0.11202867934191153, "percentage": 11.2, "elapsed_time": "0:06:30", "remaining_time": "0:51:31", "throughput": 14159.28, "total_tokens": 5523072} +{"current_steps": 1755, "total_steps": 15621, "loss": 0.5094, "lr": 1.999089199061853e-06, "epoch": 0.11234876128288843, "percentage": 11.23, "elapsed_time": "0:06:30", "remaining_time": "0:51:26", "throughput": 14176.49, "total_tokens": 5538304} +{"current_steps": 1760, "total_steps": 15621, "loss": 0.4612, "lr": 1.9990408967115326e-06, "epoch": 0.1126688432238653, "percentage": 11.27, "elapsed_time": "0:06:31", "remaining_time": "0:51:21", "throughput": 14194.39, "total_tokens": 5553920} +{"current_steps": 1765, "total_steps": 15621, "loss": 0.4599, "lr": 1.998991347046922e-06, "epoch": 0.1129889251648422, "percentage": 11.3, "elapsed_time": "0:06:31", "remaining_time": "0:51:16", "throughput": 14211.69, "total_tokens": 5569344} +{"current_steps": 1770, "total_steps": 15621, "loss": 0.5104, "lr": 1.9989405501298857e-06, "epoch": 0.11330900710581909, "percentage": 11.33, "elapsed_time": "0:06:32", "remaining_time": "0:51:11", "throughput": 14231.06, "total_tokens": 5585856} +{"current_steps": 1775, "total_steps": 15621, "loss": 0.5755, "lr": 1.9988885060238436e-06, "epoch": 0.11362908904679599, "percentage": 11.36, "elapsed_time": "0:06:33", "remaining_time": "0:51:07", "throughput": 14252.32, "total_tokens": 5603840} +{"current_steps": 1780, "total_steps": 15621, "loss": 0.5167, "lr": 1.9988352147937735e-06, "epoch": 0.11394917098777287, "percentage": 11.39, "elapsed_time": "0:06:33", "remaining_time": "0:51:02", "throughput": 14271.05, "total_tokens": 5620352} +{"current_steps": 1785, "total_steps": 15621, "loss": 0.5382, "lr": 1.99878067650621e-06, "epoch": 0.11426925292874976, "percentage": 11.43, "elapsed_time": "0:06:34", "remaining_time": "0:50:57", "throughput": 14289.09, "total_tokens": 5636544} +{"current_steps": 1790, "total_steps": 15621, "loss": 0.5438, "lr": 1.998724891229245e-06, "epoch": 0.11458933486972665, "percentage": 11.46, "elapsed_time": "0:06:35", "remaining_time": "0:50:52", "throughput": 14307.1, "total_tokens": 5652672} +{"current_steps": 1795, "total_steps": 15621, "loss": 0.4956, "lr": 1.998667859032527e-06, "epoch": 0.11490941681070355, "percentage": 11.49, "elapsed_time": "0:06:35", "remaining_time": "0:50:47", "throughput": 14324.31, "total_tokens": 5668224} +{"current_steps": 1800, "total_steps": 15621, "loss": 0.4506, "lr": 1.9986095799872613e-06, "epoch": 0.11522949875168043, "percentage": 11.52, "elapsed_time": "0:06:36", "remaining_time": "0:50:43", "throughput": 14342.73, "total_tokens": 5684480} +{"current_steps": 1805, "total_steps": 15621, "loss": 0.472, "lr": 1.99855005416621e-06, "epoch": 0.11554958069265732, "percentage": 11.55, "elapsed_time": "0:06:36", "remaining_time": "0:50:38", "throughput": 14361.01, "total_tokens": 5700864} +{"current_steps": 1810, "total_steps": 15621, "loss": 0.5965, "lr": 1.998489281643692e-06, "epoch": 0.11586966263363421, "percentage": 11.59, "elapsed_time": "0:06:37", "remaining_time": "0:50:33", "throughput": 14377.85, "total_tokens": 5716224} +{"current_steps": 1815, "total_steps": 15621, "loss": 0.4977, "lr": 1.998427262495582e-06, "epoch": 0.1161897445746111, "percentage": 11.62, "elapsed_time": "0:06:38", "remaining_time": "0:50:29", "throughput": 14396.77, "total_tokens": 5733056} +{"current_steps": 1820, "total_steps": 15621, "loss": 0.6683, "lr": 1.9983639967993124e-06, "epoch": 0.11650982651558799, "percentage": 11.65, "elapsed_time": "0:06:38", "remaining_time": "0:50:24", "throughput": 14414.48, "total_tokens": 5749120} +{"current_steps": 1825, "total_steps": 15621, "loss": 0.7297, "lr": 1.99829948463387e-06, "epoch": 0.11682990845656488, "percentage": 11.68, "elapsed_time": "0:06:39", "remaining_time": "0:50:19", "throughput": 14430.34, "total_tokens": 5763968} +{"current_steps": 1830, "total_steps": 15621, "loss": 0.543, "lr": 1.9982337260798e-06, "epoch": 0.11714999039754177, "percentage": 11.71, "elapsed_time": "0:06:40", "remaining_time": "0:50:14", "throughput": 14447.24, "total_tokens": 5779520} +{"current_steps": 1835, "total_steps": 15621, "loss": 0.5856, "lr": 1.998166721219203e-06, "epoch": 0.11747007233851867, "percentage": 11.75, "elapsed_time": "0:06:40", "remaining_time": "0:50:10", "throughput": 14469.24, "total_tokens": 5798848} +{"current_steps": 1840, "total_steps": 15621, "loss": 0.5155, "lr": 1.9980984701357338e-06, "epoch": 0.11779015427949555, "percentage": 11.78, "elapsed_time": "0:06:41", "remaining_time": "0:50:06", "throughput": 14485.42, "total_tokens": 5813952} +{"current_steps": 1845, "total_steps": 15621, "loss": 0.4362, "lr": 1.998028972914606e-06, "epoch": 0.11811023622047244, "percentage": 11.81, "elapsed_time": "0:06:41", "remaining_time": "0:50:01", "throughput": 14502.85, "total_tokens": 5830016} +{"current_steps": 1850, "total_steps": 15621, "loss": 0.5893, "lr": 1.9979582296425877e-06, "epoch": 0.11843031816144933, "percentage": 11.84, "elapsed_time": "0:06:42", "remaining_time": "0:49:56", "throughput": 14519.04, "total_tokens": 5845312} +{"current_steps": 1855, "total_steps": 15621, "loss": 0.5851, "lr": 1.9978862404080022e-06, "epoch": 0.11875040010242623, "percentage": 11.88, "elapsed_time": "0:06:43", "remaining_time": "0:49:52", "throughput": 14535.34, "total_tokens": 5860672} +{"current_steps": 1860, "total_steps": 15621, "loss": 0.5376, "lr": 1.9978130053007295e-06, "epoch": 0.1190704820434031, "percentage": 11.91, "elapsed_time": "0:06:44", "remaining_time": "0:49:53", "throughput": 14524.04, "total_tokens": 5875776} +{"current_steps": 1865, "total_steps": 15621, "loss": 0.4319, "lr": 1.9977385244122034e-06, "epoch": 0.11939056398438, "percentage": 11.94, "elapsed_time": "0:06:45", "remaining_time": "0:49:48", "throughput": 14540.38, "total_tokens": 5891200} +{"current_steps": 1870, "total_steps": 15621, "loss": 0.4821, "lr": 1.997662797835415e-06, "epoch": 0.11971064592535689, "percentage": 11.97, "elapsed_time": "0:06:45", "remaining_time": "0:49:43", "throughput": 14557.16, "total_tokens": 5907008} +{"current_steps": 1875, "total_steps": 15621, "loss": 0.4645, "lr": 1.9975858256649097e-06, "epoch": 0.12003072786633379, "percentage": 12.0, "elapsed_time": "0:06:46", "remaining_time": "0:49:39", "throughput": 14574.47, "total_tokens": 5923264} +{"current_steps": 1880, "total_steps": 15621, "loss": 0.4911, "lr": 1.997507607996788e-06, "epoch": 0.12035080980731067, "percentage": 12.04, "elapsed_time": "0:06:47", "remaining_time": "0:49:35", "throughput": 14592.14, "total_tokens": 5939648} +{"current_steps": 1885, "total_steps": 15621, "loss": 0.4557, "lr": 1.997428144928706e-06, "epoch": 0.12067089174828756, "percentage": 12.07, "elapsed_time": "0:06:47", "remaining_time": "0:49:30", "throughput": 14609.34, "total_tokens": 5955520} +{"current_steps": 1890, "total_steps": 15621, "loss": 0.5237, "lr": 1.9973474365598736e-06, "epoch": 0.12099097368926445, "percentage": 12.1, "elapsed_time": "0:06:48", "remaining_time": "0:49:26", "throughput": 14625.67, "total_tokens": 5971072} +{"current_steps": 1895, "total_steps": 15621, "loss": 0.5787, "lr": 1.9972654829910568e-06, "epoch": 0.12131105563024135, "percentage": 12.13, "elapsed_time": "0:06:48", "remaining_time": "0:49:21", "throughput": 14642.68, "total_tokens": 5987264} +{"current_steps": 1900, "total_steps": 15621, "loss": 0.6193, "lr": 1.9971822843245748e-06, "epoch": 0.12163113757121823, "percentage": 12.16, "elapsed_time": "0:06:49", "remaining_time": "0:49:17", "throughput": 14659.05, "total_tokens": 6002880} +{"current_steps": 1905, "total_steps": 15621, "loss": 0.5277, "lr": 1.997097840664303e-06, "epoch": 0.12195121951219512, "percentage": 12.2, "elapsed_time": "0:06:50", "remaining_time": "0:49:12", "throughput": 14676.91, "total_tokens": 6019520} +{"current_steps": 1910, "total_steps": 15621, "loss": 0.5641, "lr": 1.99701215211567e-06, "epoch": 0.12227130145317201, "percentage": 12.23, "elapsed_time": "0:06:50", "remaining_time": "0:49:08", "throughput": 14694.24, "total_tokens": 6035904} +{"current_steps": 1915, "total_steps": 15621, "loss": 0.6009, "lr": 1.9969252187856587e-06, "epoch": 0.1225913833941489, "percentage": 12.26, "elapsed_time": "0:06:51", "remaining_time": "0:49:04", "throughput": 14709.44, "total_tokens": 6050816} +{"current_steps": 1920, "total_steps": 15621, "loss": 0.4204, "lr": 1.9968370407828065e-06, "epoch": 0.12291146533512579, "percentage": 12.29, "elapsed_time": "0:06:51", "remaining_time": "0:48:59", "throughput": 14725.04, "total_tokens": 6065920} +{"current_steps": 1925, "total_steps": 15621, "loss": 0.5962, "lr": 1.996747618217205e-06, "epoch": 0.12323154727610268, "percentage": 12.32, "elapsed_time": "0:06:52", "remaining_time": "0:48:55", "throughput": 14741.28, "total_tokens": 6081728} +{"current_steps": 1930, "total_steps": 15621, "loss": 0.4945, "lr": 1.9966569512004987e-06, "epoch": 0.12355162921707957, "percentage": 12.36, "elapsed_time": "0:06:53", "remaining_time": "0:48:50", "throughput": 14757.77, "total_tokens": 6097472} +{"current_steps": 1935, "total_steps": 15621, "loss": 0.5101, "lr": 1.996565039845887e-06, "epoch": 0.12387171115805647, "percentage": 12.39, "elapsed_time": "0:06:53", "remaining_time": "0:48:46", "throughput": 14773.84, "total_tokens": 6113152} +{"current_steps": 1940, "total_steps": 15621, "loss": 0.614, "lr": 1.996471884268122e-06, "epoch": 0.12419179309903335, "percentage": 12.42, "elapsed_time": "0:06:54", "remaining_time": "0:48:42", "throughput": 14790.91, "total_tokens": 6129408} +{"current_steps": 1945, "total_steps": 15621, "loss": 0.545, "lr": 1.9963774845835097e-06, "epoch": 0.12451187504001024, "percentage": 12.45, "elapsed_time": "0:06:55", "remaining_time": "0:48:38", "throughput": 14806.89, "total_tokens": 6144896} +{"current_steps": 1950, "total_steps": 15621, "loss": 0.5868, "lr": 1.996281840909909e-06, "epoch": 0.12483195698098713, "percentage": 12.48, "elapsed_time": "0:06:55", "remaining_time": "0:48:33", "throughput": 14822.38, "total_tokens": 6160256} +{"current_steps": 1955, "total_steps": 15621, "loss": 0.6354, "lr": 1.9961849533667322e-06, "epoch": 0.12515203892196403, "percentage": 12.52, "elapsed_time": "0:06:56", "remaining_time": "0:48:29", "throughput": 14837.33, "total_tokens": 6175104} +{"current_steps": 1960, "total_steps": 15621, "loss": 0.5185, "lr": 1.9960868220749447e-06, "epoch": 0.1254721208629409, "percentage": 12.55, "elapsed_time": "0:06:56", "remaining_time": "0:48:24", "throughput": 14852.43, "total_tokens": 6190272} +{"current_steps": 1965, "total_steps": 15621, "loss": 0.5855, "lr": 1.9959874471570644e-06, "epoch": 0.1257922028039178, "percentage": 12.58, "elapsed_time": "0:06:57", "remaining_time": "0:48:20", "throughput": 14868.1, "total_tokens": 6205952} +{"current_steps": 1970, "total_steps": 15621, "loss": 0.56, "lr": 1.9958868287371625e-06, "epoch": 0.1261122847448947, "percentage": 12.61, "elapsed_time": "0:06:58", "remaining_time": "0:48:16", "throughput": 14885.3, "total_tokens": 6222592} +{"current_steps": 1975, "total_steps": 15621, "loss": 0.4803, "lr": 1.9957849669408617e-06, "epoch": 0.12643236668587157, "percentage": 12.64, "elapsed_time": "0:06:58", "remaining_time": "0:48:12", "throughput": 14900.38, "total_tokens": 6237696} +{"current_steps": 1980, "total_steps": 15621, "loss": 0.4858, "lr": 1.995681861895338e-06, "epoch": 0.12675244862684848, "percentage": 12.68, "elapsed_time": "0:06:59", "remaining_time": "0:48:08", "throughput": 14917.17, "total_tokens": 6254080} +{"current_steps": 1985, "total_steps": 15621, "loss": 0.5741, "lr": 1.9955775137293187e-06, "epoch": 0.12707253056782536, "percentage": 12.71, "elapsed_time": "0:06:59", "remaining_time": "0:48:04", "throughput": 14932.87, "total_tokens": 6270016} +{"current_steps": 1990, "total_steps": 15621, "loss": 0.6124, "lr": 1.9954719225730845e-06, "epoch": 0.12739261250880227, "percentage": 12.74, "elapsed_time": "0:07:00", "remaining_time": "0:48:00", "throughput": 14947.96, "total_tokens": 6285184} +{"current_steps": 1995, "total_steps": 15621, "loss": 0.4774, "lr": 1.9953650885584666e-06, "epoch": 0.12771269444977915, "percentage": 12.77, "elapsed_time": "0:07:01", "remaining_time": "0:47:56", "throughput": 14963.51, "total_tokens": 6300992} +{"current_steps": 2000, "total_steps": 15621, "loss": 0.5445, "lr": 1.995257011818849e-06, "epoch": 0.12803277639075603, "percentage": 12.8, "elapsed_time": "0:07:01", "remaining_time": "0:47:51", "throughput": 14977.13, "total_tokens": 6315392} +{"current_steps": 2005, "total_steps": 15621, "loss": 0.4739, "lr": 1.9951476924891666e-06, "epoch": 0.12835285833173293, "percentage": 12.84, "elapsed_time": "0:07:02", "remaining_time": "0:47:47", "throughput": 14992.28, "total_tokens": 6331136} +{"current_steps": 2010, "total_steps": 15621, "loss": 0.5553, "lr": 1.9950371307059056e-06, "epoch": 0.1286729402727098, "percentage": 12.87, "elapsed_time": "0:07:02", "remaining_time": "0:47:43", "throughput": 15008.58, "total_tokens": 6347584} +{"current_steps": 2015, "total_steps": 15621, "loss": 0.5728, "lr": 1.9949253266071036e-06, "epoch": 0.1289930222136867, "percentage": 12.9, "elapsed_time": "0:07:03", "remaining_time": "0:47:39", "throughput": 15023.08, "total_tokens": 6362560} +{"current_steps": 2020, "total_steps": 15621, "loss": 0.5075, "lr": 1.9948122803323503e-06, "epoch": 0.1293131041546636, "percentage": 12.93, "elapsed_time": "0:07:04", "remaining_time": "0:47:35", "throughput": 15038.66, "total_tokens": 6378304} +{"current_steps": 2025, "total_steps": 15621, "loss": 0.5147, "lr": 1.9946979920227844e-06, "epoch": 0.12963318609564048, "percentage": 12.96, "elapsed_time": "0:07:04", "remaining_time": "0:47:31", "throughput": 15053.16, "total_tokens": 6393280} +{"current_steps": 2030, "total_steps": 15621, "loss": 0.5251, "lr": 1.994582461821096e-06, "epoch": 0.1299532680366174, "percentage": 13.0, "elapsed_time": "0:07:05", "remaining_time": "0:47:27", "throughput": 15069.01, "total_tokens": 6409472} +{"current_steps": 2035, "total_steps": 15621, "loss": 0.7157, "lr": 1.9944656898715267e-06, "epoch": 0.13027334997759427, "percentage": 13.03, "elapsed_time": "0:07:05", "remaining_time": "0:47:23", "throughput": 15083.99, "total_tokens": 6424960} +{"current_steps": 2040, "total_steps": 15621, "loss": 0.6057, "lr": 1.994347676319867e-06, "epoch": 0.13059343191857115, "percentage": 13.06, "elapsed_time": "0:07:06", "remaining_time": "0:47:19", "throughput": 15098.35, "total_tokens": 6440000} +{"current_steps": 2045, "total_steps": 15621, "loss": 0.453, "lr": 1.994228421313459e-06, "epoch": 0.13091351385954805, "percentage": 13.09, "elapsed_time": "0:07:07", "remaining_time": "0:47:16", "throughput": 15116.11, "total_tokens": 6457600} +{"current_steps": 2050, "total_steps": 15621, "loss": 0.5143, "lr": 1.994107925001193e-06, "epoch": 0.13123359580052493, "percentage": 13.12, "elapsed_time": "0:07:07", "remaining_time": "0:47:12", "throughput": 15130.99, "total_tokens": 6473088} +{"current_steps": 2055, "total_steps": 15621, "loss": 0.6013, "lr": 1.9939861875335108e-06, "epoch": 0.1315536777415018, "percentage": 13.16, "elapsed_time": "0:07:08", "remaining_time": "0:47:07", "throughput": 15144.51, "total_tokens": 6487680} +{"current_steps": 2060, "total_steps": 15621, "loss": 0.4831, "lr": 1.9938632090624025e-06, "epoch": 0.13187375968247872, "percentage": 13.19, "elapsed_time": "0:07:08", "remaining_time": "0:47:04", "throughput": 15159.5, "total_tokens": 6503296} +{"current_steps": 2065, "total_steps": 15621, "loss": 0.5363, "lr": 1.9937389897414087e-06, "epoch": 0.1321938416234556, "percentage": 13.22, "elapsed_time": "0:07:09", "remaining_time": "0:47:00", "throughput": 15174.43, "total_tokens": 6518912} +{"current_steps": 2070, "total_steps": 15621, "loss": 0.5631, "lr": 1.993613529725618e-06, "epoch": 0.1325139235644325, "percentage": 13.25, "elapsed_time": "0:07:10", "remaining_time": "0:46:56", "throughput": 15189.53, "total_tokens": 6534784} +{"current_steps": 2075, "total_steps": 15621, "loss": 0.5248, "lr": 1.99348682917167e-06, "epoch": 0.13283400550540939, "percentage": 13.28, "elapsed_time": "0:07:10", "remaining_time": "0:46:52", "throughput": 15204.47, "total_tokens": 6550528} +{"current_steps": 2080, "total_steps": 15621, "loss": 0.5344, "lr": 1.99335888823775e-06, "epoch": 0.13315408744638627, "percentage": 13.32, "elapsed_time": "0:07:11", "remaining_time": "0:46:48", "throughput": 15219.15, "total_tokens": 6566144} +{"current_steps": 2085, "total_steps": 15621, "loss": 0.5605, "lr": 1.993229707083595e-06, "epoch": 0.13347416938736317, "percentage": 13.35, "elapsed_time": "0:07:12", "remaining_time": "0:46:45", "throughput": 15236.44, "total_tokens": 6583872} +{"current_steps": 2090, "total_steps": 15621, "loss": 0.4144, "lr": 1.993099285870489e-06, "epoch": 0.13379425132834005, "percentage": 13.38, "elapsed_time": "0:07:12", "remaining_time": "0:46:42", "throughput": 15254.61, "total_tokens": 6602304} +{"current_steps": 2095, "total_steps": 15621, "loss": 0.4607, "lr": 1.992967624761264e-06, "epoch": 0.13411433326931693, "percentage": 13.41, "elapsed_time": "0:07:13", "remaining_time": "0:46:38", "throughput": 15269.52, "total_tokens": 6618112} +{"current_steps": 2100, "total_steps": 15621, "loss": 0.6174, "lr": 1.9928347239203014e-06, "epoch": 0.13443441521029384, "percentage": 13.44, "elapsed_time": "0:07:14", "remaining_time": "0:46:34", "throughput": 15286.51, "total_tokens": 6635584} +{"current_steps": 2105, "total_steps": 15621, "loss": 0.5339, "lr": 1.9927005835135282e-06, "epoch": 0.13475449715127072, "percentage": 13.48, "elapsed_time": "0:07:14", "remaining_time": "0:46:31", "throughput": 15303.86, "total_tokens": 6653568} +{"current_steps": 2110, "total_steps": 15621, "loss": 0.4604, "lr": 1.9925652037084214e-06, "epoch": 0.13507457909224763, "percentage": 13.51, "elapsed_time": "0:07:15", "remaining_time": "0:46:27", "throughput": 15317.8, "total_tokens": 6668864} +{"current_steps": 2115, "total_steps": 15621, "loss": 0.4852, "lr": 1.9924285846740037e-06, "epoch": 0.1353946610332245, "percentage": 13.54, "elapsed_time": "0:07:15", "remaining_time": "0:46:24", "throughput": 15332.05, "total_tokens": 6684416} +{"current_steps": 2120, "total_steps": 15621, "loss": 0.5927, "lr": 1.9922907265808452e-06, "epoch": 0.13571474297420139, "percentage": 13.57, "elapsed_time": "0:07:16", "remaining_time": "0:46:20", "throughput": 15345.72, "total_tokens": 6699392} +{"current_steps": 2125, "total_steps": 15621, "loss": 0.5477, "lr": 1.9921516296010643e-06, "epoch": 0.1360348249151783, "percentage": 13.6, "elapsed_time": "0:07:17", "remaining_time": "0:46:16", "throughput": 15359.29, "total_tokens": 6714560} +{"current_steps": 2130, "total_steps": 15621, "loss": 0.5584, "lr": 1.9920112939083246e-06, "epoch": 0.13635490685615517, "percentage": 13.64, "elapsed_time": "0:07:17", "remaining_time": "0:46:12", "throughput": 15373.5, "total_tokens": 6729920} +{"current_steps": 2135, "total_steps": 15621, "loss": 0.5555, "lr": 1.9918697196778367e-06, "epoch": 0.13667498879713205, "percentage": 13.67, "elapsed_time": "0:07:18", "remaining_time": "0:46:08", "throughput": 15386.77, "total_tokens": 6744768} +{"current_steps": 2140, "total_steps": 15621, "loss": 0.4607, "lr": 1.9917269070863578e-06, "epoch": 0.13699507073810896, "percentage": 13.7, "elapsed_time": "0:07:18", "remaining_time": "0:46:05", "throughput": 15400.05, "total_tokens": 6759680} +{"current_steps": 2145, "total_steps": 15621, "loss": 0.5094, "lr": 1.9915828563121915e-06, "epoch": 0.13731515267908584, "percentage": 13.73, "elapsed_time": "0:07:19", "remaining_time": "0:46:01", "throughput": 15414.24, "total_tokens": 6775168} +{"current_steps": 2150, "total_steps": 15621, "loss": 0.5364, "lr": 1.9914375675351865e-06, "epoch": 0.13763523462006275, "percentage": 13.76, "elapsed_time": "0:07:20", "remaining_time": "0:45:57", "throughput": 15429.05, "total_tokens": 6791296} +{"current_steps": 2155, "total_steps": 15621, "loss": 0.43, "lr": 1.991291040936738e-06, "epoch": 0.13795531656103963, "percentage": 13.8, "elapsed_time": "0:07:20", "remaining_time": "0:45:54", "throughput": 15445.37, "total_tokens": 6808640} +{"current_steps": 2160, "total_steps": 15621, "loss": 0.6627, "lr": 1.9911432766997857e-06, "epoch": 0.1382753985020165, "percentage": 13.83, "elapsed_time": "0:07:21", "remaining_time": "0:45:50", "throughput": 15459.29, "total_tokens": 6824064} +{"current_steps": 2165, "total_steps": 15621, "loss": 0.4426, "lr": 1.990994275008815e-06, "epoch": 0.1385954804429934, "percentage": 13.86, "elapsed_time": "0:07:22", "remaining_time": "0:45:47", "throughput": 15473.86, "total_tokens": 6839872} +{"current_steps": 2170, "total_steps": 15621, "loss": 0.5081, "lr": 1.9908440360498565e-06, "epoch": 0.1389155623839703, "percentage": 13.89, "elapsed_time": "0:07:22", "remaining_time": "0:45:43", "throughput": 15487.97, "total_tokens": 6855744} +{"current_steps": 2175, "total_steps": 15621, "loss": 0.5566, "lr": 1.990692560010485e-06, "epoch": 0.1392356443249472, "percentage": 13.92, "elapsed_time": "0:07:23", "remaining_time": "0:45:39", "throughput": 15499.77, "total_tokens": 6869632} +{"current_steps": 2180, "total_steps": 15621, "loss": 0.448, "lr": 1.9905398470798206e-06, "epoch": 0.13955572626592408, "percentage": 13.96, "elapsed_time": "0:07:23", "remaining_time": "0:45:36", "throughput": 15514.09, "total_tokens": 6885696} +{"current_steps": 2185, "total_steps": 15621, "loss": 0.3634, "lr": 1.990385897448527e-06, "epoch": 0.13987580820690096, "percentage": 13.99, "elapsed_time": "0:07:24", "remaining_time": "0:45:32", "throughput": 15528.28, "total_tokens": 6901504} +{"current_steps": 2190, "total_steps": 15621, "loss": 0.5822, "lr": 1.9902307113088114e-06, "epoch": 0.14019589014787787, "percentage": 14.02, "elapsed_time": "0:07:25", "remaining_time": "0:45:29", "throughput": 15541.55, "total_tokens": 6916480} +{"current_steps": 2195, "total_steps": 15621, "loss": 0.4818, "lr": 1.9900742888544264e-06, "epoch": 0.14051597208885475, "percentage": 14.05, "elapsed_time": "0:07:25", "remaining_time": "0:45:25", "throughput": 15555.64, "total_tokens": 6932416} +{"current_steps": 2200, "total_steps": 15621, "loss": 0.534, "lr": 1.989916630280667e-06, "epoch": 0.14083605402983163, "percentage": 14.08, "elapsed_time": "0:07:26", "remaining_time": "0:45:22", "throughput": 15570.44, "total_tokens": 6948992} +{"current_steps": 2205, "total_steps": 15621, "loss": 0.4636, "lr": 1.989757735784372e-06, "epoch": 0.14115613597080853, "percentage": 14.12, "elapsed_time": "0:07:26", "remaining_time": "0:45:19", "throughput": 15584.04, "total_tokens": 6964416} +{"current_steps": 2210, "total_steps": 15621, "loss": 0.4218, "lr": 1.989597605563923e-06, "epoch": 0.1414762179117854, "percentage": 14.15, "elapsed_time": "0:07:27", "remaining_time": "0:45:15", "throughput": 15598.44, "total_tokens": 6980544} +{"current_steps": 2215, "total_steps": 15621, "loss": 0.5658, "lr": 1.9894362398192437e-06, "epoch": 0.14179629985276232, "percentage": 14.18, "elapsed_time": "0:07:28", "remaining_time": "0:45:12", "throughput": 15613.64, "total_tokens": 6997440} +{"current_steps": 2220, "total_steps": 15621, "loss": 0.4163, "lr": 1.9892736387518023e-06, "epoch": 0.1421163817937392, "percentage": 14.21, "elapsed_time": "0:07:28", "remaining_time": "0:45:08", "throughput": 15626.86, "total_tokens": 7012672} +{"current_steps": 2225, "total_steps": 15621, "loss": 0.4773, "lr": 1.9891098025646075e-06, "epoch": 0.14243646373471608, "percentage": 14.24, "elapsed_time": "0:07:29", "remaining_time": "0:45:05", "throughput": 15639.6, "total_tokens": 7027648} +{"current_steps": 2230, "total_steps": 15621, "loss": 0.5303, "lr": 1.9889447314622105e-06, "epoch": 0.142756545675693, "percentage": 14.28, "elapsed_time": "0:07:29", "remaining_time": "0:45:01", "throughput": 15653.05, "total_tokens": 7043200} +{"current_steps": 2235, "total_steps": 15621, "loss": 0.7152, "lr": 1.9887784256507046e-06, "epoch": 0.14307662761666987, "percentage": 14.31, "elapsed_time": "0:07:30", "remaining_time": "0:44:58", "throughput": 15666.24, "total_tokens": 7058688} +{"current_steps": 2240, "total_steps": 15621, "loss": 0.6679, "lr": 1.988610885337725e-06, "epoch": 0.14339670955764675, "percentage": 14.34, "elapsed_time": "0:07:31", "remaining_time": "0:44:55", "throughput": 15679.12, "total_tokens": 7074048} +{"current_steps": 2245, "total_steps": 15621, "loss": 0.5261, "lr": 1.9884421107324476e-06, "epoch": 0.14371679149862365, "percentage": 14.37, "elapsed_time": "0:07:31", "remaining_time": "0:44:51", "throughput": 15692.73, "total_tokens": 7089792} +{"current_steps": 2250, "total_steps": 15621, "loss": 0.4755, "lr": 1.9882721020455893e-06, "epoch": 0.14403687343960053, "percentage": 14.4, "elapsed_time": "0:07:32", "remaining_time": "0:44:48", "throughput": 15705.0, "total_tokens": 7104640} +{"current_steps": 2255, "total_steps": 15621, "loss": 0.5019, "lr": 1.988100859489408e-06, "epoch": 0.14435695538057744, "percentage": 14.44, "elapsed_time": "0:07:32", "remaining_time": "0:44:44", "throughput": 15718.1, "total_tokens": 7120064} +{"current_steps": 2260, "total_steps": 15621, "loss": 0.4754, "lr": 1.9879283832777017e-06, "epoch": 0.14467703732155432, "percentage": 14.47, "elapsed_time": "0:07:33", "remaining_time": "0:44:41", "throughput": 15730.81, "total_tokens": 7135232} +{"current_steps": 2265, "total_steps": 15621, "loss": 0.5075, "lr": 1.9877546736258096e-06, "epoch": 0.1449971192625312, "percentage": 14.5, "elapsed_time": "0:07:34", "remaining_time": "0:44:38", "throughput": 15742.6, "total_tokens": 7149632} +{"current_steps": 2270, "total_steps": 15621, "loss": 0.4107, "lr": 1.98757973075061e-06, "epoch": 0.1453172012035081, "percentage": 14.53, "elapsed_time": "0:07:34", "remaining_time": "0:44:34", "throughput": 15754.93, "total_tokens": 7164352} +{"current_steps": 2275, "total_steps": 15621, "loss": 0.5188, "lr": 1.987403554870521e-06, "epoch": 0.14563728314448499, "percentage": 14.56, "elapsed_time": "0:07:35", "remaining_time": "0:44:31", "throughput": 15767.85, "total_tokens": 7179776} +{"current_steps": 2280, "total_steps": 15621, "loss": 0.4212, "lr": 1.9872261462055003e-06, "epoch": 0.14595736508546187, "percentage": 14.6, "elapsed_time": "0:07:35", "remaining_time": "0:44:27", "throughput": 15779.62, "total_tokens": 7194240} +{"current_steps": 2285, "total_steps": 15621, "loss": 0.4335, "lr": 1.987047504977045e-06, "epoch": 0.14627744702643877, "percentage": 14.63, "elapsed_time": "0:07:36", "remaining_time": "0:44:24", "throughput": 15792.52, "total_tokens": 7209472} +{"current_steps": 2290, "total_steps": 15621, "loss": 0.414, "lr": 1.9868676314081902e-06, "epoch": 0.14659752896741565, "percentage": 14.66, "elapsed_time": "0:07:37", "remaining_time": "0:44:21", "throughput": 15805.72, "total_tokens": 7225088} +{"current_steps": 2295, "total_steps": 15621, "loss": 0.6901, "lr": 1.9866865257235107e-06, "epoch": 0.14691761090839256, "percentage": 14.69, "elapsed_time": "0:07:37", "remaining_time": "0:44:17", "throughput": 15818.9, "total_tokens": 7240704} +{"current_steps": 2300, "total_steps": 15621, "loss": 0.4177, "lr": 1.9865041881491188e-06, "epoch": 0.14723769284936944, "percentage": 14.72, "elapsed_time": "0:07:38", "remaining_time": "0:44:14", "throughput": 15832.07, "total_tokens": 7256000} +{"current_steps": 2305, "total_steps": 15621, "loss": 0.6016, "lr": 1.9863206189126653e-06, "epoch": 0.14755777479034632, "percentage": 14.76, "elapsed_time": "0:07:38", "remaining_time": "0:44:10", "throughput": 15843.56, "total_tokens": 7270336} +{"current_steps": 2310, "total_steps": 15621, "loss": 0.5612, "lr": 1.9861358182433382e-06, "epoch": 0.14787785673132323, "percentage": 14.79, "elapsed_time": "0:07:39", "remaining_time": "0:44:07", "throughput": 15856.1, "total_tokens": 7285440} +{"current_steps": 2315, "total_steps": 15621, "loss": 0.4711, "lr": 1.9859497863718634e-06, "epoch": 0.1481979386723001, "percentage": 14.82, "elapsed_time": "0:07:40", "remaining_time": "0:44:04", "throughput": 15868.93, "total_tokens": 7301120} +{"current_steps": 2320, "total_steps": 15621, "loss": 0.5204, "lr": 1.985762523530504e-06, "epoch": 0.14851802061327699, "percentage": 14.85, "elapsed_time": "0:07:40", "remaining_time": "0:44:01", "throughput": 15881.29, "total_tokens": 7316416} +{"current_steps": 2325, "total_steps": 15621, "loss": 0.5051, "lr": 1.98557402995306e-06, "epoch": 0.1488381025542539, "percentage": 14.88, "elapsed_time": "0:07:41", "remaining_time": "0:43:58", "throughput": 15894.45, "total_tokens": 7332160} +{"current_steps": 2330, "total_steps": 15621, "loss": 0.7069, "lr": 1.985384305874868e-06, "epoch": 0.14915818449523077, "percentage": 14.92, "elapsed_time": "0:07:41", "remaining_time": "0:43:54", "throughput": 15907.3, "total_tokens": 7347776} +{"current_steps": 2335, "total_steps": 15621, "loss": 0.5467, "lr": 1.9851933515328e-06, "epoch": 0.14947826643620768, "percentage": 14.95, "elapsed_time": "0:07:42", "remaining_time": "0:43:51", "throughput": 15919.83, "total_tokens": 7363200} +{"current_steps": 2340, "total_steps": 15621, "loss": 0.4699, "lr": 1.985001167165265e-06, "epoch": 0.14979834837718456, "percentage": 14.98, "elapsed_time": "0:07:43", "remaining_time": "0:43:48", "throughput": 15932.71, "total_tokens": 7378752} +{"current_steps": 2345, "total_steps": 15621, "loss": 0.5165, "lr": 1.984807753012208e-06, "epoch": 0.15011843031816144, "percentage": 15.01, "elapsed_time": "0:07:43", "remaining_time": "0:43:45", "throughput": 15945.07, "total_tokens": 7393984} +{"current_steps": 2346, "total_steps": 15621, "eval_loss": 0.5076366662979126, "epoch": 0.15018244670635683, "percentage": 15.02, "elapsed_time": "0:08:32", "remaining_time": "0:48:22", "throughput": 14419.3, "total_tokens": 7397056} +{"current_steps": 2350, "total_steps": 15621, "loss": 0.5902, "lr": 1.9846131093151086e-06, "epoch": 0.15043851225913835, "percentage": 15.04, "elapsed_time": "0:09:16", "remaining_time": "0:52:24", "throughput": 13306.39, "total_tokens": 7408832} +{"current_steps": 2355, "total_steps": 15621, "loss": 0.4582, "lr": 1.9844172363169808e-06, "epoch": 0.15075859420011523, "percentage": 15.08, "elapsed_time": "0:09:17", "remaining_time": "0:52:19", "throughput": 13318.28, "total_tokens": 7423040} +{"current_steps": 2360, "total_steps": 15621, "loss": 0.5117, "lr": 1.9842201342623756e-06, "epoch": 0.15107867614109213, "percentage": 15.11, "elapsed_time": "0:09:17", "remaining_time": "0:52:15", "throughput": 13331.44, "total_tokens": 7438464} +{"current_steps": 2365, "total_steps": 15621, "loss": 0.5205, "lr": 1.9840218033973766e-06, "epoch": 0.151398758082069, "percentage": 15.14, "elapsed_time": "0:09:18", "remaining_time": "0:52:10", "throughput": 13344.42, "total_tokens": 7453824} +{"current_steps": 2370, "total_steps": 15621, "loss": 0.5717, "lr": 1.9838222439696027e-06, "epoch": 0.1517188400230459, "percentage": 15.17, "elapsed_time": "0:09:19", "remaining_time": "0:52:06", "throughput": 13357.56, "total_tokens": 7469312} +{"current_steps": 2375, "total_steps": 15621, "loss": 0.7065, "lr": 1.9836214562282058e-06, "epoch": 0.1520389219640228, "percentage": 15.2, "elapsed_time": "0:09:19", "remaining_time": "0:52:02", "throughput": 13371.13, "total_tokens": 7485120} +{"current_steps": 2380, "total_steps": 15621, "loss": 0.4971, "lr": 1.9834194404238715e-06, "epoch": 0.15235900390499968, "percentage": 15.24, "elapsed_time": "0:09:20", "remaining_time": "0:51:57", "throughput": 13384.16, "total_tokens": 7500416} +{"current_steps": 2385, "total_steps": 15621, "loss": 0.4125, "lr": 1.9832161968088193e-06, "epoch": 0.15267908584597656, "percentage": 15.27, "elapsed_time": "0:09:21", "remaining_time": "0:51:53", "throughput": 13398.2, "total_tokens": 7516672} +{"current_steps": 2390, "total_steps": 15621, "loss": 0.4764, "lr": 1.9830117256368015e-06, "epoch": 0.15299916778695347, "percentage": 15.3, "elapsed_time": "0:09:21", "remaining_time": "0:51:49", "throughput": 13412.07, "total_tokens": 7532800} +{"current_steps": 2395, "total_steps": 15621, "loss": 0.4924, "lr": 1.982806027163102e-06, "epoch": 0.15331924972793035, "percentage": 15.33, "elapsed_time": "0:09:22", "remaining_time": "0:51:44", "throughput": 13424.66, "total_tokens": 7547776} +{"current_steps": 2400, "total_steps": 15621, "loss": 0.5579, "lr": 1.9825991016445386e-06, "epoch": 0.15363933166890725, "percentage": 15.36, "elapsed_time": "0:09:22", "remaining_time": "0:51:40", "throughput": 13436.85, "total_tokens": 7562496} +{"current_steps": 2405, "total_steps": 15621, "loss": 0.5286, "lr": 1.9823909493394594e-06, "epoch": 0.15395941360988413, "percentage": 15.4, "elapsed_time": "0:09:23", "remaining_time": "0:51:36", "throughput": 13449.68, "total_tokens": 7577920} +{"current_steps": 2410, "total_steps": 15621, "loss": 0.5331, "lr": 1.9821815705077455e-06, "epoch": 0.154279495550861, "percentage": 15.43, "elapsed_time": "0:09:24", "remaining_time": "0:51:31", "throughput": 13462.55, "total_tokens": 7593216} +{"current_steps": 2415, "total_steps": 15621, "loss": 0.5768, "lr": 1.9819709654108087e-06, "epoch": 0.15459957749183792, "percentage": 15.46, "elapsed_time": "0:09:24", "remaining_time": "0:51:27", "throughput": 13474.85, "total_tokens": 7608192} +{"current_steps": 2420, "total_steps": 15621, "loss": 0.4652, "lr": 1.981759134311592e-06, "epoch": 0.1549196594328148, "percentage": 15.49, "elapsed_time": "0:09:25", "remaining_time": "0:51:23", "throughput": 13488.66, "total_tokens": 7624448} +{"current_steps": 2425, "total_steps": 15621, "loss": 0.4847, "lr": 1.981546077474569e-06, "epoch": 0.15523974137379168, "percentage": 15.52, "elapsed_time": "0:09:25", "remaining_time": "0:51:19", "throughput": 13501.86, "total_tokens": 7640192} +{"current_steps": 2430, "total_steps": 15621, "loss": 0.5143, "lr": 1.981331795165744e-06, "epoch": 0.15555982331476859, "percentage": 15.56, "elapsed_time": "0:09:26", "remaining_time": "0:51:14", "throughput": 13514.01, "total_tokens": 7654848} +{"current_steps": 2435, "total_steps": 15621, "loss": 0.6067, "lr": 1.9811162876526498e-06, "epoch": 0.15587990525574547, "percentage": 15.59, "elapsed_time": "0:09:27", "remaining_time": "0:51:10", "throughput": 13527.39, "total_tokens": 7670848} +{"current_steps": 2440, "total_steps": 15621, "loss": 0.6387, "lr": 1.9808995552043515e-06, "epoch": 0.15619998719672237, "percentage": 15.62, "elapsed_time": "0:09:27", "remaining_time": "0:51:06", "throughput": 13539.94, "total_tokens": 7686016} +{"current_steps": 2445, "total_steps": 15621, "loss": 0.5478, "lr": 1.9806815980914413e-06, "epoch": 0.15652006913769925, "percentage": 15.65, "elapsed_time": "0:09:28", "remaining_time": "0:51:02", "throughput": 13553.12, "total_tokens": 7701760} +{"current_steps": 2450, "total_steps": 15621, "loss": 0.5624, "lr": 1.9804624165860417e-06, "epoch": 0.15684015107867613, "percentage": 15.68, "elapsed_time": "0:09:28", "remaining_time": "0:50:58", "throughput": 13566.41, "total_tokens": 7717760} +{"current_steps": 2455, "total_steps": 15621, "loss": 0.3852, "lr": 1.9802420109618028e-06, "epoch": 0.15716023301965304, "percentage": 15.72, "elapsed_time": "0:09:29", "remaining_time": "0:50:54", "throughput": 13579.22, "total_tokens": 7733376} +{"current_steps": 2460, "total_steps": 15621, "loss": 0.4984, "lr": 1.980020381493904e-06, "epoch": 0.15748031496062992, "percentage": 15.75, "elapsed_time": "0:09:30", "remaining_time": "0:50:50", "throughput": 13593.65, "total_tokens": 7750464} +{"current_steps": 2465, "total_steps": 15621, "loss": 0.4942, "lr": 1.979797528459052e-06, "epoch": 0.1578003969016068, "percentage": 15.78, "elapsed_time": "0:09:30", "remaining_time": "0:50:46", "throughput": 13609.27, "total_tokens": 7768576} +{"current_steps": 2470, "total_steps": 15621, "loss": 0.5334, "lr": 1.979573452135482e-06, "epoch": 0.1581204788425837, "percentage": 15.81, "elapsed_time": "0:09:31", "remaining_time": "0:50:42", "throughput": 13622.28, "total_tokens": 7784256} +{"current_steps": 2475, "total_steps": 15621, "loss": 0.3186, "lr": 1.979348152802955e-06, "epoch": 0.15844056078356059, "percentage": 15.84, "elapsed_time": "0:09:32", "remaining_time": "0:50:38", "throughput": 13634.46, "total_tokens": 7799232} +{"current_steps": 2480, "total_steps": 15621, "loss": 0.592, "lr": 1.979121630742761e-06, "epoch": 0.1587606427245375, "percentage": 15.88, "elapsed_time": "0:09:32", "remaining_time": "0:50:34", "throughput": 13647.52, "total_tokens": 7815040} +{"current_steps": 2485, "total_steps": 15621, "loss": 0.4479, "lr": 1.9788938862377146e-06, "epoch": 0.15908072466551437, "percentage": 15.91, "elapsed_time": "0:09:33", "remaining_time": "0:50:30", "throughput": 13660.01, "total_tokens": 7830400} +{"current_steps": 2490, "total_steps": 15621, "loss": 0.4818, "lr": 1.9786649195721577e-06, "epoch": 0.15940080660649125, "percentage": 15.94, "elapsed_time": "0:09:33", "remaining_time": "0:50:26", "throughput": 13672.93, "total_tokens": 7846336} +{"current_steps": 2495, "total_steps": 15621, "loss": 0.6323, "lr": 1.978434731031958e-06, "epoch": 0.15972088854746816, "percentage": 15.97, "elapsed_time": "0:09:34", "remaining_time": "0:50:22", "throughput": 13686.35, "total_tokens": 7862528} +{"current_steps": 2500, "total_steps": 15621, "loss": 0.4541, "lr": 1.9782033209045085e-06, "epoch": 0.16004097048844504, "percentage": 16.0, "elapsed_time": "0:09:35", "remaining_time": "0:50:18", "throughput": 13700.84, "total_tokens": 7880000} +{"current_steps": 2505, "total_steps": 15621, "loss": 0.4053, "lr": 1.977970689478727e-06, "epoch": 0.16036105242942192, "percentage": 16.04, "elapsed_time": "0:09:35", "remaining_time": "0:50:14", "throughput": 13713.23, "total_tokens": 7895296} +{"current_steps": 2510, "total_steps": 15621, "loss": 0.5884, "lr": 1.9777368370450577e-06, "epoch": 0.16068113437039883, "percentage": 16.07, "elapsed_time": "0:09:36", "remaining_time": "0:50:10", "throughput": 13725.99, "total_tokens": 7911104} +{"current_steps": 2515, "total_steps": 15621, "loss": 0.521, "lr": 1.9775017638954674e-06, "epoch": 0.1610012163113757, "percentage": 16.1, "elapsed_time": "0:09:36", "remaining_time": "0:50:06", "throughput": 13737.84, "total_tokens": 7925952} +{"current_steps": 2520, "total_steps": 15621, "loss": 0.5943, "lr": 1.9772654703234476e-06, "epoch": 0.1613212982523526, "percentage": 16.13, "elapsed_time": "0:09:37", "remaining_time": "0:50:02", "throughput": 13749.62, "total_tokens": 7940928} +{"current_steps": 2525, "total_steps": 15621, "loss": 0.5665, "lr": 1.977027956624014e-06, "epoch": 0.1616413801933295, "percentage": 16.16, "elapsed_time": "0:09:38", "remaining_time": "0:49:58", "throughput": 13760.74, "total_tokens": 7955200} +{"current_steps": 2530, "total_steps": 15621, "loss": 0.5819, "lr": 1.9767892230937046e-06, "epoch": 0.16196146213430637, "percentage": 16.2, "elapsed_time": "0:09:38", "remaining_time": "0:49:54", "throughput": 13773.26, "total_tokens": 7970944} +{"current_steps": 2535, "total_steps": 15621, "loss": 0.4311, "lr": 1.976549270030581e-06, "epoch": 0.16228154407528328, "percentage": 16.23, "elapsed_time": "0:09:39", "remaining_time": "0:49:50", "throughput": 13785.08, "total_tokens": 7985856} +{"current_steps": 2540, "total_steps": 15621, "loss": 0.4678, "lr": 1.9763080977342286e-06, "epoch": 0.16260162601626016, "percentage": 16.26, "elapsed_time": "0:09:39", "remaining_time": "0:49:46", "throughput": 13796.11, "total_tokens": 8001088} +{"current_steps": 2545, "total_steps": 15621, "loss": 0.4965, "lr": 1.9760657065057527e-06, "epoch": 0.16292170795723707, "percentage": 16.29, "elapsed_time": "0:09:40", "remaining_time": "0:49:43", "throughput": 13809.56, "total_tokens": 8017856} +{"current_steps": 2550, "total_steps": 15621, "loss": 0.4527, "lr": 1.975822096647782e-06, "epoch": 0.16324178989821395, "percentage": 16.32, "elapsed_time": "0:09:41", "remaining_time": "0:49:39", "throughput": 13822.32, "total_tokens": 8033792} +{"current_steps": 2555, "total_steps": 15621, "loss": 0.4821, "lr": 1.975577268464466e-06, "epoch": 0.16356187183919083, "percentage": 16.36, "elapsed_time": "0:09:41", "remaining_time": "0:49:35", "throughput": 13833.33, "total_tokens": 8048256} +{"current_steps": 2560, "total_steps": 15621, "loss": 0.5626, "lr": 1.9753312222614765e-06, "epoch": 0.16388195378016773, "percentage": 16.39, "elapsed_time": "0:09:42", "remaining_time": "0:49:31", "throughput": 13845.51, "total_tokens": 8063680} +{"current_steps": 2565, "total_steps": 15621, "loss": 0.4853, "lr": 1.9750839583460036e-06, "epoch": 0.1642020357211446, "percentage": 16.42, "elapsed_time": "0:09:43", "remaining_time": "0:49:27", "throughput": 13858.27, "total_tokens": 8079744} +{"current_steps": 2570, "total_steps": 15621, "loss": 0.502, "lr": 1.9748354770267603e-06, "epoch": 0.1645221176621215, "percentage": 16.45, "elapsed_time": "0:09:43", "remaining_time": "0:49:23", "throughput": 13869.94, "total_tokens": 8094656} +{"current_steps": 2575, "total_steps": 15621, "loss": 0.5116, "lr": 1.9745857786139777e-06, "epoch": 0.1648421996030984, "percentage": 16.48, "elapsed_time": "0:09:44", "remaining_time": "0:49:19", "throughput": 13882.48, "total_tokens": 8110528} +{"current_steps": 2580, "total_steps": 15621, "loss": 0.6028, "lr": 1.974334863419408e-06, "epoch": 0.16516228154407528, "percentage": 16.52, "elapsed_time": "0:09:44", "remaining_time": "0:49:16", "throughput": 13895.19, "total_tokens": 8126720} +{"current_steps": 2585, "total_steps": 15621, "loss": 0.518, "lr": 1.9740827317563212e-06, "epoch": 0.1654823634850522, "percentage": 16.55, "elapsed_time": "0:09:45", "remaining_time": "0:49:12", "throughput": 13906.34, "total_tokens": 8141312} +{"current_steps": 2590, "total_steps": 15621, "loss": 0.4889, "lr": 1.973829383939507e-06, "epoch": 0.16580244542602907, "percentage": 16.58, "elapsed_time": "0:09:46", "remaining_time": "0:49:08", "throughput": 13918.18, "total_tokens": 8156736} +{"current_steps": 2595, "total_steps": 15621, "loss": 0.4987, "lr": 1.973574820285273e-06, "epoch": 0.16612252736700595, "percentage": 16.61, "elapsed_time": "0:09:46", "remaining_time": "0:49:04", "throughput": 13930.52, "total_tokens": 8172480} +{"current_steps": 2600, "total_steps": 15621, "loss": 0.5702, "lr": 1.9733190411114443e-06, "epoch": 0.16644260930798285, "percentage": 16.64, "elapsed_time": "0:09:47", "remaining_time": "0:49:01", "throughput": 13942.68, "total_tokens": 8188224} +{"current_steps": 2605, "total_steps": 15621, "loss": 0.438, "lr": 1.9730620467373654e-06, "epoch": 0.16676269124895973, "percentage": 16.68, "elapsed_time": "0:09:47", "remaining_time": "0:48:57", "throughput": 13955.37, "total_tokens": 8204352} +{"current_steps": 2610, "total_steps": 15621, "loss": 0.5744, "lr": 1.9728038374838958e-06, "epoch": 0.1670827731899366, "percentage": 16.71, "elapsed_time": "0:09:48", "remaining_time": "0:48:53", "throughput": 13966.83, "total_tokens": 8219328} +{"current_steps": 2615, "total_steps": 15621, "loss": 0.3913, "lr": 1.972544413673413e-06, "epoch": 0.16740285513091352, "percentage": 16.74, "elapsed_time": "0:09:49", "remaining_time": "0:48:49", "throughput": 13978.34, "total_tokens": 8234560} +{"current_steps": 2620, "total_steps": 15621, "loss": 0.5779, "lr": 1.9722837756298108e-06, "epoch": 0.1677229370718904, "percentage": 16.77, "elapsed_time": "0:09:49", "remaining_time": "0:48:46", "throughput": 13989.41, "total_tokens": 8249344} +{"current_steps": 2625, "total_steps": 15621, "loss": 0.5548, "lr": 1.972021923678499e-06, "epoch": 0.1680430190128673, "percentage": 16.8, "elapsed_time": "0:09:50", "remaining_time": "0:48:42", "throughput": 14001.85, "total_tokens": 8265600} +{"current_steps": 2630, "total_steps": 15621, "loss": 0.4861, "lr": 1.971758858146403e-06, "epoch": 0.16836310095384419, "percentage": 16.84, "elapsed_time": "0:09:50", "remaining_time": "0:48:38", "throughput": 14012.94, "total_tokens": 8280384} +{"current_steps": 2635, "total_steps": 15621, "loss": 0.4897, "lr": 1.9714945793619626e-06, "epoch": 0.16868318289482107, "percentage": 16.87, "elapsed_time": "0:09:51", "remaining_time": "0:48:35", "throughput": 14024.36, "total_tokens": 8295744} +{"current_steps": 2640, "total_steps": 15621, "loss": 0.5052, "lr": 1.971229087655133e-06, "epoch": 0.16900326483579797, "percentage": 16.9, "elapsed_time": "0:09:52", "remaining_time": "0:48:31", "throughput": 14036.43, "total_tokens": 8311680} +{"current_steps": 2645, "total_steps": 15621, "loss": 0.4678, "lr": 1.9709623833573842e-06, "epoch": 0.16932334677677485, "percentage": 16.93, "elapsed_time": "0:09:52", "remaining_time": "0:48:27", "throughput": 14047.57, "total_tokens": 8326592} +{"current_steps": 2650, "total_steps": 15621, "loss": 0.4588, "lr": 1.9706944668016994e-06, "epoch": 0.16964342871775173, "percentage": 16.96, "elapsed_time": "0:09:53", "remaining_time": "0:48:24", "throughput": 14058.78, "total_tokens": 8341632} +{"current_steps": 2655, "total_steps": 15621, "loss": 0.4627, "lr": 1.9704253383225756e-06, "epoch": 0.16996351065872864, "percentage": 17.0, "elapsed_time": "0:09:53", "remaining_time": "0:48:20", "throughput": 14071.62, "total_tokens": 8358400} +{"current_steps": 2660, "total_steps": 15621, "loss": 0.4845, "lr": 1.970154998256023e-06, "epoch": 0.17028359259970552, "percentage": 17.03, "elapsed_time": "0:09:54", "remaining_time": "0:48:17", "throughput": 14083.65, "total_tokens": 8374144} +{"current_steps": 2665, "total_steps": 15621, "loss": 0.4215, "lr": 1.9698834469395644e-06, "epoch": 0.17060367454068243, "percentage": 17.06, "elapsed_time": "0:09:55", "remaining_time": "0:48:13", "throughput": 14095.08, "total_tokens": 8389440} +{"current_steps": 2670, "total_steps": 15621, "loss": 0.5408, "lr": 1.969610684712234e-06, "epoch": 0.1709237564816593, "percentage": 17.09, "elapsed_time": "0:09:55", "remaining_time": "0:48:09", "throughput": 14106.47, "total_tokens": 8404672} +{"current_steps": 2675, "total_steps": 15621, "loss": 0.5508, "lr": 1.9693367119145794e-06, "epoch": 0.17124383842263619, "percentage": 17.12, "elapsed_time": "0:09:56", "remaining_time": "0:48:06", "throughput": 14117.94, "total_tokens": 8420096} +{"current_steps": 2680, "total_steps": 15621, "loss": 0.6684, "lr": 1.969061528888659e-06, "epoch": 0.1715639203636131, "percentage": 17.16, "elapsed_time": "0:09:57", "remaining_time": "0:48:02", "throughput": 14130.24, "total_tokens": 8436288} +{"current_steps": 2685, "total_steps": 15621, "loss": 0.5401, "lr": 1.9687851359780415e-06, "epoch": 0.17188400230458997, "percentage": 17.19, "elapsed_time": "0:09:57", "remaining_time": "0:47:59", "throughput": 14142.7, "total_tokens": 8452672} +{"current_steps": 2690, "total_steps": 15621, "loss": 0.4867, "lr": 1.968507533527807e-06, "epoch": 0.17220408424556685, "percentage": 17.22, "elapsed_time": "0:09:58", "remaining_time": "0:47:56", "throughput": 14155.12, "total_tokens": 8469120} +{"current_steps": 2695, "total_steps": 15621, "loss": 0.4748, "lr": 1.9682287218845455e-06, "epoch": 0.17252416618654376, "percentage": 17.25, "elapsed_time": "0:09:58", "remaining_time": "0:47:52", "throughput": 14166.61, "total_tokens": 8484736} +{"current_steps": 2700, "total_steps": 15621, "loss": 0.7367, "lr": 1.967948701396356e-06, "epoch": 0.17284424812752064, "percentage": 17.28, "elapsed_time": "0:09:59", "remaining_time": "0:47:49", "throughput": 14178.37, "total_tokens": 8500480} +{"current_steps": 2705, "total_steps": 15621, "loss": 0.3977, "lr": 1.9676674724128485e-06, "epoch": 0.17316433006849755, "percentage": 17.32, "elapsed_time": "0:10:00", "remaining_time": "0:47:45", "throughput": 14188.43, "total_tokens": 8514624} +{"current_steps": 2710, "total_steps": 15621, "loss": 0.4543, "lr": 1.9673850352851397e-06, "epoch": 0.17348441200947443, "percentage": 17.35, "elapsed_time": "0:10:00", "remaining_time": "0:47:41", "throughput": 14199.24, "total_tokens": 8529664} +{"current_steps": 2715, "total_steps": 15621, "loss": 0.5825, "lr": 1.967101390365856e-06, "epoch": 0.1738044939504513, "percentage": 17.38, "elapsed_time": "0:10:01", "remaining_time": "0:47:38", "throughput": 14210.96, "total_tokens": 8545280} +{"current_steps": 2720, "total_steps": 15621, "loss": 0.492, "lr": 1.966816538009131e-06, "epoch": 0.1741245758914282, "percentage": 17.41, "elapsed_time": "0:10:01", "remaining_time": "0:47:34", "throughput": 14222.14, "total_tokens": 8560384} +{"current_steps": 2725, "total_steps": 15621, "loss": 0.5425, "lr": 1.966530478570607e-06, "epoch": 0.1744446578324051, "percentage": 17.44, "elapsed_time": "0:10:02", "remaining_time": "0:47:31", "throughput": 14234.67, "total_tokens": 8576960} +{"current_steps": 2730, "total_steps": 15621, "loss": 0.4635, "lr": 1.9662432124074325e-06, "epoch": 0.174764739773382, "percentage": 17.48, "elapsed_time": "0:10:03", "remaining_time": "0:47:28", "throughput": 14245.85, "total_tokens": 8592384} +{"current_steps": 2735, "total_steps": 15621, "loss": 0.4836, "lr": 1.965954739878262e-06, "epoch": 0.17508482171435888, "percentage": 17.51, "elapsed_time": "0:10:03", "remaining_time": "0:47:24", "throughput": 14258.33, "total_tokens": 8609024} +{"current_steps": 2740, "total_steps": 15621, "loss": 0.4283, "lr": 1.965665061343257e-06, "epoch": 0.17540490365533576, "percentage": 17.54, "elapsed_time": "0:10:04", "remaining_time": "0:47:21", "throughput": 14270.02, "total_tokens": 8624768} +{"current_steps": 2745, "total_steps": 15621, "loss": 0.4646, "lr": 1.965374177164085e-06, "epoch": 0.17572498559631267, "percentage": 17.57, "elapsed_time": "0:10:05", "remaining_time": "0:47:17", "throughput": 14281.43, "total_tokens": 8640448} +{"current_steps": 2750, "total_steps": 15621, "loss": 0.5427, "lr": 1.9650820877039182e-06, "epoch": 0.17604506753728955, "percentage": 17.6, "elapsed_time": "0:10:05", "remaining_time": "0:47:14", "throughput": 14292.16, "total_tokens": 8655296} +{"current_steps": 2755, "total_steps": 15621, "loss": 0.4878, "lr": 1.9647887933274334e-06, "epoch": 0.17636514947826643, "percentage": 17.64, "elapsed_time": "0:10:06", "remaining_time": "0:47:11", "throughput": 14304.36, "total_tokens": 8671872} +{"current_steps": 2760, "total_steps": 15621, "loss": 0.4822, "lr": 1.9644942944008124e-06, "epoch": 0.17668523141924333, "percentage": 17.67, "elapsed_time": "0:10:06", "remaining_time": "0:47:07", "throughput": 14316.11, "total_tokens": 8687680} +{"current_steps": 2765, "total_steps": 15621, "loss": 0.5914, "lr": 1.96419859129174e-06, "epoch": 0.1770053133602202, "percentage": 17.7, "elapsed_time": "0:10:07", "remaining_time": "0:47:04", "throughput": 14326.99, "total_tokens": 8702912} +{"current_steps": 2770, "total_steps": 15621, "loss": 0.4702, "lr": 1.963901684369406e-06, "epoch": 0.17732539530119712, "percentage": 17.73, "elapsed_time": "0:10:08", "remaining_time": "0:47:00", "throughput": 14338.04, "total_tokens": 8718144} +{"current_steps": 2775, "total_steps": 15621, "loss": 0.4989, "lr": 1.9636035740045013e-06, "epoch": 0.177645477242174, "percentage": 17.76, "elapsed_time": "0:10:08", "remaining_time": "0:46:57", "throughput": 14348.77, "total_tokens": 8732992} +{"current_steps": 2780, "total_steps": 15621, "loss": 0.6024, "lr": 1.9633042605692207e-06, "epoch": 0.17796555918315088, "percentage": 17.8, "elapsed_time": "0:10:09", "remaining_time": "0:46:54", "throughput": 14360.47, "total_tokens": 8749056} +{"current_steps": 2785, "total_steps": 15621, "loss": 0.4879, "lr": 1.9630037444372597e-06, "epoch": 0.17828564112412779, "percentage": 17.83, "elapsed_time": "0:10:09", "remaining_time": "0:46:50", "throughput": 14372.21, "total_tokens": 8765184} +{"current_steps": 2790, "total_steps": 15621, "loss": 0.4133, "lr": 1.9627020259838177e-06, "epoch": 0.17860572306510467, "percentage": 17.86, "elapsed_time": "0:10:10", "remaining_time": "0:46:47", "throughput": 14383.2, "total_tokens": 8780480} +{"current_steps": 2795, "total_steps": 15621, "loss": 0.5539, "lr": 1.9623991055855925e-06, "epoch": 0.17892580500608155, "percentage": 17.89, "elapsed_time": "0:10:11", "remaining_time": "0:46:44", "throughput": 14394.69, "total_tokens": 8796352} +{"current_steps": 2800, "total_steps": 15621, "loss": 0.443, "lr": 1.962094983620784e-06, "epoch": 0.17924588694705845, "percentage": 17.92, "elapsed_time": "0:10:11", "remaining_time": "0:46:40", "throughput": 14404.56, "total_tokens": 8810688} +{"current_steps": 2805, "total_steps": 15621, "loss": 0.4279, "lr": 1.9617896604690925e-06, "epoch": 0.17956596888803533, "percentage": 17.96, "elapsed_time": "0:10:12", "remaining_time": "0:46:37", "throughput": 14415.71, "total_tokens": 8826304} +{"current_steps": 2810, "total_steps": 15621, "loss": 0.4628, "lr": 1.961483136511717e-06, "epoch": 0.17988605082901224, "percentage": 17.99, "elapsed_time": "0:10:12", "remaining_time": "0:46:34", "throughput": 14426.25, "total_tokens": 8841344} +{"current_steps": 2815, "total_steps": 15621, "loss": 0.6058, "lr": 1.9611754121313567e-06, "epoch": 0.18020613276998912, "percentage": 18.02, "elapsed_time": "0:10:13", "remaining_time": "0:46:30", "throughput": 14438.2, "total_tokens": 8857664} +{"current_steps": 2820, "total_steps": 15621, "loss": 0.5762, "lr": 1.960866487712209e-06, "epoch": 0.180526214710966, "percentage": 18.05, "elapsed_time": "0:10:14", "remaining_time": "0:46:27", "throughput": 14449.35, "total_tokens": 8873408} +{"current_steps": 2825, "total_steps": 15621, "loss": 0.425, "lr": 1.9605563636399695e-06, "epoch": 0.1808462966519429, "percentage": 18.08, "elapsed_time": "0:10:14", "remaining_time": "0:46:24", "throughput": 14460.95, "total_tokens": 8889472} +{"current_steps": 2830, "total_steps": 15621, "loss": 0.5908, "lr": 1.9602450403018315e-06, "epoch": 0.18116637859291979, "percentage": 18.12, "elapsed_time": "0:10:15", "remaining_time": "0:46:21", "throughput": 14471.6, "total_tokens": 8904640} +{"current_steps": 2835, "total_steps": 15621, "loss": 0.4446, "lr": 1.9599325180864864e-06, "epoch": 0.18148646053389667, "percentage": 18.15, "elapsed_time": "0:10:15", "remaining_time": "0:46:17", "throughput": 14482.17, "total_tokens": 8919680} +{"current_steps": 2840, "total_steps": 15621, "loss": 0.4418, "lr": 1.9596187973841216e-06, "epoch": 0.18180654247487357, "percentage": 18.18, "elapsed_time": "0:10:16", "remaining_time": "0:46:14", "throughput": 14493.31, "total_tokens": 8935360} +{"current_steps": 2845, "total_steps": 15621, "loss": 0.4892, "lr": 1.959303878586421e-06, "epoch": 0.18212662441585045, "percentage": 18.21, "elapsed_time": "0:10:17", "remaining_time": "0:46:11", "throughput": 14504.95, "total_tokens": 8951552} +{"current_steps": 2850, "total_steps": 15621, "loss": 0.5694, "lr": 1.9589877620865647e-06, "epoch": 0.18244670635682736, "percentage": 18.24, "elapsed_time": "0:10:17", "remaining_time": "0:46:08", "throughput": 14517.28, "total_tokens": 8968576} +{"current_steps": 2855, "total_steps": 15621, "loss": 0.4559, "lr": 1.9586704482792277e-06, "epoch": 0.18276678829780424, "percentage": 18.28, "elapsed_time": "0:10:18", "remaining_time": "0:46:05", "throughput": 14527.88, "total_tokens": 8983744} +{"current_steps": 2860, "total_steps": 15621, "loss": 0.4376, "lr": 1.95835193756058e-06, "epoch": 0.18308687023878112, "percentage": 18.31, "elapsed_time": "0:10:18", "remaining_time": "0:46:01", "throughput": 14538.36, "total_tokens": 8999040} +{"current_steps": 2865, "total_steps": 15621, "loss": 0.4186, "lr": 1.9580322303282858e-06, "epoch": 0.18340695217975803, "percentage": 18.34, "elapsed_time": "0:10:19", "remaining_time": "0:45:58", "throughput": 14550.43, "total_tokens": 9015872} +{"current_steps": 2870, "total_steps": 15621, "loss": 0.4001, "lr": 1.9577113269815038e-06, "epoch": 0.1837270341207349, "percentage": 18.37, "elapsed_time": "0:10:20", "remaining_time": "0:45:55", "throughput": 14561.67, "total_tokens": 9031744} +{"current_steps": 2875, "total_steps": 15621, "loss": 0.5877, "lr": 1.957389227920885e-06, "epoch": 0.18404711606171179, "percentage": 18.4, "elapsed_time": "0:10:20", "remaining_time": "0:45:52", "throughput": 14572.88, "total_tokens": 9047872} +{"current_steps": 2880, "total_steps": 15621, "loss": 0.5101, "lr": 1.957065933548574e-06, "epoch": 0.1843671980026887, "percentage": 18.44, "elapsed_time": "0:10:21", "remaining_time": "0:45:49", "throughput": 14583.23, "total_tokens": 9062976} +{"current_steps": 2885, "total_steps": 15621, "loss": 0.5899, "lr": 1.956741444268208e-06, "epoch": 0.18468727994366557, "percentage": 18.47, "elapsed_time": "0:10:22", "remaining_time": "0:45:46", "throughput": 14593.81, "total_tokens": 9078208} +{"current_steps": 2890, "total_steps": 15621, "loss": 0.4744, "lr": 1.9564157604849154e-06, "epoch": 0.18500736188464248, "percentage": 18.5, "elapsed_time": "0:10:22", "remaining_time": "0:45:43", "throughput": 14605.27, "total_tokens": 9094720} +{"current_steps": 2895, "total_steps": 15621, "loss": 0.5274, "lr": 1.9560888826053163e-06, "epoch": 0.18532744382561936, "percentage": 18.53, "elapsed_time": "0:10:23", "remaining_time": "0:45:39", "throughput": 14616.09, "total_tokens": 9110336} +{"current_steps": 2900, "total_steps": 15621, "loss": 0.5573, "lr": 1.9557608110375212e-06, "epoch": 0.18564752576659624, "percentage": 18.56, "elapsed_time": "0:10:23", "remaining_time": "0:45:36", "throughput": 14627.7, "total_tokens": 9126912} +{"current_steps": 2905, "total_steps": 15621, "loss": 0.549, "lr": 1.955431546191132e-06, "epoch": 0.18596760770757315, "percentage": 18.6, "elapsed_time": "0:10:24", "remaining_time": "0:45:33", "throughput": 14638.41, "total_tokens": 9142400} +{"current_steps": 2910, "total_steps": 15621, "loss": 0.5161, "lr": 1.95510108847724e-06, "epoch": 0.18628768964855003, "percentage": 18.63, "elapsed_time": "0:10:25", "remaining_time": "0:45:30", "throughput": 14648.35, "total_tokens": 9157184} +{"current_steps": 2915, "total_steps": 15621, "loss": 0.5237, "lr": 1.954769438308424e-06, "epoch": 0.1866077715895269, "percentage": 18.66, "elapsed_time": "0:10:25", "remaining_time": "0:45:27", "throughput": 14659.75, "total_tokens": 9173696} +{"current_steps": 2920, "total_steps": 15621, "loss": 0.4992, "lr": 1.954436596098754e-06, "epoch": 0.1869278535305038, "percentage": 18.69, "elapsed_time": "0:10:26", "remaining_time": "0:45:24", "throughput": 14670.89, "total_tokens": 9190080} +{"current_steps": 2925, "total_steps": 15621, "loss": 0.5761, "lr": 1.9541025622637875e-06, "epoch": 0.1872479354714807, "percentage": 18.72, "elapsed_time": "0:10:26", "remaining_time": "0:45:21", "throughput": 14680.2, "total_tokens": 9204352} +{"current_steps": 2930, "total_steps": 15621, "loss": 0.6098, "lr": 1.95376733722057e-06, "epoch": 0.1875680174124576, "percentage": 18.76, "elapsed_time": "0:10:27", "remaining_time": "0:45:18", "throughput": 14690.18, "total_tokens": 9219200} +{"current_steps": 2935, "total_steps": 15621, "loss": 0.4702, "lr": 1.9534309213876337e-06, "epoch": 0.18788809935343448, "percentage": 18.79, "elapsed_time": "0:10:28", "remaining_time": "0:45:15", "throughput": 14699.74, "total_tokens": 9233600} +{"current_steps": 2940, "total_steps": 15621, "loss": 0.4343, "lr": 1.953093315184997e-06, "epoch": 0.18820818129441136, "percentage": 18.82, "elapsed_time": "0:10:28", "remaining_time": "0:45:12", "throughput": 14710.7, "total_tokens": 9249536} +{"current_steps": 2945, "total_steps": 15621, "loss": 0.6391, "lr": 1.952754519034166e-06, "epoch": 0.18852826323538827, "percentage": 18.85, "elapsed_time": "0:10:29", "remaining_time": "0:45:08", "throughput": 14720.5, "total_tokens": 9264256} +{"current_steps": 2950, "total_steps": 15621, "loss": 0.4487, "lr": 1.9524145333581313e-06, "epoch": 0.18884834517636515, "percentage": 18.88, "elapsed_time": "0:10:29", "remaining_time": "0:45:05", "throughput": 14730.7, "total_tokens": 9279488} +{"current_steps": 2955, "total_steps": 15621, "loss": 0.5122, "lr": 1.952073358581369e-06, "epoch": 0.18916842711734205, "percentage": 18.92, "elapsed_time": "0:10:30", "remaining_time": "0:45:02", "throughput": 14740.55, "total_tokens": 9294336} +{"current_steps": 2960, "total_steps": 15621, "loss": 0.5552, "lr": 1.95173099512984e-06, "epoch": 0.18948850905831893, "percentage": 18.95, "elapsed_time": "0:10:31", "remaining_time": "0:44:59", "throughput": 14750.61, "total_tokens": 9309376} +{"current_steps": 2965, "total_steps": 15621, "loss": 0.4579, "lr": 1.9513874434309894e-06, "epoch": 0.1898085909992958, "percentage": 18.98, "elapsed_time": "0:10:31", "remaining_time": "0:44:56", "throughput": 14760.33, "total_tokens": 9324224} +{"current_steps": 2970, "total_steps": 15621, "loss": 0.4466, "lr": 1.951042703913745e-06, "epoch": 0.19012867294027272, "percentage": 19.01, "elapsed_time": "0:10:32", "remaining_time": "0:44:53", "throughput": 14770.17, "total_tokens": 9339136} +{"current_steps": 2975, "total_steps": 15621, "loss": 0.4491, "lr": 1.950696777008518e-06, "epoch": 0.1904487548812496, "percentage": 19.04, "elapsed_time": "0:10:32", "remaining_time": "0:44:50", "throughput": 14780.57, "total_tokens": 9354688} +{"current_steps": 2980, "total_steps": 15621, "loss": 0.4917, "lr": 1.9503496631472025e-06, "epoch": 0.19076883682222648, "percentage": 19.08, "elapsed_time": "0:10:33", "remaining_time": "0:44:47", "throughput": 14790.36, "total_tokens": 9369664} +{"current_steps": 2985, "total_steps": 15621, "loss": 0.6324, "lr": 1.9500013627631746e-06, "epoch": 0.19108891876320339, "percentage": 19.11, "elapsed_time": "0:10:34", "remaining_time": "0:44:44", "throughput": 14800.31, "total_tokens": 9384768} +{"current_steps": 2990, "total_steps": 15621, "loss": 0.3728, "lr": 1.949651876291291e-06, "epoch": 0.19140900070418027, "percentage": 19.14, "elapsed_time": "0:10:34", "remaining_time": "0:44:41", "throughput": 14810.71, "total_tokens": 9400320} +{"current_steps": 2995, "total_steps": 15621, "loss": 0.4739, "lr": 1.9493012041678894e-06, "epoch": 0.19172908264515717, "percentage": 19.17, "elapsed_time": "0:10:35", "remaining_time": "0:44:40", "throughput": 14811.23, "total_tokens": 9415872} +{"current_steps": 3000, "total_steps": 15621, "loss": 0.6013, "lr": 1.9489493468307883e-06, "epoch": 0.19204916458613405, "percentage": 19.2, "elapsed_time": "0:10:36", "remaining_time": "0:44:37", "throughput": 14822.75, "total_tokens": 9432704} +{"current_steps": 3005, "total_steps": 15621, "loss": 0.5159, "lr": 1.948596304719286e-06, "epoch": 0.19236924652711093, "percentage": 19.24, "elapsed_time": "0:10:36", "remaining_time": "0:44:34", "throughput": 14832.88, "total_tokens": 9448192} +{"current_steps": 3010, "total_steps": 15621, "loss": 0.4322, "lr": 1.9482420782741594e-06, "epoch": 0.19268932846808784, "percentage": 19.27, "elapsed_time": "0:10:37", "remaining_time": "0:44:31", "throughput": 14843.85, "total_tokens": 9464576} +{"current_steps": 3015, "total_steps": 15621, "loss": 0.5546, "lr": 1.9478866679376647e-06, "epoch": 0.19300941040906472, "percentage": 19.3, "elapsed_time": "0:10:38", "remaining_time": "0:44:28", "throughput": 14853.97, "total_tokens": 9479936} +{"current_steps": 3020, "total_steps": 15621, "loss": 0.5447, "lr": 1.9475300741535353e-06, "epoch": 0.1933294923500416, "percentage": 19.33, "elapsed_time": "0:10:38", "remaining_time": "0:44:25", "throughput": 14865.81, "total_tokens": 9497280} +{"current_steps": 3025, "total_steps": 15621, "loss": 0.4568, "lr": 1.9471722973669833e-06, "epoch": 0.1936495742910185, "percentage": 19.36, "elapsed_time": "0:10:39", "remaining_time": "0:44:22", "throughput": 14877.62, "total_tokens": 9514496} +{"current_steps": 3030, "total_steps": 15621, "loss": 0.3932, "lr": 1.946813338024697e-06, "epoch": 0.19396965623199539, "percentage": 19.4, "elapsed_time": "0:10:40", "remaining_time": "0:44:19", "throughput": 14887.37, "total_tokens": 9529536} +{"current_steps": 3035, "total_steps": 15621, "loss": 0.526, "lr": 1.9464531965748414e-06, "epoch": 0.1942897381729723, "percentage": 19.43, "elapsed_time": "0:10:40", "remaining_time": "0:44:17", "throughput": 14897.94, "total_tokens": 9545472} +{"current_steps": 3040, "total_steps": 15621, "loss": 0.585, "lr": 1.9460918734670573e-06, "epoch": 0.19460982011394917, "percentage": 19.46, "elapsed_time": "0:10:41", "remaining_time": "0:44:14", "throughput": 14907.95, "total_tokens": 9560960} +{"current_steps": 3045, "total_steps": 15621, "loss": 0.5221, "lr": 1.945729369152461e-06, "epoch": 0.19492990205492605, "percentage": 19.49, "elapsed_time": "0:10:41", "remaining_time": "0:44:11", "throughput": 14917.71, "total_tokens": 9576320} +{"current_steps": 3050, "total_steps": 15621, "loss": 0.5632, "lr": 1.945365684083643e-06, "epoch": 0.19524998399590296, "percentage": 19.52, "elapsed_time": "0:10:42", "remaining_time": "0:44:08", "throughput": 14927.86, "total_tokens": 9592192} +{"current_steps": 3055, "total_steps": 15621, "loss": 0.6164, "lr": 1.945000818714668e-06, "epoch": 0.19557006593687984, "percentage": 19.56, "elapsed_time": "0:10:43", "remaining_time": "0:44:05", "throughput": 14938.28, "total_tokens": 9608128} +{"current_steps": 3060, "total_steps": 15621, "loss": 0.5338, "lr": 1.944634773501076e-06, "epoch": 0.19589014787785672, "percentage": 19.59, "elapsed_time": "0:10:43", "remaining_time": "0:44:02", "throughput": 14948.6, "total_tokens": 9623872} +{"current_steps": 3065, "total_steps": 15621, "loss": 0.5496, "lr": 1.9442675488998783e-06, "epoch": 0.19621022981883363, "percentage": 19.62, "elapsed_time": "0:10:44", "remaining_time": "0:43:59", "throughput": 14958.52, "total_tokens": 9639488} +{"current_steps": 3070, "total_steps": 15621, "loss": 0.4913, "lr": 1.9438991453695587e-06, "epoch": 0.1965303117598105, "percentage": 19.65, "elapsed_time": "0:10:45", "remaining_time": "0:43:57", "throughput": 14968.87, "total_tokens": 9655680} +{"current_steps": 3075, "total_steps": 15621, "loss": 0.5489, "lr": 1.943529563370073e-06, "epoch": 0.1968503937007874, "percentage": 19.69, "elapsed_time": "0:10:45", "remaining_time": "0:43:54", "throughput": 14978.2, "total_tokens": 9670400} +{"current_steps": 3080, "total_steps": 15621, "loss": 0.3868, "lr": 1.9431588033628495e-06, "epoch": 0.1971704756417643, "percentage": 19.72, "elapsed_time": "0:10:46", "remaining_time": "0:43:51", "throughput": 14987.71, "total_tokens": 9685504} +{"current_steps": 3085, "total_steps": 15621, "loss": 0.635, "lr": 1.9427868658107862e-06, "epoch": 0.19749055758274117, "percentage": 19.75, "elapsed_time": "0:10:46", "remaining_time": "0:43:48", "throughput": 14998.4, "total_tokens": 9701952} +{"current_steps": 3090, "total_steps": 15621, "loss": 0.4485, "lr": 1.942413751178251e-06, "epoch": 0.19781063952371808, "percentage": 19.78, "elapsed_time": "0:10:47", "remaining_time": "0:43:45", "throughput": 15007.91, "total_tokens": 9716928} +{"current_steps": 3095, "total_steps": 15621, "loss": 0.6516, "lr": 1.9420394599310826e-06, "epoch": 0.19813072146469496, "percentage": 19.81, "elapsed_time": "0:10:48", "remaining_time": "0:43:42", "throughput": 15017.46, "total_tokens": 9732096} +{"current_steps": 3100, "total_steps": 15621, "loss": 0.5307, "lr": 1.941663992536588e-06, "epoch": 0.19845080340567184, "percentage": 19.85, "elapsed_time": "0:10:48", "remaining_time": "0:43:39", "throughput": 15027.24, "total_tokens": 9747648} +{"current_steps": 3105, "total_steps": 15621, "loss": 0.4371, "lr": 1.941287349463542e-06, "epoch": 0.19877088534664875, "percentage": 19.88, "elapsed_time": "0:10:49", "remaining_time": "0:43:37", "throughput": 15036.88, "total_tokens": 9763072} +{"current_steps": 3110, "total_steps": 15621, "loss": 0.4726, "lr": 1.940909531182188e-06, "epoch": 0.19909096728762563, "percentage": 19.91, "elapsed_time": "0:10:49", "remaining_time": "0:43:34", "throughput": 15046.35, "total_tokens": 9778176} +{"current_steps": 3115, "total_steps": 15621, "loss": 0.6129, "lr": 1.9405305381642375e-06, "epoch": 0.19941104922860253, "percentage": 19.94, "elapsed_time": "0:10:50", "remaining_time": "0:43:31", "throughput": 15056.15, "total_tokens": 9793536} +{"current_steps": 3120, "total_steps": 15621, "loss": 0.4986, "lr": 1.9401503708828665e-06, "epoch": 0.1997311311695794, "percentage": 19.97, "elapsed_time": "0:10:51", "remaining_time": "0:43:28", "throughput": 15065.29, "total_tokens": 9808192} +{"current_steps": 3125, "total_steps": 15621, "loss": 0.5774, "lr": 1.939769029812719e-06, "epoch": 0.2000512131105563, "percentage": 20.01, "elapsed_time": "0:10:51", "remaining_time": "0:43:25", "throughput": 15074.63, "total_tokens": 9823232} +{"current_steps": 3128, "total_steps": 15621, "eval_loss": 0.48840755224227905, "epoch": 0.20024326227514244, "percentage": 20.02, "elapsed_time": "0:11:41", "remaining_time": "0:46:40", "throughput": 14022.09, "total_tokens": 9832064} +{"current_steps": 3130, "total_steps": 15621, "loss": 0.5893, "lr": 1.939386515429904e-06, "epoch": 0.2003712950515332, "percentage": 20.04, "elapsed_time": "0:12:14", "remaining_time": "0:48:51", "throughput": 13395.05, "total_tokens": 9839488} +{"current_steps": 3135, "total_steps": 15621, "loss": 0.421, "lr": 1.9390028282119942e-06, "epoch": 0.20069137699251008, "percentage": 20.07, "elapsed_time": "0:12:15", "remaining_time": "0:48:48", "throughput": 13406.17, "total_tokens": 9856192} +{"current_steps": 3140, "total_steps": 15621, "loss": 0.5122, "lr": 1.938617968638029e-06, "epoch": 0.201011458933487, "percentage": 20.1, "elapsed_time": "0:12:15", "remaining_time": "0:48:44", "throughput": 13415.9, "total_tokens": 9871552} +{"current_steps": 3145, "total_steps": 15621, "loss": 0.5077, "lr": 1.938231937188509e-06, "epoch": 0.20133154087446387, "percentage": 20.13, "elapsed_time": "0:12:16", "remaining_time": "0:48:41", "throughput": 13425.03, "total_tokens": 9886016} +{"current_steps": 3150, "total_steps": 15621, "loss": 0.6156, "lr": 1.9378447343453995e-06, "epoch": 0.20165162281544075, "percentage": 20.17, "elapsed_time": "0:12:17", "remaining_time": "0:48:38", "throughput": 13436.7, "total_tokens": 9903552} +{"current_steps": 3155, "total_steps": 15621, "loss": 0.3458, "lr": 1.9374563605921275e-06, "epoch": 0.20197170475641765, "percentage": 20.2, "elapsed_time": "0:12:17", "remaining_time": "0:48:34", "throughput": 13447.73, "total_tokens": 9920320} +{"current_steps": 3160, "total_steps": 15621, "loss": 0.5926, "lr": 1.937066816413582e-06, "epoch": 0.20229178669739453, "percentage": 20.23, "elapsed_time": "0:12:18", "remaining_time": "0:48:31", "throughput": 13457.85, "total_tokens": 9935936} +{"current_steps": 3165, "total_steps": 15621, "loss": 0.4757, "lr": 1.9366761022961146e-06, "epoch": 0.2026118686383714, "percentage": 20.26, "elapsed_time": "0:12:18", "remaining_time": "0:48:27", "throughput": 13467.24, "total_tokens": 9950912} +{"current_steps": 3170, "total_steps": 15621, "loss": 0.5615, "lr": 1.9362842187275354e-06, "epoch": 0.20293195057934832, "percentage": 20.29, "elapsed_time": "0:12:19", "remaining_time": "0:48:24", "throughput": 13476.81, "total_tokens": 9966080} +{"current_steps": 3175, "total_steps": 15621, "loss": 0.4789, "lr": 1.9358911661971155e-06, "epoch": 0.2032520325203252, "percentage": 20.33, "elapsed_time": "0:12:20", "remaining_time": "0:48:21", "throughput": 13487.08, "total_tokens": 9982080} +{"current_steps": 3180, "total_steps": 15621, "loss": 0.4647, "lr": 1.9354969451955864e-06, "epoch": 0.2035721144613021, "percentage": 20.36, "elapsed_time": "0:12:20", "remaining_time": "0:48:17", "throughput": 13496.09, "total_tokens": 9996544} +{"current_steps": 3185, "total_steps": 15621, "loss": 0.5497, "lr": 1.9351015562151375e-06, "epoch": 0.20389219640227899, "percentage": 20.39, "elapsed_time": "0:12:21", "remaining_time": "0:48:14", "throughput": 13505.8, "total_tokens": 10011776} +{"current_steps": 3190, "total_steps": 15621, "loss": 0.4331, "lr": 1.934704999749416e-06, "epoch": 0.20421227834325587, "percentage": 20.42, "elapsed_time": "0:12:21", "remaining_time": "0:48:11", "throughput": 13515.61, "total_tokens": 10027264} +{"current_steps": 3195, "total_steps": 15621, "loss": 0.4203, "lr": 1.9343072762935274e-06, "epoch": 0.20453236028423277, "percentage": 20.45, "elapsed_time": "0:12:22", "remaining_time": "0:48:07", "throughput": 13525.24, "total_tokens": 10042432} +{"current_steps": 3200, "total_steps": 15621, "loss": 0.4135, "lr": 1.933908386344035e-06, "epoch": 0.20485244222520965, "percentage": 20.49, "elapsed_time": "0:12:23", "remaining_time": "0:48:04", "throughput": 13534.8, "total_tokens": 10057792} +{"current_steps": 3205, "total_steps": 15621, "loss": 0.5222, "lr": 1.9335083303989565e-06, "epoch": 0.20517252416618653, "percentage": 20.52, "elapsed_time": "0:12:23", "remaining_time": "0:48:01", "throughput": 13546.0, "total_tokens": 10074752} +{"current_steps": 3210, "total_steps": 15621, "loss": 0.576, "lr": 1.9331071089577674e-06, "epoch": 0.20549260610716344, "percentage": 20.55, "elapsed_time": "0:12:24", "remaining_time": "0:47:57", "throughput": 13556.25, "total_tokens": 10090752} +{"current_steps": 3215, "total_steps": 15621, "loss": 0.4961, "lr": 1.9327047225213963e-06, "epoch": 0.20581268804814032, "percentage": 20.58, "elapsed_time": "0:12:24", "remaining_time": "0:47:54", "throughput": 13565.89, "total_tokens": 10106240} +{"current_steps": 3220, "total_steps": 15621, "loss": 0.4128, "lr": 1.9323011715922283e-06, "epoch": 0.20613276998911723, "percentage": 20.61, "elapsed_time": "0:12:25", "remaining_time": "0:47:51", "throughput": 13575.7, "total_tokens": 10121856} +{"current_steps": 3225, "total_steps": 15621, "loss": 0.4764, "lr": 1.931896456674101e-06, "epoch": 0.2064528519300941, "percentage": 20.65, "elapsed_time": "0:12:26", "remaining_time": "0:47:48", "throughput": 13585.52, "total_tokens": 10137408} +{"current_steps": 3230, "total_steps": 15621, "loss": 0.4548, "lr": 1.931490578272306e-06, "epoch": 0.20677293387107099, "percentage": 20.68, "elapsed_time": "0:12:26", "remaining_time": "0:47:44", "throughput": 13594.98, "total_tokens": 10152640} +{"current_steps": 3235, "total_steps": 15621, "loss": 0.3538, "lr": 1.9310835368935867e-06, "epoch": 0.2070930158120479, "percentage": 20.71, "elapsed_time": "0:12:27", "remaining_time": "0:47:41", "throughput": 13604.45, "total_tokens": 10167936} +{"current_steps": 3240, "total_steps": 15621, "loss": 0.4205, "lr": 1.93067533304614e-06, "epoch": 0.20741309775302477, "percentage": 20.74, "elapsed_time": "0:12:27", "remaining_time": "0:47:38", "throughput": 13614.12, "total_tokens": 10183360} +{"current_steps": 3245, "total_steps": 15621, "loss": 0.5557, "lr": 1.9302659672396128e-06, "epoch": 0.20773317969400165, "percentage": 20.77, "elapsed_time": "0:12:28", "remaining_time": "0:47:34", "throughput": 13623.39, "total_tokens": 10198208} +{"current_steps": 3250, "total_steps": 15621, "loss": 0.4903, "lr": 1.9298554399851025e-06, "epoch": 0.20805326163497856, "percentage": 20.81, "elapsed_time": "0:12:29", "remaining_time": "0:47:31", "throughput": 13632.9, "total_tokens": 10213568} +{"current_steps": 3255, "total_steps": 15621, "loss": 0.4833, "lr": 1.929443751795158e-06, "epoch": 0.20837334357595544, "percentage": 20.84, "elapsed_time": "0:12:29", "remaining_time": "0:47:28", "throughput": 13643.36, "total_tokens": 10230080} +{"current_steps": 3260, "total_steps": 15621, "loss": 0.4759, "lr": 1.929030903183776e-06, "epoch": 0.20869342551693235, "percentage": 20.87, "elapsed_time": "0:12:30", "remaining_time": "0:47:25", "throughput": 13654.0, "total_tokens": 10246912} +{"current_steps": 3265, "total_steps": 15621, "loss": 0.5368, "lr": 1.9286168946664033e-06, "epoch": 0.20901350745790923, "percentage": 20.9, "elapsed_time": "0:12:31", "remaining_time": "0:47:22", "throughput": 13663.56, "total_tokens": 10262464} +{"current_steps": 3270, "total_steps": 15621, "loss": 0.6679, "lr": 1.9282017267599352e-06, "epoch": 0.2093335893988861, "percentage": 20.93, "elapsed_time": "0:12:31", "remaining_time": "0:47:19", "throughput": 13673.14, "total_tokens": 10278016} +{"current_steps": 3275, "total_steps": 15621, "loss": 0.5054, "lr": 1.9277853999827125e-06, "epoch": 0.209653671339863, "percentage": 20.97, "elapsed_time": "0:12:32", "remaining_time": "0:47:16", "throughput": 13682.95, "total_tokens": 10293824} +{"current_steps": 3280, "total_steps": 15621, "loss": 0.5116, "lr": 1.9273679148545244e-06, "epoch": 0.2099737532808399, "percentage": 21.0, "elapsed_time": "0:12:32", "remaining_time": "0:47:12", "throughput": 13692.8, "total_tokens": 10309568} +{"current_steps": 3285, "total_steps": 15621, "loss": 0.4229, "lr": 1.9269492718966062e-06, "epoch": 0.21029383522181677, "percentage": 21.03, "elapsed_time": "0:12:33", "remaining_time": "0:47:09", "throughput": 13702.86, "total_tokens": 10325696} +{"current_steps": 3290, "total_steps": 15621, "loss": 0.5261, "lr": 1.9265294716316384e-06, "epoch": 0.21061391716279368, "percentage": 21.06, "elapsed_time": "0:12:34", "remaining_time": "0:47:06", "throughput": 13713.04, "total_tokens": 10342016} +{"current_steps": 3295, "total_steps": 15621, "loss": 0.4688, "lr": 1.926108514583747e-06, "epoch": 0.21093399910377056, "percentage": 21.09, "elapsed_time": "0:12:34", "remaining_time": "0:47:03", "throughput": 13722.54, "total_tokens": 10357632} +{"current_steps": 3300, "total_steps": 15621, "loss": 0.4801, "lr": 1.925686401278501e-06, "epoch": 0.21125408104474747, "percentage": 21.13, "elapsed_time": "0:12:35", "remaining_time": "0:47:00", "throughput": 13731.96, "total_tokens": 10373056} +{"current_steps": 3305, "total_steps": 15621, "loss": 0.6373, "lr": 1.9252631322429143e-06, "epoch": 0.21157416298572435, "percentage": 21.16, "elapsed_time": "0:12:36", "remaining_time": "0:46:57", "throughput": 13742.05, "total_tokens": 10389248} +{"current_steps": 3310, "total_steps": 15621, "loss": 0.439, "lr": 1.9248387080054435e-06, "epoch": 0.21189424492670123, "percentage": 21.19, "elapsed_time": "0:12:36", "remaining_time": "0:46:54", "throughput": 13751.68, "total_tokens": 10404864} +{"current_steps": 3315, "total_steps": 15621, "loss": 0.4878, "lr": 1.9244131290959864e-06, "epoch": 0.21221432686767813, "percentage": 21.22, "elapsed_time": "0:12:37", "remaining_time": "0:46:51", "throughput": 13761.03, "total_tokens": 10420416} +{"current_steps": 3320, "total_steps": 15621, "loss": 0.4244, "lr": 1.9239863960458845e-06, "epoch": 0.212534408808655, "percentage": 21.25, "elapsed_time": "0:12:37", "remaining_time": "0:46:47", "throughput": 13769.99, "total_tokens": 10435456} +{"current_steps": 3325, "total_steps": 15621, "loss": 0.4881, "lr": 1.923558509387918e-06, "epoch": 0.21285449074963192, "percentage": 21.29, "elapsed_time": "0:12:38", "remaining_time": "0:46:44", "throughput": 13779.88, "total_tokens": 10451584} +{"current_steps": 3330, "total_steps": 15621, "loss": 0.3745, "lr": 1.9231294696563086e-06, "epoch": 0.2131745726906088, "percentage": 21.32, "elapsed_time": "0:12:39", "remaining_time": "0:46:41", "throughput": 13789.71, "total_tokens": 10467584} +{"current_steps": 3335, "total_steps": 15621, "loss": 0.4146, "lr": 1.922699277386718e-06, "epoch": 0.21349465463158568, "percentage": 21.35, "elapsed_time": "0:12:39", "remaining_time": "0:46:38", "throughput": 13799.17, "total_tokens": 10483264} +{"current_steps": 3340, "total_steps": 15621, "loss": 0.5865, "lr": 1.9222679331162454e-06, "epoch": 0.21381473657256259, "percentage": 21.38, "elapsed_time": "0:12:40", "remaining_time": "0:46:35", "throughput": 13808.34, "total_tokens": 10498560} +{"current_steps": 3345, "total_steps": 15621, "loss": 0.4515, "lr": 1.92183543738343e-06, "epoch": 0.21413481851353947, "percentage": 21.41, "elapsed_time": "0:12:40", "remaining_time": "0:46:32", "throughput": 13817.71, "total_tokens": 10514176} +{"current_steps": 3350, "total_steps": 15621, "loss": 0.4363, "lr": 1.9214017907282475e-06, "epoch": 0.21445490045451635, "percentage": 21.45, "elapsed_time": "0:12:41", "remaining_time": "0:46:29", "throughput": 13827.24, "total_tokens": 10529792} +{"current_steps": 3355, "total_steps": 15621, "loss": 0.4809, "lr": 1.9209669936921105e-06, "epoch": 0.21477498239549325, "percentage": 21.48, "elapsed_time": "0:12:42", "remaining_time": "0:46:26", "throughput": 13836.94, "total_tokens": 10545856} +{"current_steps": 3360, "total_steps": 15621, "loss": 0.4092, "lr": 1.920531046817869e-06, "epoch": 0.21509506433647013, "percentage": 21.51, "elapsed_time": "0:12:42", "remaining_time": "0:46:23", "throughput": 13846.95, "total_tokens": 10562368} +{"current_steps": 3365, "total_steps": 15621, "loss": 0.6238, "lr": 1.9200939506498067e-06, "epoch": 0.21541514627744704, "percentage": 21.54, "elapsed_time": "0:12:43", "remaining_time": "0:46:20", "throughput": 13855.72, "total_tokens": 10577280} +{"current_steps": 3370, "total_steps": 15621, "loss": 0.5817, "lr": 1.9196557057336446e-06, "epoch": 0.21573522821842392, "percentage": 21.57, "elapsed_time": "0:12:43", "remaining_time": "0:46:17", "throughput": 13864.71, "total_tokens": 10592384} +{"current_steps": 3375, "total_steps": 15621, "loss": 0.4498, "lr": 1.9192163126165354e-06, "epoch": 0.2160553101594008, "percentage": 21.61, "elapsed_time": "0:12:44", "remaining_time": "0:46:14", "throughput": 13874.42, "total_tokens": 10608704} +{"current_steps": 3380, "total_steps": 15621, "loss": 0.3997, "lr": 1.9187757718470673e-06, "epoch": 0.2163753921003777, "percentage": 21.64, "elapsed_time": "0:12:45", "remaining_time": "0:46:11", "throughput": 13884.35, "total_tokens": 10625280} +{"current_steps": 3385, "total_steps": 15621, "loss": 0.5339, "lr": 1.9183340839752606e-06, "epoch": 0.21669547404135459, "percentage": 21.67, "elapsed_time": "0:12:45", "remaining_time": "0:46:08", "throughput": 13893.9, "total_tokens": 10641152} +{"current_steps": 3390, "total_steps": 15621, "loss": 0.4193, "lr": 1.9178912495525672e-06, "epoch": 0.21701555598233147, "percentage": 21.7, "elapsed_time": "0:12:46", "remaining_time": "0:46:05", "throughput": 13903.56, "total_tokens": 10657472} +{"current_steps": 3395, "total_steps": 15621, "loss": 0.5054, "lr": 1.917447269131872e-06, "epoch": 0.21733563792330837, "percentage": 21.73, "elapsed_time": "0:12:47", "remaining_time": "0:46:02", "throughput": 13913.27, "total_tokens": 10673600} +{"current_steps": 3400, "total_steps": 15621, "loss": 0.5693, "lr": 1.917002143267489e-06, "epoch": 0.21765571986428525, "percentage": 21.77, "elapsed_time": "0:12:47", "remaining_time": "0:45:59", "throughput": 13922.56, "total_tokens": 10689344} +{"current_steps": 3405, "total_steps": 15621, "loss": 0.4478, "lr": 1.9165558725151633e-06, "epoch": 0.21797580180526216, "percentage": 21.8, "elapsed_time": "0:12:48", "remaining_time": "0:45:56", "throughput": 13931.3, "total_tokens": 10704384} +{"current_steps": 3410, "total_steps": 15621, "loss": 0.5002, "lr": 1.9161084574320692e-06, "epoch": 0.21829588374623904, "percentage": 21.83, "elapsed_time": "0:12:48", "remaining_time": "0:45:53", "throughput": 13941.0, "total_tokens": 10720512} +{"current_steps": 3415, "total_steps": 15621, "loss": 0.4727, "lr": 1.91565989857681e-06, "epoch": 0.21861596568721592, "percentage": 21.86, "elapsed_time": "0:12:49", "remaining_time": "0:45:50", "throughput": 13949.87, "total_tokens": 10735744} +{"current_steps": 3420, "total_steps": 15621, "loss": 0.4573, "lr": 1.9152101965094162e-06, "epoch": 0.21893604762819283, "percentage": 21.89, "elapsed_time": "0:12:50", "remaining_time": "0:45:47", "throughput": 13958.64, "total_tokens": 10750848} +{"current_steps": 3425, "total_steps": 15621, "loss": 0.4878, "lr": 1.9147593517913464e-06, "epoch": 0.2192561295691697, "percentage": 21.93, "elapsed_time": "0:12:50", "remaining_time": "0:45:44", "throughput": 13967.07, "total_tokens": 10765632} +{"current_steps": 3430, "total_steps": 15621, "loss": 0.3856, "lr": 1.914307364985485e-06, "epoch": 0.21957621151014659, "percentage": 21.96, "elapsed_time": "0:12:51", "remaining_time": "0:45:41", "throughput": 13975.87, "total_tokens": 10780928} +{"current_steps": 3435, "total_steps": 15621, "loss": 0.4217, "lr": 1.913854236656144e-06, "epoch": 0.2198962934511235, "percentage": 21.99, "elapsed_time": "0:12:52", "remaining_time": "0:45:38", "throughput": 13985.17, "total_tokens": 10796864} +{"current_steps": 3440, "total_steps": 15621, "loss": 0.4653, "lr": 1.9133999673690584e-06, "epoch": 0.22021637539210037, "percentage": 22.02, "elapsed_time": "0:12:52", "remaining_time": "0:45:35", "throughput": 13994.61, "total_tokens": 10812672} +{"current_steps": 3445, "total_steps": 15621, "loss": 0.4709, "lr": 1.9129445576913886e-06, "epoch": 0.22053645733307728, "percentage": 22.05, "elapsed_time": "0:12:53", "remaining_time": "0:45:32", "throughput": 14004.01, "total_tokens": 10828544} +{"current_steps": 3450, "total_steps": 15621, "loss": 0.5335, "lr": 1.91248800819172e-06, "epoch": 0.22085653927405416, "percentage": 22.09, "elapsed_time": "0:12:53", "remaining_time": "0:45:30", "throughput": 14013.28, "total_tokens": 10844288} +{"current_steps": 3455, "total_steps": 15621, "loss": 0.5192, "lr": 1.912030319440059e-06, "epoch": 0.22117662121503104, "percentage": 22.12, "elapsed_time": "0:12:54", "remaining_time": "0:45:27", "throughput": 14022.58, "total_tokens": 10860160} +{"current_steps": 3460, "total_steps": 15621, "loss": 0.6043, "lr": 1.9115714920078354e-06, "epoch": 0.22149670315600795, "percentage": 22.15, "elapsed_time": "0:12:55", "remaining_time": "0:45:24", "throughput": 14031.77, "total_tokens": 10875968} +{"current_steps": 3465, "total_steps": 15621, "loss": 0.3252, "lr": 1.9111115264679017e-06, "epoch": 0.22181678509698483, "percentage": 22.18, "elapsed_time": "0:12:55", "remaining_time": "0:45:21", "throughput": 14041.31, "total_tokens": 10892096} +{"current_steps": 3470, "total_steps": 15621, "loss": 0.4378, "lr": 1.910650423394529e-06, "epoch": 0.2221368670379617, "percentage": 22.21, "elapsed_time": "0:12:56", "remaining_time": "0:45:18", "throughput": 14051.02, "total_tokens": 10908544} +{"current_steps": 3475, "total_steps": 15621, "loss": 0.4817, "lr": 1.910188183363411e-06, "epoch": 0.2224569489789386, "percentage": 22.25, "elapsed_time": "0:12:56", "remaining_time": "0:45:15", "throughput": 14060.44, "total_tokens": 10924544} +{"current_steps": 3480, "total_steps": 15621, "loss": 0.4441, "lr": 1.909724806951659e-06, "epoch": 0.2227770309199155, "percentage": 22.28, "elapsed_time": "0:12:57", "remaining_time": "0:45:12", "throughput": 14070.8, "total_tokens": 10941888} +{"current_steps": 3485, "total_steps": 15621, "loss": 0.4669, "lr": 1.909260294737804e-06, "epoch": 0.2230971128608924, "percentage": 22.31, "elapsed_time": "0:12:58", "remaining_time": "0:45:10", "throughput": 14080.74, "total_tokens": 10958592} +{"current_steps": 3490, "total_steps": 15621, "loss": 0.555, "lr": 1.9087946473017953e-06, "epoch": 0.22341719480186928, "percentage": 22.34, "elapsed_time": "0:12:58", "remaining_time": "0:45:07", "throughput": 14089.72, "total_tokens": 10974208} +{"current_steps": 3495, "total_steps": 15621, "loss": 0.4304, "lr": 1.9083278652249992e-06, "epoch": 0.22373727674284616, "percentage": 22.37, "elapsed_time": "0:12:59", "remaining_time": "0:45:04", "throughput": 14098.0, "total_tokens": 10988928} +{"current_steps": 3500, "total_steps": 15621, "loss": 0.425, "lr": 1.9078599490901983e-06, "epoch": 0.22405735868382307, "percentage": 22.41, "elapsed_time": "0:13:00", "remaining_time": "0:45:01", "throughput": 14108.19, "total_tokens": 11005952} +{"current_steps": 3505, "total_steps": 15621, "loss": 0.3971, "lr": 1.9073908994815914e-06, "epoch": 0.22437744062479995, "percentage": 22.44, "elapsed_time": "0:13:00", "remaining_time": "0:44:58", "throughput": 14116.41, "total_tokens": 11020608} +{"current_steps": 3510, "total_steps": 15621, "loss": 0.4862, "lr": 1.9069207169847928e-06, "epoch": 0.22469752256577685, "percentage": 22.47, "elapsed_time": "0:13:01", "remaining_time": "0:44:55", "throughput": 14125.85, "total_tokens": 11036736} +{"current_steps": 3515, "total_steps": 15621, "loss": 0.3584, "lr": 1.9064494021868302e-06, "epoch": 0.22501760450675373, "percentage": 22.5, "elapsed_time": "0:13:01", "remaining_time": "0:44:53", "throughput": 14135.05, "total_tokens": 11052480} +{"current_steps": 3520, "total_steps": 15621, "loss": 0.48, "lr": 1.9059769556761464e-06, "epoch": 0.2253376864477306, "percentage": 22.53, "elapsed_time": "0:13:02", "remaining_time": "0:44:50", "throughput": 14144.3, "total_tokens": 11068416} +{"current_steps": 3525, "total_steps": 15621, "loss": 0.4454, "lr": 1.9055033780425962e-06, "epoch": 0.22565776838870752, "percentage": 22.57, "elapsed_time": "0:13:03", "remaining_time": "0:44:47", "throughput": 14155.15, "total_tokens": 11086400} +{"current_steps": 3530, "total_steps": 15621, "loss": 0.562, "lr": 1.9050286698774464e-06, "epoch": 0.2259778503296844, "percentage": 22.6, "elapsed_time": "0:13:03", "remaining_time": "0:44:44", "throughput": 14164.63, "total_tokens": 11102848} +{"current_steps": 3535, "total_steps": 15621, "loss": 0.5359, "lr": 1.904552831773376e-06, "epoch": 0.22629793227066128, "percentage": 22.63, "elapsed_time": "0:13:04", "remaining_time": "0:44:41", "throughput": 14173.27, "total_tokens": 11118080} +{"current_steps": 3540, "total_steps": 15621, "loss": 0.4967, "lr": 1.9040758643244748e-06, "epoch": 0.22661801421163819, "percentage": 22.66, "elapsed_time": "0:13:05", "remaining_time": "0:44:39", "throughput": 14181.72, "total_tokens": 11133120} +{"current_steps": 3545, "total_steps": 15621, "loss": 0.4694, "lr": 1.903597768126242e-06, "epoch": 0.22693809615261507, "percentage": 22.69, "elapsed_time": "0:13:05", "remaining_time": "0:44:36", "throughput": 14191.56, "total_tokens": 11150144} +{"current_steps": 3550, "total_steps": 15621, "loss": 0.4787, "lr": 1.9031185437755862e-06, "epoch": 0.22725817809359197, "percentage": 22.73, "elapsed_time": "0:13:06", "remaining_time": "0:44:33", "throughput": 14200.35, "total_tokens": 11165760} +{"current_steps": 3555, "total_steps": 15621, "loss": 0.4582, "lr": 1.9026381918708246e-06, "epoch": 0.22757826003456885, "percentage": 22.76, "elapsed_time": "0:13:06", "remaining_time": "0:44:30", "throughput": 14208.27, "total_tokens": 11180096} +{"current_steps": 3560, "total_steps": 15621, "loss": 0.3618, "lr": 1.9021567130116822e-06, "epoch": 0.22789834197554573, "percentage": 22.79, "elapsed_time": "0:13:07", "remaining_time": "0:44:27", "throughput": 14216.77, "total_tokens": 11195584} +{"current_steps": 3565, "total_steps": 15621, "loss": 0.3909, "lr": 1.9016741077992916e-06, "epoch": 0.22821842391652264, "percentage": 22.82, "elapsed_time": "0:13:08", "remaining_time": "0:44:25", "throughput": 14225.4, "total_tokens": 11210944} +{"current_steps": 3570, "total_steps": 15621, "loss": 0.4052, "lr": 1.90119037683619e-06, "epoch": 0.22853850585749952, "percentage": 22.85, "elapsed_time": "0:13:08", "remaining_time": "0:44:22", "throughput": 14234.87, "total_tokens": 11227392} +{"current_steps": 3575, "total_steps": 15621, "loss": 0.6492, "lr": 1.9007055207263223e-06, "epoch": 0.2288585877984764, "percentage": 22.89, "elapsed_time": "0:13:09", "remaining_time": "0:44:19", "throughput": 14244.66, "total_tokens": 11244416} +{"current_steps": 3580, "total_steps": 15621, "loss": 0.3588, "lr": 1.900219540075036e-06, "epoch": 0.2291786697394533, "percentage": 22.92, "elapsed_time": "0:13:10", "remaining_time": "0:44:17", "throughput": 14253.93, "total_tokens": 11260672} +{"current_steps": 3585, "total_steps": 15621, "loss": 0.4749, "lr": 1.8997324354890845e-06, "epoch": 0.22949875168043019, "percentage": 22.95, "elapsed_time": "0:13:10", "remaining_time": "0:44:14", "throughput": 14263.61, "total_tokens": 11277504} +{"current_steps": 3590, "total_steps": 15621, "loss": 0.539, "lr": 1.8992442075766233e-06, "epoch": 0.2298188336214071, "percentage": 22.98, "elapsed_time": "0:13:11", "remaining_time": "0:44:11", "throughput": 14272.31, "total_tokens": 11293184} +{"current_steps": 3595, "total_steps": 15621, "loss": 0.3191, "lr": 1.8987548569472105e-06, "epoch": 0.23013891556238397, "percentage": 23.01, "elapsed_time": "0:13:11", "remaining_time": "0:44:08", "throughput": 14280.9, "total_tokens": 11308480} +{"current_steps": 3600, "total_steps": 15621, "loss": 0.396, "lr": 1.8982643842118064e-06, "epoch": 0.23045899750336085, "percentage": 23.05, "elapsed_time": "0:13:12", "remaining_time": "0:44:06", "throughput": 14289.43, "total_tokens": 11323840} +{"current_steps": 3605, "total_steps": 15621, "loss": 0.5821, "lr": 1.8977727899827716e-06, "epoch": 0.23077907944433776, "percentage": 23.08, "elapsed_time": "0:13:13", "remaining_time": "0:44:03", "throughput": 14298.21, "total_tokens": 11339456} +{"current_steps": 3610, "total_steps": 15621, "loss": 0.6554, "lr": 1.8972800748738678e-06, "epoch": 0.23109916138531464, "percentage": 23.11, "elapsed_time": "0:13:13", "remaining_time": "0:44:00", "throughput": 14306.66, "total_tokens": 11354880} +{"current_steps": 3615, "total_steps": 15621, "loss": 0.5226, "lr": 1.896786239500255e-06, "epoch": 0.23141924332629152, "percentage": 23.14, "elapsed_time": "0:13:14", "remaining_time": "0:43:57", "throughput": 14315.04, "total_tokens": 11369984} +{"current_steps": 3620, "total_steps": 15621, "loss": 0.429, "lr": 1.8962912844784928e-06, "epoch": 0.23173932526726843, "percentage": 23.17, "elapsed_time": "0:13:14", "remaining_time": "0:43:55", "throughput": 14323.08, "total_tokens": 11384640} +{"current_steps": 3625, "total_steps": 15621, "loss": 0.4945, "lr": 1.8957952104265384e-06, "epoch": 0.2320594072082453, "percentage": 23.21, "elapsed_time": "0:13:15", "remaining_time": "0:43:52", "throughput": 14332.44, "total_tokens": 11401152} +{"current_steps": 3630, "total_steps": 15621, "loss": 0.4535, "lr": 1.8952980179637458e-06, "epoch": 0.2323794891492222, "percentage": 23.24, "elapsed_time": "0:13:16", "remaining_time": "0:43:49", "throughput": 14341.2, "total_tokens": 11416896} +{"current_steps": 3635, "total_steps": 15621, "loss": 0.4899, "lr": 1.8947997077108662e-06, "epoch": 0.2326995710901991, "percentage": 23.27, "elapsed_time": "0:13:16", "remaining_time": "0:43:47", "throughput": 14350.12, "total_tokens": 11432832} +{"current_steps": 3640, "total_steps": 15621, "loss": 0.4807, "lr": 1.894300280290045e-06, "epoch": 0.23301965303117597, "percentage": 23.3, "elapsed_time": "0:13:17", "remaining_time": "0:43:44", "throughput": 14358.63, "total_tokens": 11448320} +{"current_steps": 3645, "total_steps": 15621, "loss": 0.5674, "lr": 1.8937997363248237e-06, "epoch": 0.23333973497215288, "percentage": 23.33, "elapsed_time": "0:13:17", "remaining_time": "0:43:41", "throughput": 14366.83, "total_tokens": 11463488} +{"current_steps": 3650, "total_steps": 15621, "loss": 0.4527, "lr": 1.8932980764401373e-06, "epoch": 0.23365981691312976, "percentage": 23.37, "elapsed_time": "0:13:18", "remaining_time": "0:43:38", "throughput": 14375.11, "total_tokens": 11478592} +{"current_steps": 3655, "total_steps": 15621, "loss": 0.3564, "lr": 1.8927953012623141e-06, "epoch": 0.23397989885410664, "percentage": 23.4, "elapsed_time": "0:13:19", "remaining_time": "0:43:36", "throughput": 14383.89, "total_tokens": 11494720} +{"current_steps": 3660, "total_steps": 15621, "loss": 0.4846, "lr": 1.8922914114190744e-06, "epoch": 0.23429998079508355, "percentage": 23.43, "elapsed_time": "0:13:19", "remaining_time": "0:43:33", "throughput": 14392.95, "total_tokens": 11511232} +{"current_steps": 3665, "total_steps": 15621, "loss": 0.5093, "lr": 1.8917864075395312e-06, "epoch": 0.23462006273606043, "percentage": 23.46, "elapsed_time": "0:13:20", "remaining_time": "0:43:31", "throughput": 14401.62, "total_tokens": 11527040} +{"current_steps": 3670, "total_steps": 15621, "loss": 0.4461, "lr": 1.8912802902541873e-06, "epoch": 0.23494014467703733, "percentage": 23.49, "elapsed_time": "0:13:21", "remaining_time": "0:43:28", "throughput": 14410.09, "total_tokens": 11542528} +{"current_steps": 3675, "total_steps": 15621, "loss": 0.4974, "lr": 1.8907730601949362e-06, "epoch": 0.2352602266180142, "percentage": 23.53, "elapsed_time": "0:13:21", "remaining_time": "0:43:25", "throughput": 14418.27, "total_tokens": 11557696} +{"current_steps": 3680, "total_steps": 15621, "loss": 0.4648, "lr": 1.8902647179950608e-06, "epoch": 0.2355803085589911, "percentage": 23.56, "elapsed_time": "0:13:22", "remaining_time": "0:43:23", "throughput": 14427.94, "total_tokens": 11574848} +{"current_steps": 3685, "total_steps": 15621, "loss": 0.5108, "lr": 1.889755264289232e-06, "epoch": 0.235900390499968, "percentage": 23.59, "elapsed_time": "0:13:22", "remaining_time": "0:43:20", "throughput": 14435.88, "total_tokens": 11589696} +{"current_steps": 3690, "total_steps": 15621, "loss": 0.384, "lr": 1.8892446997135087e-06, "epoch": 0.23622047244094488, "percentage": 23.62, "elapsed_time": "0:13:23", "remaining_time": "0:43:17", "throughput": 14445.5, "total_tokens": 11606848} +{"current_steps": 3695, "total_steps": 15621, "loss": 0.6707, "lr": 1.888733024905337e-06, "epoch": 0.23654055438192176, "percentage": 23.65, "elapsed_time": "0:13:24", "remaining_time": "0:43:15", "throughput": 14455.04, "total_tokens": 11623744} +{"current_steps": 3700, "total_steps": 15621, "loss": 0.4755, "lr": 1.888220240503549e-06, "epoch": 0.23686063632289867, "percentage": 23.69, "elapsed_time": "0:13:24", "remaining_time": "0:43:12", "throughput": 14464.13, "total_tokens": 11640256} +{"current_steps": 3705, "total_steps": 15621, "loss": 0.412, "lr": 1.8877063471483618e-06, "epoch": 0.23718071826387555, "percentage": 23.72, "elapsed_time": "0:13:25", "remaining_time": "0:43:10", "throughput": 14472.46, "total_tokens": 11655744} +{"current_steps": 3710, "total_steps": 15621, "loss": 0.2935, "lr": 1.8871913454813772e-06, "epoch": 0.23750080020485245, "percentage": 23.75, "elapsed_time": "0:13:25", "remaining_time": "0:43:07", "throughput": 14480.84, "total_tokens": 11671104} +{"current_steps": 3715, "total_steps": 15621, "loss": 0.3898, "lr": 1.886675236145581e-06, "epoch": 0.23782088214582933, "percentage": 23.78, "elapsed_time": "0:13:26", "remaining_time": "0:43:04", "throughput": 14489.49, "total_tokens": 11686848} +{"current_steps": 3720, "total_steps": 15621, "loss": 0.5018, "lr": 1.8861580197853422e-06, "epoch": 0.2381409640868062, "percentage": 23.81, "elapsed_time": "0:13:27", "remaining_time": "0:43:02", "throughput": 14497.6, "total_tokens": 11701952} +{"current_steps": 3725, "total_steps": 15621, "loss": 0.4647, "lr": 1.8856396970464105e-06, "epoch": 0.23846104602778312, "percentage": 23.85, "elapsed_time": "0:13:27", "remaining_time": "0:42:59", "throughput": 14506.76, "total_tokens": 11718592} +{"current_steps": 3730, "total_steps": 15621, "loss": 0.5143, "lr": 1.8851202685759189e-06, "epoch": 0.23878112796876, "percentage": 23.88, "elapsed_time": "0:13:28", "remaining_time": "0:42:57", "throughput": 14515.19, "total_tokens": 11734208} +{"current_steps": 3735, "total_steps": 15621, "loss": 0.407, "lr": 1.8845997350223792e-06, "epoch": 0.2391012099097369, "percentage": 23.91, "elapsed_time": "0:13:28", "remaining_time": "0:42:54", "throughput": 14523.03, "total_tokens": 11748992} +{"current_steps": 3740, "total_steps": 15621, "loss": 0.4217, "lr": 1.8840780970356842e-06, "epoch": 0.23942129185071379, "percentage": 23.94, "elapsed_time": "0:13:29", "remaining_time": "0:42:51", "throughput": 14531.41, "total_tokens": 11764608} +{"current_steps": 3745, "total_steps": 15621, "loss": 0.4078, "lr": 1.8835553552671048e-06, "epoch": 0.23974137379169067, "percentage": 23.97, "elapsed_time": "0:13:30", "remaining_time": "0:42:49", "throughput": 14540.21, "total_tokens": 11780800} +{"current_steps": 3750, "total_steps": 15621, "loss": 0.4593, "lr": 1.8830315103692902e-06, "epoch": 0.24006145573266757, "percentage": 24.01, "elapsed_time": "0:13:30", "remaining_time": "0:42:46", "throughput": 14548.11, "total_tokens": 11795776} +{"current_steps": 3755, "total_steps": 15621, "loss": 0.5071, "lr": 1.8825065629962669e-06, "epoch": 0.24038153767364445, "percentage": 24.04, "elapsed_time": "0:13:31", "remaining_time": "0:42:44", "throughput": 14556.71, "total_tokens": 11811776} +{"current_steps": 3760, "total_steps": 15621, "loss": 0.4852, "lr": 1.881980513803438e-06, "epoch": 0.24070161961462133, "percentage": 24.07, "elapsed_time": "0:13:32", "remaining_time": "0:42:41", "throughput": 14565.55, "total_tokens": 11828224} +{"current_steps": 3765, "total_steps": 15621, "loss": 0.5035, "lr": 1.881453363447582e-06, "epoch": 0.24102170155559824, "percentage": 24.1, "elapsed_time": "0:13:32", "remaining_time": "0:42:39", "throughput": 14573.72, "total_tokens": 11843904} +{"current_steps": 3770, "total_steps": 15621, "loss": 0.5574, "lr": 1.880925112586852e-06, "epoch": 0.24134178349657512, "percentage": 24.13, "elapsed_time": "0:13:33", "remaining_time": "0:42:36", "throughput": 14581.9, "total_tokens": 11859392} +{"current_steps": 3775, "total_steps": 15621, "loss": 0.4427, "lr": 1.8803957618807762e-06, "epoch": 0.24166186543755203, "percentage": 24.17, "elapsed_time": "0:13:33", "remaining_time": "0:42:34", "throughput": 14590.89, "total_tokens": 11875968} +{"current_steps": 3780, "total_steps": 15621, "loss": 0.4404, "lr": 1.8798653119902548e-06, "epoch": 0.2419819473785289, "percentage": 24.2, "elapsed_time": "0:13:34", "remaining_time": "0:42:31", "throughput": 14599.03, "total_tokens": 11891584} +{"current_steps": 3785, "total_steps": 15621, "loss": 0.5029, "lr": 1.8793337635775603e-06, "epoch": 0.24230202931950579, "percentage": 24.23, "elapsed_time": "0:13:35", "remaining_time": "0:42:29", "throughput": 14607.08, "total_tokens": 11906944} +{"current_steps": 3790, "total_steps": 15621, "loss": 0.4729, "lr": 1.8788011173063376e-06, "epoch": 0.2426221112604827, "percentage": 24.26, "elapsed_time": "0:13:35", "remaining_time": "0:42:26", "throughput": 14615.19, "total_tokens": 11922368} +{"current_steps": 3795, "total_steps": 15621, "loss": 0.5181, "lr": 1.8782673738416018e-06, "epoch": 0.24294219320145957, "percentage": 24.29, "elapsed_time": "0:13:36", "remaining_time": "0:42:23", "throughput": 14623.78, "total_tokens": 11938432} +{"current_steps": 3800, "total_steps": 15621, "loss": 0.5078, "lr": 1.877732533849737e-06, "epoch": 0.24326227514243645, "percentage": 24.33, "elapsed_time": "0:13:37", "remaining_time": "0:42:21", "throughput": 14633.86, "total_tokens": 11956608} +{"current_steps": 3805, "total_steps": 15621, "loss": 0.4394, "lr": 1.8771965979984988e-06, "epoch": 0.24358235708341336, "percentage": 24.36, "elapsed_time": "0:13:37", "remaining_time": "0:42:19", "throughput": 14642.08, "total_tokens": 11972480} +{"current_steps": 3810, "total_steps": 15621, "loss": 0.3889, "lr": 1.8766595669570084e-06, "epoch": 0.24390243902439024, "percentage": 24.39, "elapsed_time": "0:13:38", "remaining_time": "0:42:16", "throughput": 14649.52, "total_tokens": 11987072} +{"current_steps": 3815, "total_steps": 15621, "loss": 0.4361, "lr": 1.8761214413957553e-06, "epoch": 0.24422252096536715, "percentage": 24.42, "elapsed_time": "0:13:38", "remaining_time": "0:42:14", "throughput": 14657.28, "total_tokens": 12002112} +{"current_steps": 3820, "total_steps": 15621, "loss": 0.3493, "lr": 1.8755822219865963e-06, "epoch": 0.24454260290634403, "percentage": 24.45, "elapsed_time": "0:13:39", "remaining_time": "0:42:11", "throughput": 14664.84, "total_tokens": 12016960} +{"current_steps": 3825, "total_steps": 15621, "loss": 0.4331, "lr": 1.875041909402752e-06, "epoch": 0.2448626848473209, "percentage": 24.49, "elapsed_time": "0:13:40", "remaining_time": "0:42:08", "throughput": 14673.04, "total_tokens": 12032576} +{"current_steps": 3830, "total_steps": 15621, "loss": 0.3638, "lr": 1.8745005043188102e-06, "epoch": 0.2451827667882978, "percentage": 24.52, "elapsed_time": "0:13:40", "remaining_time": "0:42:06", "throughput": 14681.55, "total_tokens": 12048768} +{"current_steps": 3835, "total_steps": 15621, "loss": 0.395, "lr": 1.8739580074107208e-06, "epoch": 0.2455028487292747, "percentage": 24.55, "elapsed_time": "0:13:41", "remaining_time": "0:42:04", "throughput": 14690.29, "total_tokens": 12065088} +{"current_steps": 3840, "total_steps": 15621, "loss": 0.6844, "lr": 1.873414419355798e-06, "epoch": 0.24582293067025157, "percentage": 24.58, "elapsed_time": "0:13:41", "remaining_time": "0:42:01", "throughput": 14698.43, "total_tokens": 12080704} +{"current_steps": 3845, "total_steps": 15621, "loss": 0.4292, "lr": 1.872869740832717e-06, "epoch": 0.24614301261122848, "percentage": 24.61, "elapsed_time": "0:13:42", "remaining_time": "0:41:59", "throughput": 14706.76, "total_tokens": 12096704} +{"current_steps": 3850, "total_steps": 15621, "loss": 0.6103, "lr": 1.8723239725215165e-06, "epoch": 0.24646309455220536, "percentage": 24.65, "elapsed_time": "0:13:43", "remaining_time": "0:41:56", "throughput": 14714.21, "total_tokens": 12111488} +{"current_steps": 3855, "total_steps": 15621, "loss": 0.4206, "lr": 1.871777115103594e-06, "epoch": 0.24678317649318227, "percentage": 24.68, "elapsed_time": "0:13:43", "remaining_time": "0:41:54", "throughput": 14723.18, "total_tokens": 12128192} +{"current_steps": 3860, "total_steps": 15621, "loss": 0.4786, "lr": 1.8712291692617074e-06, "epoch": 0.24710325843415915, "percentage": 24.71, "elapsed_time": "0:13:44", "remaining_time": "0:41:51", "throughput": 14731.19, "total_tokens": 12143808} +{"current_steps": 3865, "total_steps": 15621, "loss": 0.4804, "lr": 1.8706801356799735e-06, "epoch": 0.24742334037513602, "percentage": 24.74, "elapsed_time": "0:13:44", "remaining_time": "0:41:49", "throughput": 14739.03, "total_tokens": 12159232} +{"current_steps": 3870, "total_steps": 15621, "loss": 0.4465, "lr": 1.8701300150438674e-06, "epoch": 0.24774342231611293, "percentage": 24.77, "elapsed_time": "0:13:45", "remaining_time": "0:41:46", "throughput": 14747.5, "total_tokens": 12175360} +{"current_steps": 3875, "total_steps": 15621, "loss": 0.4191, "lr": 1.869578808040221e-06, "epoch": 0.2480635042570898, "percentage": 24.81, "elapsed_time": "0:13:46", "remaining_time": "0:41:44", "throughput": 14755.18, "total_tokens": 12190272} +{"current_steps": 3880, "total_steps": 15621, "loss": 0.5149, "lr": 1.869026515357223e-06, "epoch": 0.2483835861980667, "percentage": 24.84, "elapsed_time": "0:13:46", "remaining_time": "0:41:42", "throughput": 14764.96, "total_tokens": 12208448} +{"current_steps": 3885, "total_steps": 15621, "loss": 0.6372, "lr": 1.8684731376844169e-06, "epoch": 0.2487036681390436, "percentage": 24.87, "elapsed_time": "0:13:47", "remaining_time": "0:41:39", "throughput": 14774.23, "total_tokens": 12225984} +{"current_steps": 3890, "total_steps": 15621, "loss": 0.4965, "lr": 1.8679186757127014e-06, "epoch": 0.24902375008002048, "percentage": 24.9, "elapsed_time": "0:13:48", "remaining_time": "0:41:37", "throughput": 14782.03, "total_tokens": 12241408} +{"current_steps": 3895, "total_steps": 15621, "loss": 0.4381, "lr": 1.8673631301343288e-06, "epoch": 0.24934383202099739, "percentage": 24.93, "elapsed_time": "0:13:48", "remaining_time": "0:41:34", "throughput": 14789.33, "total_tokens": 12256064} +{"current_steps": 3900, "total_steps": 15621, "loss": 0.4388, "lr": 1.8668065016429044e-06, "epoch": 0.24966391396197427, "percentage": 24.97, "elapsed_time": "0:13:49", "remaining_time": "0:41:32", "throughput": 14798.12, "total_tokens": 12272832} +{"current_steps": 3905, "total_steps": 15621, "loss": 0.5257, "lr": 1.866248790933385e-06, "epoch": 0.24998399590295114, "percentage": 25.0, "elapsed_time": "0:13:49", "remaining_time": "0:41:30", "throughput": 14806.56, "total_tokens": 12289024} +{"current_steps": 3910, "total_steps": 15621, "loss": 0.4226, "lr": 1.8656899987020795e-06, "epoch": 0.25030407784392805, "percentage": 25.03, "elapsed_time": "0:13:50", "remaining_time": "0:41:27", "throughput": 14814.22, "total_tokens": 12304064} +{"current_steps": 3910, "total_steps": 15621, "eval_loss": 0.4644124507904053, "epoch": 0.25030407784392805, "percentage": 25.03, "elapsed_time": "0:14:39", "remaining_time": "0:43:55", "throughput": 13985.65, "total_tokens": 12304064} +{"current_steps": 3915, "total_steps": 15621, "loss": 0.4605, "lr": 1.865130125646646e-06, "epoch": 0.25062415978490493, "percentage": 25.06, "elapsed_time": "0:15:12", "remaining_time": "0:45:29", "throughput": 13496.98, "total_tokens": 12320256} +{"current_steps": 3920, "total_steps": 15621, "loss": 0.4394, "lr": 1.8645691724660933e-06, "epoch": 0.2509442417258818, "percentage": 25.09, "elapsed_time": "0:15:13", "remaining_time": "0:45:26", "throughput": 13504.66, "total_tokens": 12335360} +{"current_steps": 3925, "total_steps": 15621, "loss": 0.4616, "lr": 1.8640071398607774e-06, "epoch": 0.2512643236668587, "percentage": 25.13, "elapsed_time": "0:15:14", "remaining_time": "0:45:23", "throughput": 13513.11, "total_tokens": 12351488} +{"current_steps": 3930, "total_steps": 15621, "loss": 0.6203, "lr": 1.8634440285324024e-06, "epoch": 0.2515844056078356, "percentage": 25.16, "elapsed_time": "0:15:14", "remaining_time": "0:45:20", "throughput": 13520.36, "total_tokens": 12365952} +{"current_steps": 3935, "total_steps": 15621, "loss": 0.469, "lr": 1.8628798391840205e-06, "epoch": 0.2519044875488125, "percentage": 25.19, "elapsed_time": "0:15:15", "remaining_time": "0:45:17", "throughput": 13528.46, "total_tokens": 12381376} +{"current_steps": 3940, "total_steps": 15621, "loss": 0.4588, "lr": 1.8623145725200277e-06, "epoch": 0.2522245694897894, "percentage": 25.22, "elapsed_time": "0:15:15", "remaining_time": "0:45:15", "throughput": 13536.01, "total_tokens": 12396160} +{"current_steps": 3945, "total_steps": 15621, "loss": 0.4468, "lr": 1.8617482292461664e-06, "epoch": 0.25254465143076626, "percentage": 25.25, "elapsed_time": "0:15:16", "remaining_time": "0:45:12", "throughput": 13543.48, "total_tokens": 12410944} +{"current_steps": 3950, "total_steps": 15621, "loss": 0.4172, "lr": 1.861180810069523e-06, "epoch": 0.25286473337174314, "percentage": 25.29, "elapsed_time": "0:15:16", "remaining_time": "0:45:09", "throughput": 13551.33, "total_tokens": 12426304} +{"current_steps": 3955, "total_steps": 15621, "loss": 0.4599, "lr": 1.8606123156985268e-06, "epoch": 0.2531848153127201, "percentage": 25.32, "elapsed_time": "0:15:17", "remaining_time": "0:45:06", "throughput": 13559.69, "total_tokens": 12442432} +{"current_steps": 3960, "total_steps": 15621, "loss": 0.4617, "lr": 1.8600427468429496e-06, "epoch": 0.25350489725369696, "percentage": 25.35, "elapsed_time": "0:15:18", "remaining_time": "0:45:03", "throughput": 13567.82, "total_tokens": 12458368} +{"current_steps": 3965, "total_steps": 15621, "loss": 0.4302, "lr": 1.8594721042139052e-06, "epoch": 0.25382497919467384, "percentage": 25.38, "elapsed_time": "0:15:18", "remaining_time": "0:45:01", "throughput": 13576.03, "total_tokens": 12474368} +{"current_steps": 3970, "total_steps": 15621, "loss": 0.4147, "lr": 1.858900388523847e-06, "epoch": 0.2541450611356507, "percentage": 25.41, "elapsed_time": "0:15:19", "remaining_time": "0:44:58", "throughput": 13584.13, "total_tokens": 12490176} +{"current_steps": 3975, "total_steps": 15621, "loss": 0.4639, "lr": 1.8583276004865694e-06, "epoch": 0.2544651430766276, "percentage": 25.45, "elapsed_time": "0:15:20", "remaining_time": "0:44:55", "throughput": 13593.43, "total_tokens": 12507840} +{"current_steps": 3980, "total_steps": 15621, "loss": 0.3452, "lr": 1.8577537408172046e-06, "epoch": 0.25478522501760453, "percentage": 25.48, "elapsed_time": "0:15:20", "remaining_time": "0:44:53", "throughput": 13601.33, "total_tokens": 12523520} +{"current_steps": 3985, "total_steps": 15621, "loss": 0.5365, "lr": 1.8571788102322234e-06, "epoch": 0.2551053069585814, "percentage": 25.51, "elapsed_time": "0:15:21", "remaining_time": "0:44:50", "throughput": 13610.34, "total_tokens": 12540736} +{"current_steps": 3990, "total_steps": 15621, "loss": 0.4704, "lr": 1.8566028094494332e-06, "epoch": 0.2554253888995583, "percentage": 25.54, "elapsed_time": "0:15:22", "remaining_time": "0:44:47", "throughput": 13618.38, "total_tokens": 12556352} +{"current_steps": 3995, "total_steps": 15621, "loss": 0.3726, "lr": 1.8560257391879778e-06, "epoch": 0.25574547084053517, "percentage": 25.57, "elapsed_time": "0:15:22", "remaining_time": "0:44:44", "throughput": 13625.45, "total_tokens": 12570688} +{"current_steps": 4000, "total_steps": 15621, "loss": 0.4038, "lr": 1.855447600168336e-06, "epoch": 0.25606555278151205, "percentage": 25.61, "elapsed_time": "0:15:23", "remaining_time": "0:44:42", "throughput": 13633.22, "total_tokens": 12585984} +{"current_steps": 4005, "total_steps": 15621, "loss": 0.4665, "lr": 1.8548683931123215e-06, "epoch": 0.25638563472248893, "percentage": 25.64, "elapsed_time": "0:15:23", "remaining_time": "0:44:39", "throughput": 13640.95, "total_tokens": 12601216} +{"current_steps": 4010, "total_steps": 15621, "loss": 0.4408, "lr": 1.8542881187430807e-06, "epoch": 0.25670571666346587, "percentage": 25.67, "elapsed_time": "0:15:24", "remaining_time": "0:44:36", "throughput": 13650.07, "total_tokens": 12618624} +{"current_steps": 4015, "total_steps": 15621, "loss": 0.5792, "lr": 1.8537067777850935e-06, "epoch": 0.25702579860444275, "percentage": 25.7, "elapsed_time": "0:15:25", "remaining_time": "0:44:34", "throughput": 13658.95, "total_tokens": 12635840} +{"current_steps": 4020, "total_steps": 15621, "loss": 0.3554, "lr": 1.8531243709641704e-06, "epoch": 0.2573458805454196, "percentage": 25.73, "elapsed_time": "0:15:25", "remaining_time": "0:44:31", "throughput": 13667.06, "total_tokens": 12651904} +{"current_steps": 4025, "total_steps": 15621, "loss": 0.4923, "lr": 1.8525408990074533e-06, "epoch": 0.2576659624863965, "percentage": 25.77, "elapsed_time": "0:15:26", "remaining_time": "0:44:28", "throughput": 13674.61, "total_tokens": 12666944} +{"current_steps": 4030, "total_steps": 15621, "loss": 0.4155, "lr": 1.851956362643414e-06, "epoch": 0.2579860444273734, "percentage": 25.8, "elapsed_time": "0:15:26", "remaining_time": "0:44:26", "throughput": 13682.52, "total_tokens": 12682688} +{"current_steps": 4035, "total_steps": 15621, "loss": 0.5472, "lr": 1.851370762601853e-06, "epoch": 0.2583061263683503, "percentage": 25.83, "elapsed_time": "0:15:27", "remaining_time": "0:44:23", "throughput": 13690.41, "total_tokens": 12698304} +{"current_steps": 4040, "total_steps": 15621, "loss": 0.4995, "lr": 1.8507840996138983e-06, "epoch": 0.2586262083093272, "percentage": 25.86, "elapsed_time": "0:15:28", "remaining_time": "0:44:20", "throughput": 13697.53, "total_tokens": 12712896} +{"current_steps": 4045, "total_steps": 15621, "loss": 0.39, "lr": 1.8501963744120062e-06, "epoch": 0.2589462902503041, "percentage": 25.89, "elapsed_time": "0:15:28", "remaining_time": "0:44:17", "throughput": 13704.67, "total_tokens": 12727488} +{"current_steps": 4050, "total_steps": 15621, "loss": 0.4037, "lr": 1.849607587729958e-06, "epoch": 0.25926637219128096, "percentage": 25.93, "elapsed_time": "0:15:29", "remaining_time": "0:44:15", "throughput": 13712.24, "total_tokens": 12742720} +{"current_steps": 4055, "total_steps": 15621, "loss": 0.3918, "lr": 1.8490177403028615e-06, "epoch": 0.25958645413225784, "percentage": 25.96, "elapsed_time": "0:15:29", "remaining_time": "0:44:12", "throughput": 13719.84, "total_tokens": 12757760} +{"current_steps": 4060, "total_steps": 15621, "loss": 0.4879, "lr": 1.8484268328671475e-06, "epoch": 0.2599065360732348, "percentage": 25.99, "elapsed_time": "0:15:30", "remaining_time": "0:44:09", "throughput": 13727.64, "total_tokens": 12773312} +{"current_steps": 4065, "total_steps": 15621, "loss": 0.553, "lr": 1.847834866160571e-06, "epoch": 0.26022661801421165, "percentage": 26.02, "elapsed_time": "0:15:31", "remaining_time": "0:44:07", "throughput": 13736.41, "total_tokens": 12790336} +{"current_steps": 4070, "total_steps": 15621, "loss": 0.4995, "lr": 1.847241840922209e-06, "epoch": 0.26054669995518853, "percentage": 26.05, "elapsed_time": "0:15:31", "remaining_time": "0:44:04", "throughput": 13743.85, "total_tokens": 12805632} +{"current_steps": 4075, "total_steps": 15621, "loss": 0.4861, "lr": 1.8466477578924616e-06, "epoch": 0.2608667818961654, "percentage": 26.09, "elapsed_time": "0:15:32", "remaining_time": "0:44:01", "throughput": 13751.59, "total_tokens": 12821184} +{"current_steps": 4080, "total_steps": 15621, "loss": 0.5037, "lr": 1.8460526178130472e-06, "epoch": 0.2611868638371423, "percentage": 26.12, "elapsed_time": "0:15:32", "remaining_time": "0:43:59", "throughput": 13759.11, "total_tokens": 12836544} +{"current_steps": 4085, "total_steps": 15621, "loss": 0.4307, "lr": 1.8454564214270056e-06, "epoch": 0.26150694577811917, "percentage": 26.15, "elapsed_time": "0:15:33", "remaining_time": "0:43:56", "throughput": 13766.73, "total_tokens": 12852032} +{"current_steps": 4090, "total_steps": 15621, "loss": 0.446, "lr": 1.8448591694786955e-06, "epoch": 0.2618270277190961, "percentage": 26.18, "elapsed_time": "0:15:34", "remaining_time": "0:43:53", "throughput": 13774.31, "total_tokens": 12867456} +{"current_steps": 4095, "total_steps": 15621, "loss": 0.3206, "lr": 1.8442608627137925e-06, "epoch": 0.262147109660073, "percentage": 26.21, "elapsed_time": "0:15:34", "remaining_time": "0:43:51", "throughput": 13783.53, "total_tokens": 12885184} +{"current_steps": 4100, "total_steps": 15621, "loss": 0.3815, "lr": 1.8436615018792897e-06, "epoch": 0.26246719160104987, "percentage": 26.25, "elapsed_time": "0:15:35", "remaining_time": "0:43:48", "throughput": 13791.12, "total_tokens": 12900416} +{"current_steps": 4105, "total_steps": 15621, "loss": 0.5722, "lr": 1.8430610877234957e-06, "epoch": 0.26278727354202674, "percentage": 26.28, "elapsed_time": "0:15:36", "remaining_time": "0:43:45", "throughput": 13798.64, "total_tokens": 12915648} +{"current_steps": 4110, "total_steps": 15621, "loss": 0.4491, "lr": 1.8424596209960356e-06, "epoch": 0.2631073554830036, "percentage": 26.31, "elapsed_time": "0:15:36", "remaining_time": "0:43:43", "throughput": 13805.85, "total_tokens": 12930368} +{"current_steps": 4115, "total_steps": 15621, "loss": 0.5253, "lr": 1.8418571024478466e-06, "epoch": 0.26342743742398056, "percentage": 26.34, "elapsed_time": "0:15:37", "remaining_time": "0:43:40", "throughput": 13813.32, "total_tokens": 12945472} +{"current_steps": 4120, "total_steps": 15621, "loss": 0.4884, "lr": 1.8412535328311812e-06, "epoch": 0.26374751936495744, "percentage": 26.37, "elapsed_time": "0:15:37", "remaining_time": "0:43:37", "throughput": 13821.15, "total_tokens": 12961472} +{"current_steps": 4125, "total_steps": 15621, "loss": 0.5935, "lr": 1.8406489128996023e-06, "epoch": 0.2640676013059343, "percentage": 26.41, "elapsed_time": "0:15:38", "remaining_time": "0:43:35", "throughput": 13828.09, "total_tokens": 12975872} +{"current_steps": 4130, "total_steps": 15621, "loss": 0.5286, "lr": 1.8400432434079853e-06, "epoch": 0.2643876832469112, "percentage": 26.44, "elapsed_time": "0:15:38", "remaining_time": "0:43:32", "throughput": 13836.16, "total_tokens": 12992128} +{"current_steps": 4135, "total_steps": 15621, "loss": 0.4112, "lr": 1.8394365251125162e-06, "epoch": 0.2647077651878881, "percentage": 26.47, "elapsed_time": "0:15:40", "remaining_time": "0:43:31", "throughput": 13850.93, "total_tokens": 13021184} +{"current_steps": 4140, "total_steps": 15621, "loss": 0.4385, "lr": 1.8388287587706888e-06, "epoch": 0.265027847128865, "percentage": 26.5, "elapsed_time": "0:15:40", "remaining_time": "0:43:28", "throughput": 13859.06, "total_tokens": 13037568} +{"current_steps": 4145, "total_steps": 15621, "loss": 0.4655, "lr": 1.8382199451413074e-06, "epoch": 0.2653479290698419, "percentage": 26.53, "elapsed_time": "0:15:41", "remaining_time": "0:43:26", "throughput": 13866.81, "total_tokens": 13053440} +{"current_steps": 4150, "total_steps": 15621, "loss": 0.5121, "lr": 1.837610084984483e-06, "epoch": 0.26566801101081877, "percentage": 26.57, "elapsed_time": "0:15:41", "remaining_time": "0:43:23", "throughput": 13874.65, "total_tokens": 13069440} +{"current_steps": 4155, "total_steps": 15621, "loss": 0.5466, "lr": 1.8369991790616327e-06, "epoch": 0.26598809295179565, "percentage": 26.6, "elapsed_time": "0:15:42", "remaining_time": "0:43:21", "throughput": 13881.73, "total_tokens": 13084224} +{"current_steps": 4160, "total_steps": 15621, "loss": 0.6597, "lr": 1.8363872281354795e-06, "epoch": 0.26630817489277253, "percentage": 26.63, "elapsed_time": "0:15:43", "remaining_time": "0:43:18", "throughput": 13888.65, "total_tokens": 13098688} +{"current_steps": 4165, "total_steps": 15621, "loss": 0.4049, "lr": 1.835774232970052e-06, "epoch": 0.26662825683374947, "percentage": 26.66, "elapsed_time": "0:15:43", "remaining_time": "0:43:15", "throughput": 13896.1, "total_tokens": 13114112} +{"current_steps": 4170, "total_steps": 15621, "loss": 0.4672, "lr": 1.8351601943306815e-06, "epoch": 0.26694833877472635, "percentage": 26.69, "elapsed_time": "0:15:44", "remaining_time": "0:43:13", "throughput": 13904.08, "total_tokens": 13130240} +{"current_steps": 4175, "total_steps": 15621, "loss": 0.3994, "lr": 1.8345451129840025e-06, "epoch": 0.2672684207157032, "percentage": 26.73, "elapsed_time": "0:15:44", "remaining_time": "0:43:10", "throughput": 13911.45, "total_tokens": 13145536} +{"current_steps": 4180, "total_steps": 15621, "loss": 0.552, "lr": 1.8339289896979515e-06, "epoch": 0.2675885026566801, "percentage": 26.76, "elapsed_time": "0:15:45", "remaining_time": "0:43:07", "throughput": 13918.54, "total_tokens": 13160256} +{"current_steps": 4185, "total_steps": 15621, "loss": 0.5336, "lr": 1.8333118252417651e-06, "epoch": 0.267908584597657, "percentage": 26.79, "elapsed_time": "0:15:46", "remaining_time": "0:43:05", "throughput": 13926.85, "total_tokens": 13177088} +{"current_steps": 4190, "total_steps": 15621, "loss": 0.5098, "lr": 1.832693620385981e-06, "epoch": 0.26822866653863386, "percentage": 26.82, "elapsed_time": "0:15:46", "remaining_time": "0:43:02", "throughput": 13934.48, "total_tokens": 13192768} +{"current_steps": 4195, "total_steps": 15621, "loss": 0.5183, "lr": 1.8320743759024352e-06, "epoch": 0.2685487484796108, "percentage": 26.85, "elapsed_time": "0:15:47", "remaining_time": "0:43:00", "throughput": 13941.86, "total_tokens": 13208192} +{"current_steps": 4200, "total_steps": 15621, "loss": 0.5242, "lr": 1.831454092564261e-06, "epoch": 0.2688688304205877, "percentage": 26.89, "elapsed_time": "0:15:47", "remaining_time": "0:42:57", "throughput": 13949.46, "total_tokens": 13223872} +{"current_steps": 4205, "total_steps": 15621, "loss": 0.4714, "lr": 1.8308327711458899e-06, "epoch": 0.26918891236156456, "percentage": 26.92, "elapsed_time": "0:15:48", "remaining_time": "0:42:55", "throughput": 13956.65, "total_tokens": 13239104} +{"current_steps": 4210, "total_steps": 15621, "loss": 0.3844, "lr": 1.830210412423049e-06, "epoch": 0.26950899430254144, "percentage": 26.95, "elapsed_time": "0:15:49", "remaining_time": "0:42:52", "throughput": 13964.09, "total_tokens": 13254464} +{"current_steps": 4215, "total_steps": 15621, "loss": 0.3647, "lr": 1.8295870171727605e-06, "epoch": 0.2698290762435183, "percentage": 26.98, "elapsed_time": "0:15:49", "remaining_time": "0:42:50", "throughput": 13971.45, "total_tokens": 13269824} +{"current_steps": 4220, "total_steps": 15621, "loss": 0.4194, "lr": 1.8289625861733408e-06, "epoch": 0.27014915818449525, "percentage": 27.01, "elapsed_time": "0:15:50", "remaining_time": "0:42:47", "throughput": 13980.79, "total_tokens": 13288448} +{"current_steps": 4225, "total_steps": 15621, "loss": 0.5194, "lr": 1.8283371202043991e-06, "epoch": 0.27046924012547213, "percentage": 27.05, "elapsed_time": "0:15:51", "remaining_time": "0:42:45", "throughput": 13988.37, "total_tokens": 13304320} +{"current_steps": 4230, "total_steps": 15621, "loss": 0.5503, "lr": 1.827710620046837e-06, "epoch": 0.270789322066449, "percentage": 27.08, "elapsed_time": "0:15:51", "remaining_time": "0:42:42", "throughput": 13997.18, "total_tokens": 13321920} +{"current_steps": 4235, "total_steps": 15621, "loss": 0.4687, "lr": 1.8270830864828474e-06, "epoch": 0.2711094040074259, "percentage": 27.11, "elapsed_time": "0:15:52", "remaining_time": "0:42:40", "throughput": 14004.47, "total_tokens": 13337280} +{"current_steps": 4240, "total_steps": 15621, "loss": 0.4287, "lr": 1.8264545202959133e-06, "epoch": 0.27142948594840277, "percentage": 27.14, "elapsed_time": "0:15:53", "remaining_time": "0:42:38", "throughput": 14012.62, "total_tokens": 13354112} +{"current_steps": 4245, "total_steps": 15621, "loss": 0.4321, "lr": 1.8258249222708067e-06, "epoch": 0.2717495678893797, "percentage": 27.17, "elapsed_time": "0:15:53", "remaining_time": "0:42:35", "throughput": 14019.85, "total_tokens": 13369600} +{"current_steps": 4250, "total_steps": 15621, "loss": 0.4464, "lr": 1.8251942931935886e-06, "epoch": 0.2720696498303566, "percentage": 27.21, "elapsed_time": "0:15:54", "remaining_time": "0:42:33", "throughput": 14027.38, "total_tokens": 13385536} +{"current_steps": 4255, "total_steps": 15621, "loss": 0.3788, "lr": 1.8245626338516069e-06, "epoch": 0.27238973177133347, "percentage": 27.24, "elapsed_time": "0:15:54", "remaining_time": "0:42:30", "throughput": 14034.6, "total_tokens": 13400832} +{"current_steps": 4260, "total_steps": 15621, "loss": 0.3397, "lr": 1.823929945033495e-06, "epoch": 0.27270981371231034, "percentage": 27.27, "elapsed_time": "0:15:55", "remaining_time": "0:42:28", "throughput": 14041.76, "total_tokens": 13416000} +{"current_steps": 4265, "total_steps": 15621, "loss": 0.5015, "lr": 1.8232962275291728e-06, "epoch": 0.2730298956532872, "percentage": 27.3, "elapsed_time": "0:15:56", "remaining_time": "0:42:25", "throughput": 14049.09, "total_tokens": 13431360} +{"current_steps": 4270, "total_steps": 15621, "loss": 0.4342, "lr": 1.822661482129844e-06, "epoch": 0.2733499775942641, "percentage": 27.33, "elapsed_time": "0:15:56", "remaining_time": "0:42:23", "throughput": 14056.47, "total_tokens": 13446976} +{"current_steps": 4275, "total_steps": 15621, "loss": 0.3796, "lr": 1.8220257096279956e-06, "epoch": 0.27367005953524104, "percentage": 27.37, "elapsed_time": "0:15:57", "remaining_time": "0:42:20", "throughput": 14064.09, "total_tokens": 13463040} +{"current_steps": 4280, "total_steps": 15621, "loss": 0.6798, "lr": 1.8213889108173972e-06, "epoch": 0.2739901414762179, "percentage": 27.4, "elapsed_time": "0:15:57", "remaining_time": "0:42:18", "throughput": 14071.47, "total_tokens": 13478656} +{"current_steps": 4285, "total_steps": 15621, "loss": 0.4843, "lr": 1.8207510864930992e-06, "epoch": 0.2743102234171948, "percentage": 27.43, "elapsed_time": "0:15:58", "remaining_time": "0:42:15", "throughput": 14079.3, "total_tokens": 13495296} +{"current_steps": 4290, "total_steps": 15621, "loss": 0.5024, "lr": 1.8201122374514336e-06, "epoch": 0.2746303053581717, "percentage": 27.46, "elapsed_time": "0:15:59", "remaining_time": "0:42:13", "throughput": 14086.61, "total_tokens": 13510912} +{"current_steps": 4295, "total_steps": 15621, "loss": 0.4465, "lr": 1.8194723644900099e-06, "epoch": 0.27495038729914856, "percentage": 27.5, "elapsed_time": "0:15:59", "remaining_time": "0:42:10", "throughput": 14093.53, "total_tokens": 13525952} +{"current_steps": 4300, "total_steps": 15621, "loss": 0.5334, "lr": 1.8188314684077173e-06, "epoch": 0.2752704692401255, "percentage": 27.53, "elapsed_time": "0:16:00", "remaining_time": "0:42:08", "throughput": 14103.96, "total_tokens": 13546752} +{"current_steps": 4305, "total_steps": 15621, "loss": 0.5659, "lr": 1.8181895500047226e-06, "epoch": 0.2755905511811024, "percentage": 27.56, "elapsed_time": "0:16:01", "remaining_time": "0:42:06", "throughput": 14110.89, "total_tokens": 13561728} +{"current_steps": 4310, "total_steps": 15621, "loss": 0.4559, "lr": 1.817546610082468e-06, "epoch": 0.27591063312207925, "percentage": 27.59, "elapsed_time": "0:16:01", "remaining_time": "0:42:03", "throughput": 14118.12, "total_tokens": 13577344} +{"current_steps": 4315, "total_steps": 15621, "loss": 0.4806, "lr": 1.816902649443672e-06, "epoch": 0.27623071506305613, "percentage": 27.62, "elapsed_time": "0:16:02", "remaining_time": "0:42:01", "throughput": 14124.89, "total_tokens": 13592256} +{"current_steps": 4320, "total_steps": 15621, "loss": 0.5351, "lr": 1.8162576688923262e-06, "epoch": 0.276550797004033, "percentage": 27.66, "elapsed_time": "0:16:02", "remaining_time": "0:41:58", "throughput": 14132.82, "total_tokens": 13608832} +{"current_steps": 4325, "total_steps": 15621, "loss": 0.5544, "lr": 1.815611669233697e-06, "epoch": 0.27687087894500995, "percentage": 27.69, "elapsed_time": "0:16:03", "remaining_time": "0:41:56", "throughput": 14139.82, "total_tokens": 13624128} +{"current_steps": 4330, "total_steps": 15621, "loss": 0.5301, "lr": 1.8149646512743222e-06, "epoch": 0.2771909608859868, "percentage": 27.72, "elapsed_time": "0:16:04", "remaining_time": "0:41:54", "throughput": 14147.52, "total_tokens": 13640576} +{"current_steps": 4335, "total_steps": 15621, "loss": 0.4513, "lr": 1.8143166158220118e-06, "epoch": 0.2775110428269637, "percentage": 27.75, "elapsed_time": "0:16:04", "remaining_time": "0:41:51", "throughput": 14154.55, "total_tokens": 13655872} +{"current_steps": 4340, "total_steps": 15621, "loss": 0.6679, "lr": 1.8136675636858454e-06, "epoch": 0.2778311247679406, "percentage": 27.78, "elapsed_time": "0:16:05", "remaining_time": "0:41:49", "throughput": 14162.29, "total_tokens": 13672384} +{"current_steps": 4345, "total_steps": 15621, "loss": 0.3988, "lr": 1.8130174956761723e-06, "epoch": 0.27815120670891746, "percentage": 27.82, "elapsed_time": "0:16:05", "remaining_time": "0:41:46", "throughput": 14169.09, "total_tokens": 13687296} +{"current_steps": 4350, "total_steps": 15621, "loss": 0.5363, "lr": 1.81236641260461e-06, "epoch": 0.2784712886498944, "percentage": 27.85, "elapsed_time": "0:16:06", "remaining_time": "0:41:44", "throughput": 14176.06, "total_tokens": 13702528} +{"current_steps": 4355, "total_steps": 15621, "loss": 0.5002, "lr": 1.811714315284043e-06, "epoch": 0.2787913705908713, "percentage": 27.88, "elapsed_time": "0:16:07", "remaining_time": "0:41:42", "throughput": 14182.86, "total_tokens": 13717568} +{"current_steps": 4360, "total_steps": 15621, "loss": 0.4016, "lr": 1.8110612045286229e-06, "epoch": 0.27911145253184816, "percentage": 27.91, "elapsed_time": "0:16:07", "remaining_time": "0:41:39", "throughput": 14190.25, "total_tokens": 13733568} +{"current_steps": 4365, "total_steps": 15621, "loss": 0.3744, "lr": 1.8104070811537661e-06, "epoch": 0.27943153447282504, "percentage": 27.94, "elapsed_time": "0:16:08", "remaining_time": "0:41:37", "throughput": 14197.52, "total_tokens": 13749312} +{"current_steps": 4370, "total_steps": 15621, "loss": 0.4299, "lr": 1.8097519459761533e-06, "epoch": 0.2797516164138019, "percentage": 27.98, "elapsed_time": "0:16:09", "remaining_time": "0:41:34", "throughput": 14205.22, "total_tokens": 13765952} +{"current_steps": 4375, "total_steps": 15621, "loss": 0.495, "lr": 1.8090957998137283e-06, "epoch": 0.2800716983547788, "percentage": 28.01, "elapsed_time": "0:16:09", "remaining_time": "0:41:32", "throughput": 14212.22, "total_tokens": 13781440} +{"current_steps": 4380, "total_steps": 15621, "loss": 0.4471, "lr": 1.8084386434856978e-06, "epoch": 0.28039178029575573, "percentage": 28.04, "elapsed_time": "0:16:10", "remaining_time": "0:41:30", "throughput": 14219.22, "total_tokens": 13796864} +{"current_steps": 4385, "total_steps": 15621, "loss": 0.4915, "lr": 1.8077804778125283e-06, "epoch": 0.2807118622367326, "percentage": 28.07, "elapsed_time": "0:16:10", "remaining_time": "0:41:27", "throughput": 14226.52, "total_tokens": 13812736} +{"current_steps": 4390, "total_steps": 15621, "loss": 0.4966, "lr": 1.807121303615948e-06, "epoch": 0.2810319441777095, "percentage": 28.1, "elapsed_time": "0:16:11", "remaining_time": "0:41:25", "throughput": 14233.62, "total_tokens": 13828288} +{"current_steps": 4395, "total_steps": 15621, "loss": 0.4125, "lr": 1.8064611217189434e-06, "epoch": 0.28135202611868637, "percentage": 28.14, "elapsed_time": "0:16:12", "remaining_time": "0:41:23", "throughput": 14241.78, "total_tokens": 13845568} +{"current_steps": 4400, "total_steps": 15621, "loss": 0.398, "lr": 1.8057999329457596e-06, "epoch": 0.28167210805966325, "percentage": 28.17, "elapsed_time": "0:16:12", "remaining_time": "0:41:20", "throughput": 14248.62, "total_tokens": 13860608} +{"current_steps": 4405, "total_steps": 15621, "loss": 0.5663, "lr": 1.8051377381218984e-06, "epoch": 0.2819921900006402, "percentage": 28.2, "elapsed_time": "0:16:13", "remaining_time": "0:41:18", "throughput": 14255.99, "total_tokens": 13876608} +{"current_steps": 4410, "total_steps": 15621, "loss": 0.5656, "lr": 1.8044745380741177e-06, "epoch": 0.28231227194161707, "percentage": 28.23, "elapsed_time": "0:16:14", "remaining_time": "0:41:16", "throughput": 14263.88, "total_tokens": 13893632} +{"current_steps": 4415, "total_steps": 15621, "loss": 0.3896, "lr": 1.8038103336304306e-06, "epoch": 0.28263235388259395, "percentage": 28.26, "elapsed_time": "0:16:14", "remaining_time": "0:41:13", "throughput": 14270.95, "total_tokens": 13909312} +{"current_steps": 4420, "total_steps": 15621, "loss": 0.5699, "lr": 1.8031451256201042e-06, "epoch": 0.2829524358235708, "percentage": 28.3, "elapsed_time": "0:16:15", "remaining_time": "0:41:11", "throughput": 14278.54, "total_tokens": 13925824} +{"current_steps": 4425, "total_steps": 15621, "loss": 0.5385, "lr": 1.8024789148736589e-06, "epoch": 0.2832725177645477, "percentage": 28.33, "elapsed_time": "0:16:15", "remaining_time": "0:41:09", "throughput": 14286.19, "total_tokens": 13942336} +{"current_steps": 4430, "total_steps": 15621, "loss": 0.392, "lr": 1.8018117022228655e-06, "epoch": 0.28359259970552464, "percentage": 28.36, "elapsed_time": "0:16:16", "remaining_time": "0:41:06", "throughput": 14293.08, "total_tokens": 13957760} +{"current_steps": 4435, "total_steps": 15621, "loss": 0.4997, "lr": 1.8011434885007479e-06, "epoch": 0.2839126816465015, "percentage": 28.39, "elapsed_time": "0:16:17", "remaining_time": "0:41:04", "throughput": 14299.94, "total_tokens": 13972992} +{"current_steps": 4440, "total_steps": 15621, "loss": 0.4308, "lr": 1.8004742745415787e-06, "epoch": 0.2842327635874784, "percentage": 28.42, "elapsed_time": "0:16:17", "remaining_time": "0:41:02", "throughput": 14307.03, "total_tokens": 13988736} +{"current_steps": 4445, "total_steps": 15621, "loss": 0.5427, "lr": 1.799804061180879e-06, "epoch": 0.2845528455284553, "percentage": 28.46, "elapsed_time": "0:16:18", "remaining_time": "0:40:59", "throughput": 14313.44, "total_tokens": 14003520} +{"current_steps": 4450, "total_steps": 15621, "loss": 0.518, "lr": 1.799132849255418e-06, "epoch": 0.28487292746943216, "percentage": 28.49, "elapsed_time": "0:16:19", "remaining_time": "0:40:57", "throughput": 14321.32, "total_tokens": 14020608} +{"current_steps": 4455, "total_steps": 15621, "loss": 0.4011, "lr": 1.798460639603212e-06, "epoch": 0.28519300941040904, "percentage": 28.52, "elapsed_time": "0:16:19", "remaining_time": "0:40:55", "throughput": 14327.73, "total_tokens": 14035328} +{"current_steps": 4460, "total_steps": 15621, "loss": 0.4805, "lr": 1.7977874330635224e-06, "epoch": 0.285513091351386, "percentage": 28.55, "elapsed_time": "0:16:20", "remaining_time": "0:40:52", "throughput": 14334.57, "total_tokens": 14050816} +{"current_steps": 4465, "total_steps": 15621, "loss": 0.3289, "lr": 1.7971132304768555e-06, "epoch": 0.28583317329236285, "percentage": 28.58, "elapsed_time": "0:16:20", "remaining_time": "0:40:50", "throughput": 14341.77, "total_tokens": 14066880} +{"current_steps": 4470, "total_steps": 15621, "loss": 0.4937, "lr": 1.7964380326849612e-06, "epoch": 0.28615325523333973, "percentage": 28.62, "elapsed_time": "0:16:21", "remaining_time": "0:40:48", "throughput": 14348.32, "total_tokens": 14081728} +{"current_steps": 4475, "total_steps": 15621, "loss": 0.4941, "lr": 1.795761840530832e-06, "epoch": 0.2864733371743166, "percentage": 28.65, "elapsed_time": "0:16:22", "remaining_time": "0:40:46", "throughput": 14355.74, "total_tokens": 14097984} +{"current_steps": 4480, "total_steps": 15621, "loss": 0.4208, "lr": 1.7950846548587015e-06, "epoch": 0.2867934191152935, "percentage": 28.68, "elapsed_time": "0:16:22", "remaining_time": "0:40:43", "throughput": 14363.79, "total_tokens": 14115264} +{"current_steps": 4485, "total_steps": 15621, "loss": 0.2799, "lr": 1.7944064765140445e-06, "epoch": 0.2871135010562704, "percentage": 28.71, "elapsed_time": "0:16:23", "remaining_time": "0:40:41", "throughput": 14369.93, "total_tokens": 14129472} +{"current_steps": 4490, "total_steps": 15621, "loss": 0.55, "lr": 1.7937273063435735e-06, "epoch": 0.2874335829972473, "percentage": 28.74, "elapsed_time": "0:16:23", "remaining_time": "0:40:39", "throughput": 14376.78, "total_tokens": 14144896} +{"current_steps": 4495, "total_steps": 15621, "loss": 0.3622, "lr": 1.7930471451952416e-06, "epoch": 0.2877536649382242, "percentage": 28.78, "elapsed_time": "0:16:24", "remaining_time": "0:40:36", "throughput": 14383.29, "total_tokens": 14159744} +{"current_steps": 4500, "total_steps": 15621, "loss": 0.4915, "lr": 1.7923659939182377e-06, "epoch": 0.28807374687920106, "percentage": 28.81, "elapsed_time": "0:16:25", "remaining_time": "0:40:34", "throughput": 14390.92, "total_tokens": 14176384} +{"current_steps": 4505, "total_steps": 15621, "loss": 0.5376, "lr": 1.7916838533629866e-06, "epoch": 0.28839382882017794, "percentage": 28.84, "elapsed_time": "0:16:25", "remaining_time": "0:40:32", "throughput": 14398.08, "total_tokens": 14192320} +{"current_steps": 4510, "total_steps": 15621, "loss": 0.397, "lr": 1.7910007243811493e-06, "epoch": 0.2887139107611549, "percentage": 28.87, "elapsed_time": "0:16:26", "remaining_time": "0:40:29", "throughput": 14405.13, "total_tokens": 14208192} +{"current_steps": 4515, "total_steps": 15621, "loss": 0.5486, "lr": 1.7903166078256202e-06, "epoch": 0.28903399270213176, "percentage": 28.9, "elapsed_time": "0:16:26", "remaining_time": "0:40:27", "throughput": 14411.64, "total_tokens": 14223104} +{"current_steps": 4520, "total_steps": 15621, "loss": 0.4153, "lr": 1.789631504550527e-06, "epoch": 0.28935407464310864, "percentage": 28.94, "elapsed_time": "0:16:27", "remaining_time": "0:40:25", "throughput": 14418.45, "total_tokens": 14238464} +{"current_steps": 4525, "total_steps": 15621, "loss": 0.384, "lr": 1.7889454154112288e-06, "epoch": 0.2896741565840855, "percentage": 28.97, "elapsed_time": "0:16:28", "remaining_time": "0:40:23", "throughput": 14425.65, "total_tokens": 14254656} +{"current_steps": 4530, "total_steps": 15621, "loss": 0.3983, "lr": 1.7882583412643167e-06, "epoch": 0.2899942385250624, "percentage": 29.0, "elapsed_time": "0:16:28", "remaining_time": "0:40:20", "throughput": 14431.8, "total_tokens": 14268928} +{"current_steps": 4535, "total_steps": 15621, "loss": 0.4326, "lr": 1.78757028296761e-06, "epoch": 0.29031432046603933, "percentage": 29.03, "elapsed_time": "0:16:29", "remaining_time": "0:40:18", "throughput": 14439.47, "total_tokens": 14285952} +{"current_steps": 4540, "total_steps": 15621, "loss": 0.3522, "lr": 1.7868812413801582e-06, "epoch": 0.2906344024070162, "percentage": 29.06, "elapsed_time": "0:16:29", "remaining_time": "0:40:16", "throughput": 14446.55, "total_tokens": 14301760} +{"current_steps": 4545, "total_steps": 15621, "loss": 0.4976, "lr": 1.7861912173622372e-06, "epoch": 0.2909544843479931, "percentage": 29.1, "elapsed_time": "0:16:30", "remaining_time": "0:40:14", "throughput": 14453.88, "total_tokens": 14318208} +{"current_steps": 4550, "total_steps": 15621, "loss": 0.4597, "lr": 1.7855002117753504e-06, "epoch": 0.29127456628896997, "percentage": 29.13, "elapsed_time": "0:16:31", "remaining_time": "0:40:11", "throughput": 14460.97, "total_tokens": 14334144} +{"current_steps": 4555, "total_steps": 15621, "loss": 0.5283, "lr": 1.7848082254822266e-06, "epoch": 0.29159464822994685, "percentage": 29.16, "elapsed_time": "0:16:31", "remaining_time": "0:40:09", "throughput": 14467.45, "total_tokens": 14349120} +{"current_steps": 4560, "total_steps": 15621, "loss": 0.4868, "lr": 1.7841152593468185e-06, "epoch": 0.29191473017092373, "percentage": 29.19, "elapsed_time": "0:16:32", "remaining_time": "0:40:07", "throughput": 14474.74, "total_tokens": 14365376} +{"current_steps": 4565, "total_steps": 15621, "loss": 0.4582, "lr": 1.7834213142343026e-06, "epoch": 0.29223481211190067, "percentage": 29.22, "elapsed_time": "0:16:33", "remaining_time": "0:40:05", "throughput": 14481.79, "total_tokens": 14381568} +{"current_steps": 4570, "total_steps": 15621, "loss": 0.4626, "lr": 1.7827263910110777e-06, "epoch": 0.29255489405287755, "percentage": 29.26, "elapsed_time": "0:16:33", "remaining_time": "0:40:02", "throughput": 14488.67, "total_tokens": 14397312} +{"current_steps": 4575, "total_steps": 15621, "loss": 0.4372, "lr": 1.7820304905447632e-06, "epoch": 0.2928749759938544, "percentage": 29.29, "elapsed_time": "0:16:34", "remaining_time": "0:40:00", "throughput": 14495.51, "total_tokens": 14412928} +{"current_steps": 4580, "total_steps": 15621, "loss": 0.446, "lr": 1.7813336137041991e-06, "epoch": 0.2931950579348313, "percentage": 29.32, "elapsed_time": "0:16:34", "remaining_time": "0:39:58", "throughput": 14501.93, "total_tokens": 14427968} +{"current_steps": 4585, "total_steps": 15621, "loss": 0.3693, "lr": 1.7806357613594447e-06, "epoch": 0.2935151398758082, "percentage": 29.35, "elapsed_time": "0:16:35", "remaining_time": "0:39:56", "throughput": 14508.39, "total_tokens": 14442944} +{"current_steps": 4590, "total_steps": 15621, "loss": 0.4481, "lr": 1.7799369343817764e-06, "epoch": 0.2938352218167851, "percentage": 29.38, "elapsed_time": "0:16:36", "remaining_time": "0:39:53", "throughput": 14515.1, "total_tokens": 14458176} +{"current_steps": 4595, "total_steps": 15621, "loss": 0.3566, "lr": 1.7792371336436883e-06, "epoch": 0.294155303757762, "percentage": 29.42, "elapsed_time": "0:16:36", "remaining_time": "0:39:51", "throughput": 14521.76, "total_tokens": 14473600} +{"current_steps": 4600, "total_steps": 15621, "loss": 0.6518, "lr": 1.7785363600188892e-06, "epoch": 0.2944753856987389, "percentage": 29.45, "elapsed_time": "0:16:37", "remaining_time": "0:39:49", "throughput": 14528.34, "total_tokens": 14488896} +{"current_steps": 4605, "total_steps": 15621, "loss": 0.5881, "lr": 1.7778346143823038e-06, "epoch": 0.29479546763971576, "percentage": 29.48, "elapsed_time": "0:16:37", "remaining_time": "0:39:47", "throughput": 14534.09, "total_tokens": 14502784} +{"current_steps": 4610, "total_steps": 15621, "loss": 0.4293, "lr": 1.7771318976100696e-06, "epoch": 0.29511554958069264, "percentage": 29.51, "elapsed_time": "0:16:38", "remaining_time": "0:39:44", "throughput": 14541.86, "total_tokens": 14520000} +{"current_steps": 4615, "total_steps": 15621, "loss": 0.3401, "lr": 1.7764282105795364e-06, "epoch": 0.2954356315216696, "percentage": 29.54, "elapsed_time": "0:16:39", "remaining_time": "0:39:42", "throughput": 14548.99, "total_tokens": 14536320} +{"current_steps": 4620, "total_steps": 15621, "loss": 0.4524, "lr": 1.7757235541692663e-06, "epoch": 0.29575571346264645, "percentage": 29.58, "elapsed_time": "0:16:39", "remaining_time": "0:39:40", "throughput": 14555.72, "total_tokens": 14551808} +{"current_steps": 4625, "total_steps": 15621, "loss": 0.3157, "lr": 1.7750179292590306e-06, "epoch": 0.29607579540362333, "percentage": 29.61, "elapsed_time": "0:16:40", "remaining_time": "0:39:38", "throughput": 14562.19, "total_tokens": 14566976} +{"current_steps": 4630, "total_steps": 15621, "loss": 0.3475, "lr": 1.7743113367298107e-06, "epoch": 0.2963958773446002, "percentage": 29.64, "elapsed_time": "0:16:40", "remaining_time": "0:39:36", "throughput": 14569.18, "total_tokens": 14583104} +{"current_steps": 4635, "total_steps": 15621, "loss": 0.4454, "lr": 1.7736037774637955e-06, "epoch": 0.2967159592855771, "percentage": 29.67, "elapsed_time": "0:16:41", "remaining_time": "0:39:33", "throughput": 14575.75, "total_tokens": 14598336} +{"current_steps": 4640, "total_steps": 15621, "loss": 0.5142, "lr": 1.772895252344381e-06, "epoch": 0.29703604122655397, "percentage": 29.7, "elapsed_time": "0:16:42", "remaining_time": "0:39:31", "throughput": 14583.28, "total_tokens": 14615232} +{"current_steps": 4645, "total_steps": 15621, "loss": 0.3932, "lr": 1.7721857622561692e-06, "epoch": 0.2973561231675309, "percentage": 29.74, "elapsed_time": "0:16:42", "remaining_time": "0:39:29", "throughput": 14590.02, "total_tokens": 14630848} +{"current_steps": 4650, "total_steps": 15621, "loss": 0.4601, "lr": 1.7714753080849664e-06, "epoch": 0.2976762051085078, "percentage": 29.77, "elapsed_time": "0:16:43", "remaining_time": "0:39:27", "throughput": 14597.09, "total_tokens": 14647040} +{"current_steps": 4655, "total_steps": 15621, "loss": 0.4116, "lr": 1.7707638907177837e-06, "epoch": 0.29799628704948466, "percentage": 29.8, "elapsed_time": "0:16:44", "remaining_time": "0:39:25", "throughput": 14603.36, "total_tokens": 14661888} +{"current_steps": 4660, "total_steps": 15621, "loss": 0.7093, "lr": 1.7700515110428336e-06, "epoch": 0.29831636899046154, "percentage": 29.83, "elapsed_time": "0:16:44", "remaining_time": "0:39:23", "throughput": 14610.19, "total_tokens": 14677696} +{"current_steps": 4665, "total_steps": 15621, "loss": 0.4799, "lr": 1.7693381699495307e-06, "epoch": 0.2986364509314384, "percentage": 29.86, "elapsed_time": "0:16:45", "remaining_time": "0:39:20", "throughput": 14616.9, "total_tokens": 14693184} +{"current_steps": 4670, "total_steps": 15621, "loss": 0.3643, "lr": 1.7686238683284894e-06, "epoch": 0.29895653287241536, "percentage": 29.9, "elapsed_time": "0:16:45", "remaining_time": "0:39:18", "throughput": 14623.13, "total_tokens": 14707904} +{"current_steps": 4675, "total_steps": 15621, "loss": 0.3608, "lr": 1.7679086070715237e-06, "epoch": 0.29927661481339224, "percentage": 29.93, "elapsed_time": "0:16:46", "remaining_time": "0:39:16", "throughput": 14630.15, "total_tokens": 14724096} +{"current_steps": 4680, "total_steps": 15621, "loss": 0.4544, "lr": 1.7671923870716459e-06, "epoch": 0.2995966967543691, "percentage": 29.96, "elapsed_time": "0:16:47", "remaining_time": "0:39:14", "throughput": 14636.22, "total_tokens": 14738752} +{"current_steps": 4685, "total_steps": 15621, "loss": 0.3486, "lr": 1.7664752092230652e-06, "epoch": 0.299916778695346, "percentage": 29.99, "elapsed_time": "0:16:47", "remaining_time": "0:39:11", "throughput": 14642.49, "total_tokens": 14753664} +{"current_steps": 4690, "total_steps": 15621, "loss": 0.3784, "lr": 1.7657570744211863e-06, "epoch": 0.3002368606363229, "percentage": 30.02, "elapsed_time": "0:16:48", "remaining_time": "0:39:09", "throughput": 14649.14, "total_tokens": 14769152} +{"current_steps": 4692, "total_steps": 15621, "eval_loss": 0.4629112482070923, "epoch": 0.30036489341271366, "percentage": 30.04, "elapsed_time": "0:17:37", "remaining_time": "0:41:03", "throughput": 13970.66, "total_tokens": 14775488} +{"current_steps": 4695, "total_steps": 15621, "loss": 0.5028, "lr": 1.765037983562609e-06, "epoch": 0.3005569425772998, "percentage": 30.06, "elapsed_time": "0:18:32", "remaining_time": "0:43:07", "throughput": 13294.29, "total_tokens": 14784128} +{"current_steps": 4700, "total_steps": 15621, "loss": 0.4459, "lr": 1.7643179375451264e-06, "epoch": 0.3008770245182767, "percentage": 30.09, "elapsed_time": "0:18:32", "remaining_time": "0:43:05", "throughput": 13301.25, "total_tokens": 14799936} +{"current_steps": 4705, "total_steps": 15621, "loss": 0.6083, "lr": 1.7635969372677252e-06, "epoch": 0.30119710645925357, "percentage": 30.12, "elapsed_time": "0:18:33", "remaining_time": "0:43:02", "throughput": 13307.22, "total_tokens": 14814208} +{"current_steps": 4710, "total_steps": 15621, "loss": 0.483, "lr": 1.7628749836305818e-06, "epoch": 0.30151718840023045, "percentage": 30.15, "elapsed_time": "0:18:33", "remaining_time": "0:43:00", "throughput": 13313.77, "total_tokens": 14829504} +{"current_steps": 4715, "total_steps": 15621, "loss": 0.3949, "lr": 1.7621520775350645e-06, "epoch": 0.30183727034120733, "percentage": 30.18, "elapsed_time": "0:18:34", "remaining_time": "0:42:57", "throughput": 13319.92, "total_tokens": 14843968} +{"current_steps": 4720, "total_steps": 15621, "loss": 0.4567, "lr": 1.7614282198837293e-06, "epoch": 0.30215735228218427, "percentage": 30.22, "elapsed_time": "0:18:35", "remaining_time": "0:42:55", "throughput": 13326.9, "total_tokens": 14859840} +{"current_steps": 4725, "total_steps": 15621, "loss": 0.473, "lr": 1.7607034115803219e-06, "epoch": 0.30247743422316115, "percentage": 30.25, "elapsed_time": "0:18:35", "remaining_time": "0:42:52", "throughput": 13333.65, "total_tokens": 14875648} +{"current_steps": 4730, "total_steps": 15621, "loss": 0.4192, "lr": 1.7599776535297734e-06, "epoch": 0.302797516164138, "percentage": 30.28, "elapsed_time": "0:18:36", "remaining_time": "0:42:50", "throughput": 13339.91, "total_tokens": 14890560} +{"current_steps": 4735, "total_steps": 15621, "loss": 0.4702, "lr": 1.7592509466382012e-06, "epoch": 0.3031175981051149, "percentage": 30.31, "elapsed_time": "0:18:36", "remaining_time": "0:42:47", "throughput": 13346.91, "total_tokens": 14906688} +{"current_steps": 4740, "total_steps": 15621, "loss": 0.5561, "lr": 1.7585232918129076e-06, "epoch": 0.3034376800460918, "percentage": 30.34, "elapsed_time": "0:18:37", "remaining_time": "0:42:45", "throughput": 13353.7, "total_tokens": 14922496} +{"current_steps": 4745, "total_steps": 15621, "loss": 0.4601, "lr": 1.757794689962378e-06, "epoch": 0.30375776198706866, "percentage": 30.38, "elapsed_time": "0:18:38", "remaining_time": "0:42:42", "throughput": 13360.71, "total_tokens": 14938880} +{"current_steps": 4750, "total_steps": 15621, "loss": 0.4968, "lr": 1.7570651419962807e-06, "epoch": 0.3040778439280456, "percentage": 30.41, "elapsed_time": "0:18:38", "remaining_time": "0:42:40", "throughput": 13367.11, "total_tokens": 14954112} +{"current_steps": 4755, "total_steps": 15621, "loss": 0.448, "lr": 1.7563346488254647e-06, "epoch": 0.3043979258690225, "percentage": 30.44, "elapsed_time": "0:18:39", "remaining_time": "0:42:37", "throughput": 13373.69, "total_tokens": 14969536} +{"current_steps": 4760, "total_steps": 15621, "loss": 0.3373, "lr": 1.755603211361959e-06, "epoch": 0.30471800780999936, "percentage": 30.47, "elapsed_time": "0:18:39", "remaining_time": "0:42:35", "throughput": 13380.63, "total_tokens": 14985728} +{"current_steps": 4765, "total_steps": 15621, "loss": 0.452, "lr": 1.7548708305189722e-06, "epoch": 0.30503808975097624, "percentage": 30.5, "elapsed_time": "0:18:40", "remaining_time": "0:42:33", "throughput": 13388.73, "total_tokens": 15003904} +{"current_steps": 4770, "total_steps": 15621, "loss": 0.5662, "lr": 1.7541375072108905e-06, "epoch": 0.3053581716919531, "percentage": 30.54, "elapsed_time": "0:18:41", "remaining_time": "0:42:30", "throughput": 13395.27, "total_tokens": 15019328} +{"current_steps": 4775, "total_steps": 15621, "loss": 0.4597, "lr": 1.7534032423532766e-06, "epoch": 0.30567825363293005, "percentage": 30.57, "elapsed_time": "0:18:41", "remaining_time": "0:42:28", "throughput": 13401.28, "total_tokens": 15033856} +{"current_steps": 4780, "total_steps": 15621, "loss": 0.3603, "lr": 1.7526680368628685e-06, "epoch": 0.30599833557390693, "percentage": 30.6, "elapsed_time": "0:18:42", "remaining_time": "0:42:25", "throughput": 13408.84, "total_tokens": 15051200} +{"current_steps": 4785, "total_steps": 15621, "loss": 0.4471, "lr": 1.751931891657579e-06, "epoch": 0.3063184175148838, "percentage": 30.63, "elapsed_time": "0:18:43", "remaining_time": "0:42:23", "throughput": 13415.23, "total_tokens": 15066368} +{"current_steps": 4790, "total_steps": 15621, "loss": 0.3494, "lr": 1.7511948076564943e-06, "epoch": 0.3066384994558607, "percentage": 30.66, "elapsed_time": "0:18:43", "remaining_time": "0:42:20", "throughput": 13421.6, "total_tokens": 15081600} +{"current_steps": 4795, "total_steps": 15621, "loss": 0.5308, "lr": 1.7504567857798722e-06, "epoch": 0.30695858139683757, "percentage": 30.7, "elapsed_time": "0:18:44", "remaining_time": "0:42:18", "throughput": 13428.35, "total_tokens": 15097536} +{"current_steps": 4800, "total_steps": 15621, "loss": 0.5013, "lr": 1.7497178269491417e-06, "epoch": 0.3072786633378145, "percentage": 30.73, "elapsed_time": "0:18:44", "remaining_time": "0:42:16", "throughput": 13435.28, "total_tokens": 15113728} +{"current_steps": 4805, "total_steps": 15621, "loss": 0.5561, "lr": 1.7489779320869014e-06, "epoch": 0.3075987452787914, "percentage": 30.76, "elapsed_time": "0:18:45", "remaining_time": "0:42:13", "throughput": 13442.26, "total_tokens": 15130048} +{"current_steps": 4810, "total_steps": 15621, "loss": 0.3673, "lr": 1.7482371021169193e-06, "epoch": 0.30791882721976827, "percentage": 30.79, "elapsed_time": "0:18:46", "remaining_time": "0:42:11", "throughput": 13448.84, "total_tokens": 15145600} +{"current_steps": 4815, "total_steps": 15621, "loss": 0.3935, "lr": 1.7474953379641297e-06, "epoch": 0.30823890916074514, "percentage": 30.82, "elapsed_time": "0:18:46", "remaining_time": "0:42:08", "throughput": 13456.05, "total_tokens": 15162368} +{"current_steps": 4820, "total_steps": 15621, "loss": 0.4323, "lr": 1.746752640554634e-06, "epoch": 0.308558991101722, "percentage": 30.86, "elapsed_time": "0:18:47", "remaining_time": "0:42:06", "throughput": 13462.86, "total_tokens": 15178368} +{"current_steps": 4825, "total_steps": 15621, "loss": 0.5467, "lr": 1.7460090108156988e-06, "epoch": 0.3088790730426989, "percentage": 30.89, "elapsed_time": "0:18:48", "remaining_time": "0:42:03", "throughput": 13469.18, "total_tokens": 15193408} +{"current_steps": 4830, "total_steps": 15621, "loss": 0.3081, "lr": 1.7452644496757548e-06, "epoch": 0.30919915498367584, "percentage": 30.92, "elapsed_time": "0:18:48", "remaining_time": "0:42:01", "throughput": 13475.54, "total_tokens": 15208640} +{"current_steps": 4835, "total_steps": 15621, "loss": 0.4533, "lr": 1.7445189580643946e-06, "epoch": 0.3095192369246527, "percentage": 30.95, "elapsed_time": "0:18:49", "remaining_time": "0:41:59", "throughput": 13482.16, "total_tokens": 15224192} +{"current_steps": 4840, "total_steps": 15621, "loss": 0.5119, "lr": 1.7437725369123737e-06, "epoch": 0.3098393188656296, "percentage": 30.98, "elapsed_time": "0:18:49", "remaining_time": "0:41:56", "throughput": 13488.59, "total_tokens": 15239616} +{"current_steps": 4845, "total_steps": 15621, "loss": 0.4595, "lr": 1.7430251871516077e-06, "epoch": 0.3101594008066065, "percentage": 31.02, "elapsed_time": "0:18:50", "remaining_time": "0:41:54", "throughput": 13495.39, "total_tokens": 15255680} +{"current_steps": 4850, "total_steps": 15621, "loss": 0.4886, "lr": 1.7422769097151715e-06, "epoch": 0.31047948274758336, "percentage": 31.05, "elapsed_time": "0:18:51", "remaining_time": "0:41:51", "throughput": 13501.91, "total_tokens": 15271232} +{"current_steps": 4855, "total_steps": 15621, "loss": 0.4938, "lr": 1.7415277055372982e-06, "epoch": 0.3107995646885603, "percentage": 31.08, "elapsed_time": "0:18:51", "remaining_time": "0:41:49", "throughput": 13508.5, "total_tokens": 15287040} +{"current_steps": 4860, "total_steps": 15621, "loss": 0.5025, "lr": 1.7407775755533778e-06, "epoch": 0.31111964662953717, "percentage": 31.11, "elapsed_time": "0:18:52", "remaining_time": "0:41:47", "throughput": 13515.9, "total_tokens": 15304256} +{"current_steps": 4865, "total_steps": 15621, "loss": 0.3567, "lr": 1.7400265206999568e-06, "epoch": 0.31143972857051405, "percentage": 31.14, "elapsed_time": "0:18:52", "remaining_time": "0:41:44", "throughput": 13523.61, "total_tokens": 15322112} +{"current_steps": 4870, "total_steps": 15621, "loss": 0.5436, "lr": 1.7392745419147362e-06, "epoch": 0.31175981051149093, "percentage": 31.18, "elapsed_time": "0:18:53", "remaining_time": "0:41:42", "throughput": 13529.87, "total_tokens": 15337216} +{"current_steps": 4875, "total_steps": 15621, "loss": 0.4521, "lr": 1.7385216401365693e-06, "epoch": 0.3120798924524678, "percentage": 31.21, "elapsed_time": "0:18:54", "remaining_time": "0:41:40", "throughput": 13536.98, "total_tokens": 15354048} +{"current_steps": 4880, "total_steps": 15621, "loss": 0.4933, "lr": 1.7377678163054638e-06, "epoch": 0.31239997439344475, "percentage": 31.24, "elapsed_time": "0:18:54", "remaining_time": "0:41:37", "throughput": 13543.24, "total_tokens": 15369344} +{"current_steps": 4885, "total_steps": 15621, "loss": 0.4949, "lr": 1.7370130713625775e-06, "epoch": 0.3127200563344216, "percentage": 31.27, "elapsed_time": "0:18:55", "remaining_time": "0:41:35", "throughput": 13550.26, "total_tokens": 15385920} +{"current_steps": 4890, "total_steps": 15621, "loss": 0.3867, "lr": 1.736257406250218e-06, "epoch": 0.3130401382753985, "percentage": 31.3, "elapsed_time": "0:18:56", "remaining_time": "0:41:33", "throughput": 13556.71, "total_tokens": 15401536} +{"current_steps": 4895, "total_steps": 15621, "loss": 0.4501, "lr": 1.735500821911842e-06, "epoch": 0.3133602202163754, "percentage": 31.34, "elapsed_time": "0:18:56", "remaining_time": "0:41:30", "throughput": 13563.15, "total_tokens": 15417152} +{"current_steps": 4900, "total_steps": 15621, "loss": 0.4949, "lr": 1.7347433192920544e-06, "epoch": 0.31368030215735226, "percentage": 31.37, "elapsed_time": "0:18:57", "remaining_time": "0:41:28", "throughput": 13569.13, "total_tokens": 15431872} +{"current_steps": 4905, "total_steps": 15621, "loss": 0.4021, "lr": 1.7339848993366056e-06, "epoch": 0.3140003840983292, "percentage": 31.4, "elapsed_time": "0:18:57", "remaining_time": "0:41:25", "throughput": 13575.7, "total_tokens": 15447552} +{"current_steps": 4910, "total_steps": 15621, "loss": 0.4615, "lr": 1.7332255629923922e-06, "epoch": 0.3143204660393061, "percentage": 31.43, "elapsed_time": "0:18:58", "remaining_time": "0:41:23", "throughput": 13582.78, "total_tokens": 15464384} +{"current_steps": 4915, "total_steps": 15621, "loss": 0.4968, "lr": 1.732465311207454e-06, "epoch": 0.31464054798028296, "percentage": 31.46, "elapsed_time": "0:18:59", "remaining_time": "0:41:21", "throughput": 13589.02, "total_tokens": 15479808} +{"current_steps": 4920, "total_steps": 15621, "loss": 0.4973, "lr": 1.731704144930975e-06, "epoch": 0.31496062992125984, "percentage": 31.5, "elapsed_time": "0:18:59", "remaining_time": "0:41:19", "throughput": 13595.97, "total_tokens": 15496512} +{"current_steps": 4925, "total_steps": 15621, "loss": 0.4094, "lr": 1.7309420651132797e-06, "epoch": 0.3152807118622367, "percentage": 31.53, "elapsed_time": "0:19:00", "remaining_time": "0:41:16", "throughput": 13602.79, "total_tokens": 15512896} +{"current_steps": 4930, "total_steps": 15621, "loss": 0.3234, "lr": 1.7301790727058343e-06, "epoch": 0.3156007938032136, "percentage": 31.56, "elapsed_time": "0:19:01", "remaining_time": "0:41:14", "throughput": 13608.97, "total_tokens": 15528064} +{"current_steps": 4935, "total_steps": 15621, "loss": 0.3618, "lr": 1.7294151686612431e-06, "epoch": 0.31592087574419053, "percentage": 31.59, "elapsed_time": "0:19:01", "remaining_time": "0:41:12", "throughput": 13615.23, "total_tokens": 15543424} +{"current_steps": 4940, "total_steps": 15621, "loss": 0.5609, "lr": 1.7286503539332495e-06, "epoch": 0.3162409576851674, "percentage": 31.62, "elapsed_time": "0:19:02", "remaining_time": "0:41:09", "throughput": 13622.14, "total_tokens": 15560192} +{"current_steps": 4945, "total_steps": 15621, "loss": 0.3968, "lr": 1.7278846294767337e-06, "epoch": 0.3165610396261443, "percentage": 31.66, "elapsed_time": "0:19:02", "remaining_time": "0:41:07", "throughput": 13628.6, "total_tokens": 15576128} +{"current_steps": 4950, "total_steps": 15621, "loss": 0.7032, "lr": 1.7271179962477118e-06, "epoch": 0.31688112156712117, "percentage": 31.69, "elapsed_time": "0:19:03", "remaining_time": "0:41:05", "throughput": 13635.39, "total_tokens": 15592576} +{"current_steps": 4955, "total_steps": 15621, "loss": 0.4261, "lr": 1.7263504552033341e-06, "epoch": 0.31720120350809805, "percentage": 31.72, "elapsed_time": "0:19:04", "remaining_time": "0:41:02", "throughput": 13641.48, "total_tokens": 15607744} +{"current_steps": 4960, "total_steps": 15621, "loss": 0.4846, "lr": 1.725582007301885e-06, "epoch": 0.317521285449075, "percentage": 31.75, "elapsed_time": "0:19:04", "remaining_time": "0:41:00", "throughput": 13647.82, "total_tokens": 15623360} +{"current_steps": 4965, "total_steps": 15621, "loss": 0.4213, "lr": 1.7248126535027806e-06, "epoch": 0.31784136739005187, "percentage": 31.78, "elapsed_time": "0:19:05", "remaining_time": "0:40:58", "throughput": 13654.08, "total_tokens": 15638656} +{"current_steps": 4970, "total_steps": 15621, "loss": 0.4632, "lr": 1.7240423947665678e-06, "epoch": 0.31816144933102875, "percentage": 31.82, "elapsed_time": "0:19:05", "remaining_time": "0:40:55", "throughput": 13660.48, "total_tokens": 15654400} +{"current_steps": 4975, "total_steps": 15621, "loss": 0.3822, "lr": 1.723271232054924e-06, "epoch": 0.3184815312720056, "percentage": 31.85, "elapsed_time": "0:19:06", "remaining_time": "0:40:53", "throughput": 13666.78, "total_tokens": 15670016} +{"current_steps": 4980, "total_steps": 15621, "loss": 0.4977, "lr": 1.722499166330655e-06, "epoch": 0.3188016132129825, "percentage": 31.88, "elapsed_time": "0:19:07", "remaining_time": "0:40:51", "throughput": 13673.46, "total_tokens": 15686208} +{"current_steps": 4985, "total_steps": 15621, "loss": 0.44, "lr": 1.7217261985576936e-06, "epoch": 0.31912169515395944, "percentage": 31.91, "elapsed_time": "0:19:07", "remaining_time": "0:40:49", "throughput": 13680.14, "total_tokens": 15702592} +{"current_steps": 4990, "total_steps": 15621, "loss": 0.5176, "lr": 1.7209523297010992e-06, "epoch": 0.3194417770949363, "percentage": 31.94, "elapsed_time": "0:19:08", "remaining_time": "0:40:46", "throughput": 13686.17, "total_tokens": 15717696} +{"current_steps": 4995, "total_steps": 15621, "loss": 0.4644, "lr": 1.7201775607270564e-06, "epoch": 0.3197618590359132, "percentage": 31.98, "elapsed_time": "0:19:09", "remaining_time": "0:40:44", "throughput": 13692.38, "total_tokens": 15733184} +{"current_steps": 5000, "total_steps": 15621, "loss": 0.5267, "lr": 1.7194018926028733e-06, "epoch": 0.3200819409768901, "percentage": 32.01, "elapsed_time": "0:19:09", "remaining_time": "0:40:42", "throughput": 13699.14, "total_tokens": 15749888} +{"current_steps": 5005, "total_steps": 15621, "loss": 0.3621, "lr": 1.7186253262969803e-06, "epoch": 0.32040202291786696, "percentage": 32.04, "elapsed_time": "0:19:10", "remaining_time": "0:40:40", "throughput": 13706.86, "total_tokens": 15768384} +{"current_steps": 5010, "total_steps": 15621, "loss": 0.3269, "lr": 1.7178478627789299e-06, "epoch": 0.32072210485884384, "percentage": 32.07, "elapsed_time": "0:19:11", "remaining_time": "0:40:37", "throughput": 13713.33, "total_tokens": 15784448} +{"current_steps": 5015, "total_steps": 15621, "loss": 0.4088, "lr": 1.7170695030193944e-06, "epoch": 0.3210421867998208, "percentage": 32.1, "elapsed_time": "0:19:11", "remaining_time": "0:40:35", "throughput": 13719.76, "total_tokens": 15800512} +{"current_steps": 5020, "total_steps": 15621, "loss": 0.4744, "lr": 1.716290247990165e-06, "epoch": 0.32136226874079765, "percentage": 32.14, "elapsed_time": "0:19:12", "remaining_time": "0:40:33", "throughput": 13725.72, "total_tokens": 15815680} +{"current_steps": 5025, "total_steps": 15621, "loss": 0.3939, "lr": 1.715510098664151e-06, "epoch": 0.32168235068177453, "percentage": 32.17, "elapsed_time": "0:19:12", "remaining_time": "0:40:30", "throughput": 13731.52, "total_tokens": 15830528} +{"current_steps": 5030, "total_steps": 15621, "loss": 0.4933, "lr": 1.7147290560153777e-06, "epoch": 0.3220024326227514, "percentage": 32.2, "elapsed_time": "0:19:13", "remaining_time": "0:40:28", "throughput": 13737.4, "total_tokens": 15845568} +{"current_steps": 5035, "total_steps": 15621, "loss": 0.4531, "lr": 1.7139471210189862e-06, "epoch": 0.3223225145637283, "percentage": 32.23, "elapsed_time": "0:19:14", "remaining_time": "0:40:26", "throughput": 13743.91, "total_tokens": 15861632} +{"current_steps": 5040, "total_steps": 15621, "loss": 0.5187, "lr": 1.7131642946512312e-06, "epoch": 0.3226425965047052, "percentage": 32.26, "elapsed_time": "0:19:14", "remaining_time": "0:40:24", "throughput": 13750.3, "total_tokens": 15877632} +{"current_steps": 5045, "total_steps": 15621, "loss": 0.37, "lr": 1.712380577889481e-06, "epoch": 0.3229626784456821, "percentage": 32.3, "elapsed_time": "0:19:15", "remaining_time": "0:40:21", "throughput": 13756.51, "total_tokens": 15893184} +{"current_steps": 5050, "total_steps": 15621, "loss": 0.3955, "lr": 1.711595971712215e-06, "epoch": 0.323282760386659, "percentage": 32.33, "elapsed_time": "0:19:15", "remaining_time": "0:40:19", "throughput": 13762.45, "total_tokens": 15908416} +{"current_steps": 5055, "total_steps": 15621, "loss": 0.4074, "lr": 1.7108104770990234e-06, "epoch": 0.32360284232763586, "percentage": 32.36, "elapsed_time": "0:19:16", "remaining_time": "0:40:17", "throughput": 13768.69, "total_tokens": 15924224} +{"current_steps": 5060, "total_steps": 15621, "loss": 0.2532, "lr": 1.7100240950306052e-06, "epoch": 0.32392292426861274, "percentage": 32.39, "elapsed_time": "0:19:17", "remaining_time": "0:40:15", "throughput": 13774.97, "total_tokens": 15940032} +{"current_steps": 5065, "total_steps": 15621, "loss": 0.4556, "lr": 1.7092368264887677e-06, "epoch": 0.3242430062095897, "percentage": 32.42, "elapsed_time": "0:19:17", "remaining_time": "0:40:12", "throughput": 13780.77, "total_tokens": 15954944} +{"current_steps": 5070, "total_steps": 15621, "loss": 0.4923, "lr": 1.7084486724564252e-06, "epoch": 0.32456308815056656, "percentage": 32.46, "elapsed_time": "0:19:18", "remaining_time": "0:40:10", "throughput": 13787.04, "total_tokens": 15970624} +{"current_steps": 5075, "total_steps": 15621, "loss": 0.418, "lr": 1.707659633917597e-06, "epoch": 0.32488317009154344, "percentage": 32.49, "elapsed_time": "0:19:19", "remaining_time": "0:40:08", "throughput": 13793.44, "total_tokens": 15986688} +{"current_steps": 5080, "total_steps": 15621, "loss": 0.4172, "lr": 1.7068697118574064e-06, "epoch": 0.3252032520325203, "percentage": 32.52, "elapsed_time": "0:19:19", "remaining_time": "0:40:06", "throughput": 13799.83, "total_tokens": 16002752} +{"current_steps": 5085, "total_steps": 15621, "loss": 0.4924, "lr": 1.7060789072620816e-06, "epoch": 0.3255233339734972, "percentage": 32.55, "elapsed_time": "0:19:20", "remaining_time": "0:40:03", "throughput": 13805.97, "total_tokens": 16018112} +{"current_steps": 5090, "total_steps": 15621, "loss": 0.411, "lr": 1.7052872211189509e-06, "epoch": 0.32584341591447413, "percentage": 32.58, "elapsed_time": "0:19:20", "remaining_time": "0:40:01", "throughput": 13812.27, "total_tokens": 16033984} +{"current_steps": 5095, "total_steps": 15621, "loss": 0.3263, "lr": 1.7044946544164431e-06, "epoch": 0.326163497855451, "percentage": 32.62, "elapsed_time": "0:19:21", "remaining_time": "0:39:59", "throughput": 13818.4, "total_tokens": 16049536} +{"current_steps": 5100, "total_steps": 15621, "loss": 0.3722, "lr": 1.703701208144088e-06, "epoch": 0.3264835797964279, "percentage": 32.65, "elapsed_time": "0:19:22", "remaining_time": "0:39:57", "throughput": 13825.22, "total_tokens": 16066304} +{"current_steps": 5105, "total_steps": 15621, "loss": 0.4627, "lr": 1.702906883292512e-06, "epoch": 0.32680366173740477, "percentage": 32.68, "elapsed_time": "0:19:22", "remaining_time": "0:39:55", "throughput": 13831.18, "total_tokens": 16081536} +{"current_steps": 5110, "total_steps": 15621, "loss": 0.5501, "lr": 1.7021116808534393e-06, "epoch": 0.32712374367838165, "percentage": 32.71, "elapsed_time": "0:19:23", "remaining_time": "0:39:52", "throughput": 13837.28, "total_tokens": 16096896} +{"current_steps": 5115, "total_steps": 15621, "loss": 0.4294, "lr": 1.7013156018196893e-06, "epoch": 0.32744382561935853, "percentage": 32.74, "elapsed_time": "0:19:23", "remaining_time": "0:39:50", "throughput": 13843.72, "total_tokens": 16112960} +{"current_steps": 5120, "total_steps": 15621, "loss": 0.4168, "lr": 1.7005186471851759e-06, "epoch": 0.32776390756033547, "percentage": 32.78, "elapsed_time": "0:19:24", "remaining_time": "0:39:48", "throughput": 13850.33, "total_tokens": 16129344} +{"current_steps": 5125, "total_steps": 15621, "loss": 0.5931, "lr": 1.6997208179449066e-06, "epoch": 0.32808398950131235, "percentage": 32.81, "elapsed_time": "0:19:25", "remaining_time": "0:39:46", "throughput": 13857.98, "total_tokens": 16147776} +{"current_steps": 5130, "total_steps": 15621, "loss": 0.3523, "lr": 1.6989221150949806e-06, "epoch": 0.3284040714422892, "percentage": 32.84, "elapsed_time": "0:19:25", "remaining_time": "0:39:44", "throughput": 13863.83, "total_tokens": 16162880} +{"current_steps": 5135, "total_steps": 15621, "loss": 0.2737, "lr": 1.6981225396325873e-06, "epoch": 0.3287241533832661, "percentage": 32.87, "elapsed_time": "0:19:26", "remaining_time": "0:39:41", "throughput": 13870.47, "total_tokens": 16179392} +{"current_steps": 5140, "total_steps": 15621, "loss": 0.5036, "lr": 1.6973220925560067e-06, "epoch": 0.329044235324243, "percentage": 32.9, "elapsed_time": "0:19:27", "remaining_time": "0:39:39", "throughput": 13876.34, "total_tokens": 16194560} +{"current_steps": 5145, "total_steps": 15621, "loss": 0.4281, "lr": 1.696520774864606e-06, "epoch": 0.3293643172652199, "percentage": 32.94, "elapsed_time": "0:19:27", "remaining_time": "0:39:37", "throughput": 13882.5, "total_tokens": 16210112} +{"current_steps": 5150, "total_steps": 15621, "loss": 0.4646, "lr": 1.69571858755884e-06, "epoch": 0.3296843992061968, "percentage": 32.97, "elapsed_time": "0:19:28", "remaining_time": "0:39:35", "throughput": 13888.76, "total_tokens": 16225856} +{"current_steps": 5155, "total_steps": 15621, "loss": 0.4177, "lr": 1.6949155316402487e-06, "epoch": 0.3300044811471737, "percentage": 33.0, "elapsed_time": "0:19:28", "remaining_time": "0:39:33", "throughput": 13894.82, "total_tokens": 16241536} +{"current_steps": 5160, "total_steps": 15621, "loss": 0.3777, "lr": 1.6941116081114566e-06, "epoch": 0.33032456308815056, "percentage": 33.03, "elapsed_time": "0:19:29", "remaining_time": "0:39:30", "throughput": 13900.43, "total_tokens": 16256384} +{"current_steps": 5165, "total_steps": 15621, "loss": 0.3937, "lr": 1.6933068179761722e-06, "epoch": 0.33064464502912744, "percentage": 33.06, "elapsed_time": "0:19:30", "remaining_time": "0:39:28", "throughput": 13906.22, "total_tokens": 16271360} +{"current_steps": 5170, "total_steps": 15621, "loss": 0.4118, "lr": 1.6925011622391857e-06, "epoch": 0.3309647269701044, "percentage": 33.1, "elapsed_time": "0:19:30", "remaining_time": "0:39:26", "throughput": 13912.14, "total_tokens": 16286656} +{"current_steps": 5175, "total_steps": 15621, "loss": 0.4038, "lr": 1.6916946419063667e-06, "epoch": 0.33128480891108125, "percentage": 33.13, "elapsed_time": "0:19:31", "remaining_time": "0:39:24", "throughput": 13918.39, "total_tokens": 16302592} +{"current_steps": 5180, "total_steps": 15621, "loss": 0.5252, "lr": 1.690887257984666e-06, "epoch": 0.33160489085205813, "percentage": 33.16, "elapsed_time": "0:19:31", "remaining_time": "0:39:22", "throughput": 13924.66, "total_tokens": 16318656} +{"current_steps": 5185, "total_steps": 15621, "loss": 0.4784, "lr": 1.690079011482112e-06, "epoch": 0.331924972793035, "percentage": 33.19, "elapsed_time": "0:19:32", "remaining_time": "0:39:19", "throughput": 13930.71, "total_tokens": 16334016} +{"current_steps": 5190, "total_steps": 15621, "loss": 0.5322, "lr": 1.6892699034078096e-06, "epoch": 0.3322450547340119, "percentage": 33.22, "elapsed_time": "0:19:33", "remaining_time": "0:39:17", "throughput": 13936.97, "total_tokens": 16349888} +{"current_steps": 5195, "total_steps": 15621, "loss": 0.5017, "lr": 1.68845993477194e-06, "epoch": 0.33256513667498877, "percentage": 33.26, "elapsed_time": "0:19:33", "remaining_time": "0:39:15", "throughput": 13942.83, "total_tokens": 16365056} +{"current_steps": 5200, "total_steps": 15621, "loss": 0.3857, "lr": 1.6876491065857584e-06, "epoch": 0.3328852186159657, "percentage": 33.29, "elapsed_time": "0:19:34", "remaining_time": "0:39:13", "throughput": 13948.53, "total_tokens": 16380032} +{"current_steps": 5205, "total_steps": 15621, "loss": 0.6437, "lr": 1.6868374198615928e-06, "epoch": 0.3332053005569426, "percentage": 33.32, "elapsed_time": "0:19:34", "remaining_time": "0:39:11", "throughput": 13954.13, "total_tokens": 16394752} +{"current_steps": 5210, "total_steps": 15621, "loss": 0.4782, "lr": 1.6860248756128448e-06, "epoch": 0.33352538249791946, "percentage": 33.35, "elapsed_time": "0:19:35", "remaining_time": "0:39:08", "throughput": 13960.15, "total_tokens": 16410368} +{"current_steps": 5215, "total_steps": 15621, "loss": 0.3992, "lr": 1.6852114748539844e-06, "epoch": 0.33384546443889634, "percentage": 33.38, "elapsed_time": "0:19:36", "remaining_time": "0:39:06", "throughput": 13965.68, "total_tokens": 16425088} +{"current_steps": 5220, "total_steps": 15621, "loss": 0.3352, "lr": 1.6843972186005525e-06, "epoch": 0.3341655463798732, "percentage": 33.42, "elapsed_time": "0:19:36", "remaining_time": "0:39:04", "throughput": 13972.0, "total_tokens": 16441152} +{"current_steps": 5225, "total_steps": 15621, "loss": 0.4641, "lr": 1.6835821078691577e-06, "epoch": 0.33448562832085016, "percentage": 33.45, "elapsed_time": "0:19:37", "remaining_time": "0:39:02", "throughput": 13978.8, "total_tokens": 16458240} +{"current_steps": 5230, "total_steps": 15621, "loss": 0.4142, "lr": 1.6827661436774746e-06, "epoch": 0.33480571026182704, "percentage": 33.48, "elapsed_time": "0:19:37", "remaining_time": "0:39:00", "throughput": 13984.99, "total_tokens": 16474112} +{"current_steps": 5235, "total_steps": 15621, "loss": 0.3955, "lr": 1.681949327044245e-06, "epoch": 0.3351257922028039, "percentage": 33.51, "elapsed_time": "0:19:38", "remaining_time": "0:38:58", "throughput": 13991.49, "total_tokens": 16490560} +{"current_steps": 5240, "total_steps": 15621, "loss": 0.6757, "lr": 1.6811316589892734e-06, "epoch": 0.3354458741437808, "percentage": 33.54, "elapsed_time": "0:19:39", "remaining_time": "0:38:56", "throughput": 13997.26, "total_tokens": 16505728} +{"current_steps": 5245, "total_steps": 15621, "loss": 0.4257, "lr": 1.6803131405334284e-06, "epoch": 0.3357659560847577, "percentage": 33.58, "elapsed_time": "0:19:39", "remaining_time": "0:38:54", "throughput": 14003.58, "total_tokens": 16521856} +{"current_steps": 5250, "total_steps": 15621, "loss": 0.4271, "lr": 1.6794937726986396e-06, "epoch": 0.3360860380257346, "percentage": 33.61, "elapsed_time": "0:19:40", "remaining_time": "0:38:51", "throughput": 14009.7, "total_tokens": 16537792} +{"current_steps": 5255, "total_steps": 15621, "loss": 0.434, "lr": 1.6786735565078974e-06, "epoch": 0.3364061199667115, "percentage": 33.64, "elapsed_time": "0:19:41", "remaining_time": "0:38:49", "throughput": 14015.68, "total_tokens": 16553408} +{"current_steps": 5260, "total_steps": 15621, "loss": 0.4297, "lr": 1.677852492985251e-06, "epoch": 0.33672620190768837, "percentage": 33.67, "elapsed_time": "0:19:41", "remaining_time": "0:38:47", "throughput": 14022.23, "total_tokens": 16570112} +{"current_steps": 5265, "total_steps": 15621, "loss": 0.4931, "lr": 1.6770305831558086e-06, "epoch": 0.33704628384866525, "percentage": 33.7, "elapsed_time": "0:19:42", "remaining_time": "0:38:45", "throughput": 14028.45, "total_tokens": 16586304} +{"current_steps": 5270, "total_steps": 15621, "loss": 0.3922, "lr": 1.6762078280457342e-06, "epoch": 0.33736636578964213, "percentage": 33.74, "elapsed_time": "0:19:42", "remaining_time": "0:38:43", "throughput": 14034.35, "total_tokens": 16601920} +{"current_steps": 5275, "total_steps": 15621, "loss": 0.4797, "lr": 1.6753842286822465e-06, "epoch": 0.33768644773061907, "percentage": 33.77, "elapsed_time": "0:19:43", "remaining_time": "0:38:41", "throughput": 14040.72, "total_tokens": 16618240} +{"current_steps": 5280, "total_steps": 15621, "loss": 0.59, "lr": 1.6745597860936199e-06, "epoch": 0.33800652967159595, "percentage": 33.8, "elapsed_time": "0:19:44", "remaining_time": "0:38:39", "throughput": 14046.49, "total_tokens": 16633408} +{"current_steps": 5285, "total_steps": 15621, "loss": 0.439, "lr": 1.6737345013091794e-06, "epoch": 0.3383266116125728, "percentage": 33.83, "elapsed_time": "0:19:44", "remaining_time": "0:38:37", "throughput": 14052.76, "total_tokens": 16649664} +{"current_steps": 5290, "total_steps": 15621, "loss": 0.4602, "lr": 1.672908375359304e-06, "epoch": 0.3386466935535497, "percentage": 33.86, "elapsed_time": "0:19:45", "remaining_time": "0:38:34", "throughput": 14058.51, "total_tokens": 16664896} +{"current_steps": 5295, "total_steps": 15621, "loss": 0.5433, "lr": 1.6720814092754209e-06, "epoch": 0.3389667754945266, "percentage": 33.9, "elapsed_time": "0:19:46", "remaining_time": "0:38:32", "throughput": 14064.34, "total_tokens": 16680384} +{"current_steps": 5300, "total_steps": 15621, "loss": 0.3696, "lr": 1.6712536040900075e-06, "epoch": 0.33928685743550346, "percentage": 33.93, "elapsed_time": "0:19:46", "remaining_time": "0:38:30", "throughput": 14070.46, "total_tokens": 16696192} +{"current_steps": 5305, "total_steps": 15621, "loss": 0.4752, "lr": 1.6704249608365878e-06, "epoch": 0.3396069393764804, "percentage": 33.96, "elapsed_time": "0:19:47", "remaining_time": "0:38:29", "throughput": 14082.79, "total_tokens": 16727104} +{"current_steps": 5310, "total_steps": 15621, "loss": 0.4154, "lr": 1.669595480549733e-06, "epoch": 0.3399270213174573, "percentage": 33.99, "elapsed_time": "0:19:48", "remaining_time": "0:38:27", "throughput": 14088.26, "total_tokens": 16741696} +{"current_steps": 5315, "total_steps": 15621, "loss": 0.432, "lr": 1.6687651642650587e-06, "epoch": 0.34024710325843416, "percentage": 34.02, "elapsed_time": "0:19:48", "remaining_time": "0:38:25", "throughput": 14094.09, "total_tokens": 16757120} +{"current_steps": 5320, "total_steps": 15621, "loss": 0.4471, "lr": 1.6679340130192245e-06, "epoch": 0.34056718519941104, "percentage": 34.06, "elapsed_time": "0:19:49", "remaining_time": "0:38:23", "throughput": 14099.78, "total_tokens": 16772416} +{"current_steps": 5325, "total_steps": 15621, "loss": 0.3172, "lr": 1.667102027849933e-06, "epoch": 0.3408872671403879, "percentage": 34.09, "elapsed_time": "0:19:50", "remaining_time": "0:38:21", "throughput": 14105.92, "total_tokens": 16788352} +{"current_steps": 5330, "total_steps": 15621, "loss": 0.3456, "lr": 1.6662692097959266e-06, "epoch": 0.34120734908136485, "percentage": 34.12, "elapsed_time": "0:19:50", "remaining_time": "0:38:19", "throughput": 14111.65, "total_tokens": 16803648} +{"current_steps": 5335, "total_steps": 15621, "loss": 0.4708, "lr": 1.6654355598969894e-06, "epoch": 0.34152743102234173, "percentage": 34.15, "elapsed_time": "0:19:51", "remaining_time": "0:38:16", "throughput": 14117.4, "total_tokens": 16818944} +{"current_steps": 5340, "total_steps": 15621, "loss": 0.5078, "lr": 1.6646010791939423e-06, "epoch": 0.3418475129633186, "percentage": 34.18, "elapsed_time": "0:19:51", "remaining_time": "0:38:14", "throughput": 14122.99, "total_tokens": 16833984} +{"current_steps": 5345, "total_steps": 15621, "loss": 0.5507, "lr": 1.6637657687286446e-06, "epoch": 0.3421675949042955, "percentage": 34.22, "elapsed_time": "0:19:52", "remaining_time": "0:38:12", "throughput": 14128.81, "total_tokens": 16849280} +{"current_steps": 5350, "total_steps": 15621, "loss": 0.3979, "lr": 1.6629296295439912e-06, "epoch": 0.34248767684527237, "percentage": 34.25, "elapsed_time": "0:19:53", "remaining_time": "0:38:10", "throughput": 14135.01, "total_tokens": 16865664} +{"current_steps": 5355, "total_steps": 15621, "loss": 0.4884, "lr": 1.6620926626839116e-06, "epoch": 0.3428077587862493, "percentage": 34.28, "elapsed_time": "0:19:53", "remaining_time": "0:38:08", "throughput": 14140.91, "total_tokens": 16881536} +{"current_steps": 5360, "total_steps": 15621, "loss": 0.4395, "lr": 1.661254869193369e-06, "epoch": 0.3431278407272262, "percentage": 34.31, "elapsed_time": "0:19:54", "remaining_time": "0:38:06", "throughput": 14147.61, "total_tokens": 16898816} +{"current_steps": 5365, "total_steps": 15621, "loss": 0.5104, "lr": 1.6604162501183581e-06, "epoch": 0.34344792266820307, "percentage": 34.34, "elapsed_time": "0:19:55", "remaining_time": "0:38:04", "throughput": 14153.81, "total_tokens": 16915136} +{"current_steps": 5370, "total_steps": 15621, "loss": 0.4607, "lr": 1.6595768065059045e-06, "epoch": 0.34376800460917994, "percentage": 34.38, "elapsed_time": "0:19:55", "remaining_time": "0:38:02", "throughput": 14159.81, "total_tokens": 16931200} +{"current_steps": 5375, "total_steps": 15621, "loss": 0.4652, "lr": 1.6587365394040641e-06, "epoch": 0.3440880865501568, "percentage": 34.41, "elapsed_time": "0:19:56", "remaining_time": "0:38:00", "throughput": 14165.59, "total_tokens": 16946816} +{"current_steps": 5380, "total_steps": 15621, "loss": 0.3893, "lr": 1.6578954498619195e-06, "epoch": 0.3444081684911337, "percentage": 34.44, "elapsed_time": "0:19:56", "remaining_time": "0:37:58", "throughput": 14171.61, "total_tokens": 16962880} +{"current_steps": 5385, "total_steps": 15621, "loss": 0.4587, "lr": 1.6570535389295814e-06, "epoch": 0.34472825043211064, "percentage": 34.47, "elapsed_time": "0:19:57", "remaining_time": "0:37:56", "throughput": 14177.32, "total_tokens": 16978240} +{"current_steps": 5390, "total_steps": 15621, "loss": 0.3628, "lr": 1.6562108076581853e-06, "epoch": 0.3450483323730875, "percentage": 34.5, "elapsed_time": "0:19:58", "remaining_time": "0:37:54", "throughput": 14183.12, "total_tokens": 16993728} +{"current_steps": 5395, "total_steps": 15621, "loss": 0.5903, "lr": 1.6553672570998912e-06, "epoch": 0.3453684143140644, "percentage": 34.54, "elapsed_time": "0:19:58", "remaining_time": "0:37:52", "throughput": 14189.06, "total_tokens": 17009728} +{"current_steps": 5400, "total_steps": 15621, "loss": 0.4174, "lr": 1.6545228883078815e-06, "epoch": 0.3456884962550413, "percentage": 34.57, "elapsed_time": "0:19:59", "remaining_time": "0:37:50", "throughput": 14194.54, "total_tokens": 17024640} +{"current_steps": 5405, "total_steps": 15621, "loss": 0.3541, "lr": 1.653677702336361e-06, "epoch": 0.34600857819601816, "percentage": 34.6, "elapsed_time": "0:20:00", "remaining_time": "0:37:48", "throughput": 14200.43, "total_tokens": 17040512} +{"current_steps": 5410, "total_steps": 15621, "loss": 0.4657, "lr": 1.6528317002405538e-06, "epoch": 0.3463286601369951, "percentage": 34.63, "elapsed_time": "0:20:00", "remaining_time": "0:37:46", "throughput": 14206.21, "total_tokens": 17056064} +{"current_steps": 5415, "total_steps": 15621, "loss": 0.3692, "lr": 1.6519848830767043e-06, "epoch": 0.34664874207797197, "percentage": 34.66, "elapsed_time": "0:20:01", "remaining_time": "0:37:44", "throughput": 14212.43, "total_tokens": 17072448} +{"current_steps": 5420, "total_steps": 15621, "loss": 0.6197, "lr": 1.6511372519020726e-06, "epoch": 0.34696882401894885, "percentage": 34.7, "elapsed_time": "0:20:01", "remaining_time": "0:37:42", "throughput": 14218.29, "total_tokens": 17088320} +{"current_steps": 5425, "total_steps": 15621, "loss": 0.4291, "lr": 1.650288807774937e-06, "epoch": 0.34728890595992573, "percentage": 34.73, "elapsed_time": "0:20:02", "remaining_time": "0:37:39", "throughput": 14224.35, "total_tokens": 17104448} +{"current_steps": 5430, "total_steps": 15621, "loss": 0.3964, "lr": 1.6494395517545893e-06, "epoch": 0.3476089879009026, "percentage": 34.76, "elapsed_time": "0:20:03", "remaining_time": "0:37:38", "throughput": 14231.06, "total_tokens": 17121856} +{"current_steps": 5435, "total_steps": 15621, "loss": 0.5052, "lr": 1.6485894849013362e-06, "epoch": 0.34792906984187955, "percentage": 34.79, "elapsed_time": "0:20:03", "remaining_time": "0:37:35", "throughput": 14236.39, "total_tokens": 17136512} +{"current_steps": 5440, "total_steps": 15621, "loss": 0.443, "lr": 1.6477386082764961e-06, "epoch": 0.3482491517828564, "percentage": 34.82, "elapsed_time": "0:20:04", "remaining_time": "0:37:33", "throughput": 14242.47, "total_tokens": 17152640} +{"current_steps": 5445, "total_steps": 15621, "loss": 0.362, "lr": 1.6468869229423983e-06, "epoch": 0.3485692337238333, "percentage": 34.86, "elapsed_time": "0:20:04", "remaining_time": "0:37:31", "throughput": 14247.92, "total_tokens": 17167680} +{"current_steps": 5450, "total_steps": 15621, "loss": 0.6295, "lr": 1.6460344299623813e-06, "epoch": 0.3488893156648102, "percentage": 34.89, "elapsed_time": "0:20:05", "remaining_time": "0:37:29", "throughput": 14253.62, "total_tokens": 17183296} +{"current_steps": 5455, "total_steps": 15621, "loss": 0.5424, "lr": 1.6451811304007939e-06, "epoch": 0.34920939760578706, "percentage": 34.92, "elapsed_time": "0:20:06", "remaining_time": "0:37:27", "throughput": 14259.07, "total_tokens": 17198272} +{"current_steps": 5460, "total_steps": 15621, "loss": 0.5177, "lr": 1.6443270253229895e-06, "epoch": 0.349529479546764, "percentage": 34.95, "elapsed_time": "0:20:06", "remaining_time": "0:37:25", "throughput": 14264.65, "total_tokens": 17213376} +{"current_steps": 5465, "total_steps": 15621, "loss": 0.4657, "lr": 1.6434721157953288e-06, "epoch": 0.3498495614877409, "percentage": 34.98, "elapsed_time": "0:20:07", "remaining_time": "0:37:23", "throughput": 14270.77, "total_tokens": 17229632} +{"current_steps": 5470, "total_steps": 15621, "loss": 0.579, "lr": 1.6426164028851765e-06, "epoch": 0.35016964342871776, "percentage": 35.02, "elapsed_time": "0:20:07", "remaining_time": "0:37:21", "throughput": 14276.73, "total_tokens": 17245696} +{"current_steps": 5474, "total_steps": 15621, "eval_loss": 0.43906036019325256, "epoch": 0.3504257089814993, "percentage": 35.04, "elapsed_time": "0:20:57", "remaining_time": "0:38:51", "throughput": 13724.04, "total_tokens": 17259840} +{"current_steps": 5475, "total_steps": 15621, "loss": 0.3787, "lr": 1.6417598876609002e-06, "epoch": 0.35048972536969464, "percentage": 35.05, "elapsed_time": "0:21:46", "remaining_time": "0:40:20", "throughput": 13215.82, "total_tokens": 17262976} +{"current_steps": 5480, "total_steps": 15621, "loss": 0.419, "lr": 1.640902571191869e-06, "epoch": 0.3508098073106715, "percentage": 35.08, "elapsed_time": "0:21:46", "remaining_time": "0:40:18", "throughput": 13221.52, "total_tokens": 17278336} +{"current_steps": 5485, "total_steps": 15621, "loss": 0.3535, "lr": 1.6400444545484524e-06, "epoch": 0.3511298892516484, "percentage": 35.11, "elapsed_time": "0:21:47", "remaining_time": "0:40:16", "throughput": 13227.0, "total_tokens": 17293248} +{"current_steps": 5490, "total_steps": 15621, "loss": 0.4275, "lr": 1.6391855388020193e-06, "epoch": 0.35144997119262533, "percentage": 35.14, "elapsed_time": "0:21:48", "remaining_time": "0:40:13", "throughput": 13232.89, "total_tokens": 17309184} +{"current_steps": 5495, "total_steps": 15621, "loss": 0.4436, "lr": 1.6383258250249363e-06, "epoch": 0.3517700531336022, "percentage": 35.18, "elapsed_time": "0:21:48", "remaining_time": "0:40:11", "throughput": 13238.85, "total_tokens": 17325248} +{"current_steps": 5500, "total_steps": 15621, "loss": 0.4226, "lr": 1.6374653142905661e-06, "epoch": 0.3520901350745791, "percentage": 35.21, "elapsed_time": "0:21:49", "remaining_time": "0:40:09", "throughput": 13244.54, "total_tokens": 17340736} +{"current_steps": 5505, "total_steps": 15621, "loss": 0.4188, "lr": 1.6366040076732662e-06, "epoch": 0.35241021701555597, "percentage": 35.24, "elapsed_time": "0:21:49", "remaining_time": "0:40:07", "throughput": 13250.13, "total_tokens": 17355904} +{"current_steps": 5510, "total_steps": 15621, "loss": 0.4712, "lr": 1.6357419062483882e-06, "epoch": 0.35273029895653285, "percentage": 35.27, "elapsed_time": "0:21:50", "remaining_time": "0:40:04", "throughput": 13255.75, "total_tokens": 17371264} +{"current_steps": 5515, "total_steps": 15621, "loss": 0.4168, "lr": 1.6348790110922758e-06, "epoch": 0.3530503808975098, "percentage": 35.31, "elapsed_time": "0:21:51", "remaining_time": "0:40:02", "throughput": 13262.29, "total_tokens": 17388608} +{"current_steps": 5520, "total_steps": 15621, "loss": 0.4668, "lr": 1.6340153232822635e-06, "epoch": 0.35337046283848667, "percentage": 35.34, "elapsed_time": "0:21:51", "remaining_time": "0:40:00", "throughput": 13267.86, "total_tokens": 17403712} +{"current_steps": 5525, "total_steps": 15621, "loss": 0.4809, "lr": 1.633150843896676e-06, "epoch": 0.35369054477946354, "percentage": 35.37, "elapsed_time": "0:21:52", "remaining_time": "0:39:58", "throughput": 13274.46, "total_tokens": 17421056} +{"current_steps": 5530, "total_steps": 15621, "loss": 0.5588, "lr": 1.6322855740148263e-06, "epoch": 0.3540106267204404, "percentage": 35.4, "elapsed_time": "0:21:52", "remaining_time": "0:39:55", "throughput": 13279.9, "total_tokens": 17436096} +{"current_steps": 5535, "total_steps": 15621, "loss": 0.3701, "lr": 1.6314195147170132e-06, "epoch": 0.3543307086614173, "percentage": 35.43, "elapsed_time": "0:21:53", "remaining_time": "0:39:53", "throughput": 13285.96, "total_tokens": 17452480} +{"current_steps": 5540, "total_steps": 15621, "loss": 0.4038, "lr": 1.6305526670845225e-06, "epoch": 0.35465079060239424, "percentage": 35.47, "elapsed_time": "0:21:54", "remaining_time": "0:39:51", "throughput": 13291.54, "total_tokens": 17467776} +{"current_steps": 5545, "total_steps": 15621, "loss": 0.5081, "lr": 1.6296850321996232e-06, "epoch": 0.3549708725433711, "percentage": 35.5, "elapsed_time": "0:21:54", "remaining_time": "0:39:49", "throughput": 13296.98, "total_tokens": 17482752} +{"current_steps": 5550, "total_steps": 15621, "loss": 0.3885, "lr": 1.6288166111455683e-06, "epoch": 0.355290954484348, "percentage": 35.53, "elapsed_time": "0:21:55", "remaining_time": "0:39:46", "throughput": 13302.45, "total_tokens": 17497792} +{"current_steps": 5555, "total_steps": 15621, "loss": 0.4774, "lr": 1.6279474050065906e-06, "epoch": 0.3556110364253249, "percentage": 35.56, "elapsed_time": "0:21:55", "remaining_time": "0:39:44", "throughput": 13307.99, "total_tokens": 17513024} +{"current_steps": 5560, "total_steps": 15621, "loss": 0.4143, "lr": 1.6270774148679054e-06, "epoch": 0.35593111836630176, "percentage": 35.59, "elapsed_time": "0:21:56", "remaining_time": "0:39:42", "throughput": 13313.76, "total_tokens": 17529024} +{"current_steps": 5565, "total_steps": 15621, "loss": 0.3764, "lr": 1.6262066418157048e-06, "epoch": 0.35625120030727864, "percentage": 35.63, "elapsed_time": "0:21:57", "remaining_time": "0:39:40", "throughput": 13319.12, "total_tokens": 17543936} +{"current_steps": 5570, "total_steps": 15621, "loss": 0.5374, "lr": 1.6253350869371595e-06, "epoch": 0.35657128224825557, "percentage": 35.66, "elapsed_time": "0:21:57", "remaining_time": "0:39:37", "throughput": 13324.61, "total_tokens": 17559168} +{"current_steps": 5575, "total_steps": 15621, "loss": 0.3828, "lr": 1.6244627513204158e-06, "epoch": 0.35689136418923245, "percentage": 35.69, "elapsed_time": "0:21:58", "remaining_time": "0:39:35", "throughput": 13330.36, "total_tokens": 17574912} +{"current_steps": 5580, "total_steps": 15621, "loss": 0.4239, "lr": 1.6235896360545954e-06, "epoch": 0.35721144613020933, "percentage": 35.72, "elapsed_time": "0:21:59", "remaining_time": "0:39:33", "throughput": 13335.94, "total_tokens": 17590272} +{"current_steps": 5585, "total_steps": 15621, "loss": 0.4379, "lr": 1.622715742229792e-06, "epoch": 0.3575315280711862, "percentage": 35.75, "elapsed_time": "0:21:59", "remaining_time": "0:39:31", "throughput": 13341.64, "total_tokens": 17605952} +{"current_steps": 5590, "total_steps": 15621, "loss": 0.3813, "lr": 1.6218410709370734e-06, "epoch": 0.3578516100121631, "percentage": 35.79, "elapsed_time": "0:22:00", "remaining_time": "0:39:29", "throughput": 13347.1, "total_tokens": 17621120} +{"current_steps": 5595, "total_steps": 15621, "loss": 0.5629, "lr": 1.6209656232684768e-06, "epoch": 0.35817169195314, "percentage": 35.82, "elapsed_time": "0:22:00", "remaining_time": "0:39:26", "throughput": 13352.48, "total_tokens": 17636096} +{"current_steps": 5600, "total_steps": 15621, "loss": 0.4427, "lr": 1.620089400317008e-06, "epoch": 0.3584917738941169, "percentage": 35.85, "elapsed_time": "0:22:01", "remaining_time": "0:39:24", "throughput": 13358.59, "total_tokens": 17652672} +{"current_steps": 5605, "total_steps": 15621, "loss": 0.4875, "lr": 1.6192124031766425e-06, "epoch": 0.3588118558350938, "percentage": 35.88, "elapsed_time": "0:22:02", "remaining_time": "0:39:22", "throughput": 13364.02, "total_tokens": 17668032} +{"current_steps": 5610, "total_steps": 15621, "loss": 0.4474, "lr": 1.6183346329423213e-06, "epoch": 0.35913193777607066, "percentage": 35.91, "elapsed_time": "0:22:02", "remaining_time": "0:39:20", "throughput": 13369.47, "total_tokens": 17683264} +{"current_steps": 5615, "total_steps": 15621, "loss": 0.3642, "lr": 1.6174560907099508e-06, "epoch": 0.35945201971704754, "percentage": 35.95, "elapsed_time": "0:22:03", "remaining_time": "0:39:18", "throughput": 13375.25, "total_tokens": 17699200} +{"current_steps": 5620, "total_steps": 15621, "loss": 0.3489, "lr": 1.6165767775764013e-06, "epoch": 0.3597721016580245, "percentage": 35.98, "elapsed_time": "0:22:03", "remaining_time": "0:39:15", "throughput": 13380.89, "total_tokens": 17714816} +{"current_steps": 5625, "total_steps": 15621, "loss": 0.411, "lr": 1.6156966946395056e-06, "epoch": 0.36009218359900136, "percentage": 36.01, "elapsed_time": "0:22:04", "remaining_time": "0:39:13", "throughput": 13387.41, "total_tokens": 17732352} +{"current_steps": 5630, "total_steps": 15621, "loss": 0.5376, "lr": 1.6148158429980577e-06, "epoch": 0.36041226553997824, "percentage": 36.04, "elapsed_time": "0:22:05", "remaining_time": "0:39:11", "throughput": 13393.19, "total_tokens": 17748288} +{"current_steps": 5635, "total_steps": 15621, "loss": 0.3839, "lr": 1.6139342237518108e-06, "epoch": 0.3607323474809551, "percentage": 36.07, "elapsed_time": "0:22:05", "remaining_time": "0:39:09", "throughput": 13398.56, "total_tokens": 17763520} +{"current_steps": 5640, "total_steps": 15621, "loss": 0.428, "lr": 1.6130518380014773e-06, "epoch": 0.361052429421932, "percentage": 36.11, "elapsed_time": "0:22:06", "remaining_time": "0:39:07", "throughput": 13404.26, "total_tokens": 17779328} +{"current_steps": 5645, "total_steps": 15621, "loss": 0.4178, "lr": 1.6121686868487259e-06, "epoch": 0.3613725113629089, "percentage": 36.14, "elapsed_time": "0:22:07", "remaining_time": "0:39:05", "throughput": 13410.15, "total_tokens": 17795584} +{"current_steps": 5650, "total_steps": 15621, "loss": 0.44, "lr": 1.6112847713961815e-06, "epoch": 0.3616925933038858, "percentage": 36.17, "elapsed_time": "0:22:07", "remaining_time": "0:39:02", "throughput": 13415.32, "total_tokens": 17810368} +{"current_steps": 5655, "total_steps": 15621, "loss": 0.4283, "lr": 1.610400092747423e-06, "epoch": 0.3620126752448627, "percentage": 36.2, "elapsed_time": "0:22:08", "remaining_time": "0:39:00", "throughput": 13421.13, "total_tokens": 17826496} +{"current_steps": 5660, "total_steps": 15621, "loss": 0.4191, "lr": 1.609514652006981e-06, "epoch": 0.36233275718583957, "percentage": 36.23, "elapsed_time": "0:22:08", "remaining_time": "0:38:58", "throughput": 13426.35, "total_tokens": 17841344} +{"current_steps": 5665, "total_steps": 15621, "loss": 0.5596, "lr": 1.60862845028034e-06, "epoch": 0.36265283912681645, "percentage": 36.27, "elapsed_time": "0:22:09", "remaining_time": "0:38:56", "throughput": 13432.08, "total_tokens": 17857408} +{"current_steps": 5670, "total_steps": 15621, "loss": 0.4256, "lr": 1.6077414886739327e-06, "epoch": 0.36297292106779333, "percentage": 36.3, "elapsed_time": "0:22:10", "remaining_time": "0:38:54", "throughput": 13437.77, "total_tokens": 17873280} +{"current_steps": 5675, "total_steps": 15621, "loss": 0.4936, "lr": 1.6068537682951412e-06, "epoch": 0.36329300300877027, "percentage": 36.33, "elapsed_time": "0:22:10", "remaining_time": "0:38:52", "throughput": 13443.17, "total_tokens": 17888448} +{"current_steps": 5680, "total_steps": 15621, "loss": 0.4402, "lr": 1.6059652902522947e-06, "epoch": 0.36361308494974715, "percentage": 36.36, "elapsed_time": "0:22:11", "remaining_time": "0:38:49", "throughput": 13448.82, "total_tokens": 17904320} +{"current_steps": 5685, "total_steps": 15621, "loss": 0.3667, "lr": 1.6050760556546683e-06, "epoch": 0.363933166890724, "percentage": 36.39, "elapsed_time": "0:22:11", "remaining_time": "0:38:47", "throughput": 13454.29, "total_tokens": 17919744} +{"current_steps": 5690, "total_steps": 15621, "loss": 0.3814, "lr": 1.6041860656124823e-06, "epoch": 0.3642532488317009, "percentage": 36.43, "elapsed_time": "0:22:12", "remaining_time": "0:38:45", "throughput": 13459.61, "total_tokens": 17934656} +{"current_steps": 5695, "total_steps": 15621, "loss": 0.5375, "lr": 1.6032953212368993e-06, "epoch": 0.3645733307726778, "percentage": 36.46, "elapsed_time": "0:22:13", "remaining_time": "0:38:43", "throughput": 13465.49, "total_tokens": 17950976} +{"current_steps": 5700, "total_steps": 15621, "loss": 0.4688, "lr": 1.6024038236400243e-06, "epoch": 0.3648934127136547, "percentage": 36.49, "elapsed_time": "0:22:13", "remaining_time": "0:38:41", "throughput": 13470.95, "total_tokens": 17966400} +{"current_steps": 5705, "total_steps": 15621, "loss": 0.5649, "lr": 1.6015115739349027e-06, "epoch": 0.3652134946546316, "percentage": 36.52, "elapsed_time": "0:22:14", "remaining_time": "0:38:39", "throughput": 13477.3, "total_tokens": 17983872} +{"current_steps": 5710, "total_steps": 15621, "loss": 0.5461, "lr": 1.6006185732355183e-06, "epoch": 0.3655335765956085, "percentage": 36.55, "elapsed_time": "0:22:14", "remaining_time": "0:38:37", "throughput": 13482.99, "total_tokens": 17999680} +{"current_steps": 5715, "total_steps": 15621, "loss": 0.3802, "lr": 1.5997248226567931e-06, "epoch": 0.36585365853658536, "percentage": 36.59, "elapsed_time": "0:22:15", "remaining_time": "0:38:35", "throughput": 13488.27, "total_tokens": 18014784} +{"current_steps": 5720, "total_steps": 15621, "loss": 0.4997, "lr": 1.5988303233145853e-06, "epoch": 0.36617374047756224, "percentage": 36.62, "elapsed_time": "0:22:16", "remaining_time": "0:38:32", "throughput": 13493.54, "total_tokens": 18029888} +{"current_steps": 5725, "total_steps": 15621, "loss": 0.3877, "lr": 1.597935076325688e-06, "epoch": 0.3664938224185392, "percentage": 36.65, "elapsed_time": "0:22:16", "remaining_time": "0:38:30", "throughput": 13499.09, "total_tokens": 18045632} +{"current_steps": 5730, "total_steps": 15621, "loss": 0.5839, "lr": 1.5970390828078272e-06, "epoch": 0.36681390435951605, "percentage": 36.68, "elapsed_time": "0:22:17", "remaining_time": "0:38:28", "throughput": 13504.44, "total_tokens": 18060928} +{"current_steps": 5735, "total_steps": 15621, "loss": 0.4567, "lr": 1.5961423438796615e-06, "epoch": 0.36713398630049293, "percentage": 36.71, "elapsed_time": "0:22:18", "remaining_time": "0:38:26", "throughput": 13509.85, "total_tokens": 18076352} +{"current_steps": 5740, "total_steps": 15621, "loss": 0.4411, "lr": 1.59524486066078e-06, "epoch": 0.3674540682414698, "percentage": 36.75, "elapsed_time": "0:22:18", "remaining_time": "0:38:24", "throughput": 13515.42, "total_tokens": 18092096} +{"current_steps": 5745, "total_steps": 15621, "loss": 0.5834, "lr": 1.5943466342717012e-06, "epoch": 0.3677741501824467, "percentage": 36.78, "elapsed_time": "0:22:19", "remaining_time": "0:38:22", "throughput": 13520.92, "total_tokens": 18107648} +{"current_steps": 5750, "total_steps": 15621, "loss": 0.4433, "lr": 1.5934476658338708e-06, "epoch": 0.36809423212342357, "percentage": 36.81, "elapsed_time": "0:22:19", "remaining_time": "0:38:20", "throughput": 13526.43, "total_tokens": 18123264} +{"current_steps": 5755, "total_steps": 15621, "loss": 0.5414, "lr": 1.5925479564696619e-06, "epoch": 0.3684143140644005, "percentage": 36.84, "elapsed_time": "0:22:20", "remaining_time": "0:38:17", "throughput": 13531.73, "total_tokens": 18138368} +{"current_steps": 5760, "total_steps": 15621, "loss": 0.3336, "lr": 1.5916475073023721e-06, "epoch": 0.3687343960053774, "percentage": 36.87, "elapsed_time": "0:22:21", "remaining_time": "0:38:15", "throughput": 13537.4, "total_tokens": 18154432} +{"current_steps": 5765, "total_steps": 15621, "loss": 0.3355, "lr": 1.5907463194562226e-06, "epoch": 0.36905447794635426, "percentage": 36.91, "elapsed_time": "0:22:21", "remaining_time": "0:38:13", "throughput": 13543.38, "total_tokens": 18171200} +{"current_steps": 5770, "total_steps": 15621, "loss": 0.3807, "lr": 1.589844394056357e-06, "epoch": 0.36937455988733114, "percentage": 36.94, "elapsed_time": "0:22:22", "remaining_time": "0:38:11", "throughput": 13549.05, "total_tokens": 18187008} +{"current_steps": 5775, "total_steps": 15621, "loss": 0.3492, "lr": 1.5889417322288403e-06, "epoch": 0.369694641828308, "percentage": 36.97, "elapsed_time": "0:22:22", "remaining_time": "0:38:09", "throughput": 13554.63, "total_tokens": 18202944} +{"current_steps": 5780, "total_steps": 15621, "loss": 0.4969, "lr": 1.5880383351006556e-06, "epoch": 0.37001472376928496, "percentage": 37.0, "elapsed_time": "0:22:23", "remaining_time": "0:38:07", "throughput": 13559.82, "total_tokens": 18217984} +{"current_steps": 5785, "total_steps": 15621, "loss": 0.505, "lr": 1.5871342037997055e-06, "epoch": 0.37033480571026184, "percentage": 37.03, "elapsed_time": "0:22:24", "remaining_time": "0:38:05", "throughput": 13565.38, "total_tokens": 18233984} +{"current_steps": 5790, "total_steps": 15621, "loss": 0.403, "lr": 1.5862293394548082e-06, "epoch": 0.3706548876512387, "percentage": 37.07, "elapsed_time": "0:22:24", "remaining_time": "0:38:03", "throughput": 13570.53, "total_tokens": 18249024} +{"current_steps": 5795, "total_steps": 15621, "loss": 0.3414, "lr": 1.5853237431956972e-06, "epoch": 0.3709749695922156, "percentage": 37.1, "elapsed_time": "0:22:25", "remaining_time": "0:38:01", "throughput": 13575.74, "total_tokens": 18264256} +{"current_steps": 5800, "total_steps": 15621, "loss": 0.5495, "lr": 1.5844174161530206e-06, "epoch": 0.3712950515331925, "percentage": 37.13, "elapsed_time": "0:22:25", "remaining_time": "0:37:59", "throughput": 13581.14, "total_tokens": 18279936} +{"current_steps": 5805, "total_steps": 15621, "loss": 0.4039, "lr": 1.5835103594583382e-06, "epoch": 0.3716151334741694, "percentage": 37.16, "elapsed_time": "0:22:26", "remaining_time": "0:37:57", "throughput": 13586.61, "total_tokens": 18295488} +{"current_steps": 5810, "total_steps": 15621, "loss": 0.5329, "lr": 1.5826025742441207e-06, "epoch": 0.3719352154151463, "percentage": 37.19, "elapsed_time": "0:22:27", "remaining_time": "0:37:54", "throughput": 13592.13, "total_tokens": 18311360} +{"current_steps": 5815, "total_steps": 15621, "loss": 0.4284, "lr": 1.5816940616437486e-06, "epoch": 0.37225529735612317, "percentage": 37.23, "elapsed_time": "0:22:27", "remaining_time": "0:37:52", "throughput": 13597.37, "total_tokens": 18326592} +{"current_steps": 5820, "total_steps": 15621, "loss": 0.3573, "lr": 1.5807848227915108e-06, "epoch": 0.37257537929710005, "percentage": 37.26, "elapsed_time": "0:22:28", "remaining_time": "0:37:50", "throughput": 13603.54, "total_tokens": 18344000} +{"current_steps": 5825, "total_steps": 15621, "loss": 0.4787, "lr": 1.5798748588226028e-06, "epoch": 0.37289546123807693, "percentage": 37.29, "elapsed_time": "0:22:29", "remaining_time": "0:37:48", "throughput": 13609.11, "total_tokens": 18359872} +{"current_steps": 5830, "total_steps": 15621, "loss": 0.4776, "lr": 1.578964170873125e-06, "epoch": 0.3732155431790538, "percentage": 37.32, "elapsed_time": "0:22:29", "remaining_time": "0:37:46", "throughput": 13614.0, "total_tokens": 18374400} +{"current_steps": 5835, "total_steps": 15621, "loss": 0.2927, "lr": 1.5780527600800816e-06, "epoch": 0.37353562512003075, "percentage": 37.35, "elapsed_time": "0:22:30", "remaining_time": "0:37:44", "throughput": 13619.69, "total_tokens": 18390656} +{"current_steps": 5840, "total_steps": 15621, "loss": 0.4476, "lr": 1.5771406275813808e-06, "epoch": 0.3738557070610076, "percentage": 37.39, "elapsed_time": "0:22:30", "remaining_time": "0:37:42", "throughput": 13625.06, "total_tokens": 18406400} +{"current_steps": 5845, "total_steps": 15621, "loss": 0.5497, "lr": 1.5762277745158297e-06, "epoch": 0.3741757890019845, "percentage": 37.42, "elapsed_time": "0:22:31", "remaining_time": "0:37:40", "throughput": 13630.83, "total_tokens": 18422848} +{"current_steps": 5850, "total_steps": 15621, "loss": 0.4932, "lr": 1.5753142020231365e-06, "epoch": 0.3744958709429614, "percentage": 37.45, "elapsed_time": "0:22:32", "remaining_time": "0:37:38", "throughput": 13636.43, "total_tokens": 18438912} +{"current_steps": 5855, "total_steps": 15621, "loss": 0.525, "lr": 1.5743999112439073e-06, "epoch": 0.37481595288393826, "percentage": 37.48, "elapsed_time": "0:22:32", "remaining_time": "0:37:36", "throughput": 13642.24, "total_tokens": 18455488} +{"current_steps": 5860, "total_steps": 15621, "loss": 0.3954, "lr": 1.5734849033196446e-06, "epoch": 0.3751360348249152, "percentage": 37.51, "elapsed_time": "0:22:33", "remaining_time": "0:37:34", "throughput": 13647.11, "total_tokens": 18470080} +{"current_steps": 5865, "total_steps": 15621, "loss": 0.4337, "lr": 1.5725691793927468e-06, "epoch": 0.3754561167658921, "percentage": 37.55, "elapsed_time": "0:22:33", "remaining_time": "0:37:32", "throughput": 13651.94, "total_tokens": 18484480} +{"current_steps": 5870, "total_steps": 15621, "loss": 0.46, "lr": 1.5716527406065057e-06, "epoch": 0.37577619870686896, "percentage": 37.58, "elapsed_time": "0:22:34", "remaining_time": "0:37:30", "throughput": 13657.91, "total_tokens": 18501312} +{"current_steps": 5875, "total_steps": 15621, "loss": 0.449, "lr": 1.570735588105106e-06, "epoch": 0.37609628064784584, "percentage": 37.61, "elapsed_time": "0:22:35", "remaining_time": "0:37:28", "throughput": 13662.88, "total_tokens": 18515968} +{"current_steps": 5880, "total_steps": 15621, "loss": 0.3901, "lr": 1.5698177230336234e-06, "epoch": 0.3764163625888227, "percentage": 37.64, "elapsed_time": "0:22:35", "remaining_time": "0:37:26", "throughput": 13668.04, "total_tokens": 18531200} +{"current_steps": 5885, "total_steps": 15621, "loss": 0.2699, "lr": 1.568899146538023e-06, "epoch": 0.37673644452979965, "percentage": 37.67, "elapsed_time": "0:22:36", "remaining_time": "0:37:24", "throughput": 13673.78, "total_tokens": 18547712} +{"current_steps": 5890, "total_steps": 15621, "loss": 0.4111, "lr": 1.5679798597651587e-06, "epoch": 0.37705652647077653, "percentage": 37.71, "elapsed_time": "0:22:37", "remaining_time": "0:37:21", "throughput": 13678.87, "total_tokens": 18562752} +{"current_steps": 5895, "total_steps": 15621, "loss": 0.4265, "lr": 1.5670598638627706e-06, "epoch": 0.3773766084117534, "percentage": 37.74, "elapsed_time": "0:22:37", "remaining_time": "0:37:19", "throughput": 13684.17, "total_tokens": 18578368} +{"current_steps": 5900, "total_steps": 15621, "loss": 0.3882, "lr": 1.5661391599794847e-06, "epoch": 0.3776966903527303, "percentage": 37.77, "elapsed_time": "0:22:38", "remaining_time": "0:37:17", "throughput": 13689.26, "total_tokens": 18593408} +{"current_steps": 5905, "total_steps": 15621, "loss": 0.4155, "lr": 1.56521774926481e-06, "epoch": 0.37801677229370717, "percentage": 37.8, "elapsed_time": "0:22:38", "remaining_time": "0:37:15", "throughput": 13694.14, "total_tokens": 18607872} +{"current_steps": 5910, "total_steps": 15621, "loss": 0.359, "lr": 1.5642956328691393e-06, "epoch": 0.3783368542346841, "percentage": 37.83, "elapsed_time": "0:22:39", "remaining_time": "0:37:14", "throughput": 13697.15, "total_tokens": 18624000} +{"current_steps": 5915, "total_steps": 15621, "loss": 0.564, "lr": 1.5633728119437451e-06, "epoch": 0.378656936175661, "percentage": 37.87, "elapsed_time": "0:22:40", "remaining_time": "0:37:12", "throughput": 13703.06, "total_tokens": 18640704} +{"current_steps": 5920, "total_steps": 15621, "loss": 0.4568, "lr": 1.5624492876407807e-06, "epoch": 0.37897701811663786, "percentage": 37.9, "elapsed_time": "0:22:40", "remaining_time": "0:37:10", "throughput": 13709.36, "total_tokens": 18658368} +{"current_steps": 5925, "total_steps": 15621, "loss": 0.4087, "lr": 1.5615250611132766e-06, "epoch": 0.37929710005761474, "percentage": 37.93, "elapsed_time": "0:22:41", "remaining_time": "0:37:08", "throughput": 13715.44, "total_tokens": 18675584} +{"current_steps": 5930, "total_steps": 15621, "loss": 0.5669, "lr": 1.5606001335151405e-06, "epoch": 0.3796171819985916, "percentage": 37.96, "elapsed_time": "0:22:42", "remaining_time": "0:37:06", "throughput": 13721.08, "total_tokens": 18691904} +{"current_steps": 5935, "total_steps": 15621, "loss": 0.3744, "lr": 1.5596745060011561e-06, "epoch": 0.3799372639395685, "percentage": 37.99, "elapsed_time": "0:22:42", "remaining_time": "0:37:04", "throughput": 13726.95, "total_tokens": 18708736} +{"current_steps": 5940, "total_steps": 15621, "loss": 0.3464, "lr": 1.5587481797269793e-06, "epoch": 0.38025734588054544, "percentage": 38.03, "elapsed_time": "0:22:43", "remaining_time": "0:37:02", "throughput": 13732.07, "total_tokens": 18724032} +{"current_steps": 5945, "total_steps": 15621, "loss": 0.4203, "lr": 1.5578211558491396e-06, "epoch": 0.3805774278215223, "percentage": 38.06, "elapsed_time": "0:22:44", "remaining_time": "0:37:00", "throughput": 13737.68, "total_tokens": 18740352} +{"current_steps": 5950, "total_steps": 15621, "loss": 0.3225, "lr": 1.5568934355250375e-06, "epoch": 0.3808975097624992, "percentage": 38.09, "elapsed_time": "0:22:44", "remaining_time": "0:36:58", "throughput": 13742.38, "total_tokens": 18754560} +{"current_steps": 5955, "total_steps": 15621, "loss": 0.6491, "lr": 1.5559650199129423e-06, "epoch": 0.3812175917034761, "percentage": 38.12, "elapsed_time": "0:22:45", "remaining_time": "0:36:56", "throughput": 13747.34, "total_tokens": 18769280} +{"current_steps": 5960, "total_steps": 15621, "loss": 0.4012, "lr": 1.5550359101719921e-06, "epoch": 0.38153767364445296, "percentage": 38.15, "elapsed_time": "0:22:45", "remaining_time": "0:36:54", "throughput": 13752.48, "total_tokens": 18784512} +{"current_steps": 5965, "total_steps": 15621, "loss": 0.3561, "lr": 1.554106107462191e-06, "epoch": 0.3818577555854299, "percentage": 38.19, "elapsed_time": "0:22:46", "remaining_time": "0:36:52", "throughput": 13757.88, "total_tokens": 18800384} +{"current_steps": 5970, "total_steps": 15621, "loss": 0.4248, "lr": 1.5531756129444092e-06, "epoch": 0.38217783752640677, "percentage": 38.22, "elapsed_time": "0:22:47", "remaining_time": "0:36:50", "throughput": 13763.03, "total_tokens": 18815552} +{"current_steps": 5975, "total_steps": 15621, "loss": 0.3884, "lr": 1.5522444277803796e-06, "epoch": 0.38249791946738365, "percentage": 38.25, "elapsed_time": "0:22:47", "remaining_time": "0:36:47", "throughput": 13767.86, "total_tokens": 18830080} +{"current_steps": 5980, "total_steps": 15621, "loss": 0.4319, "lr": 1.5513125531326976e-06, "epoch": 0.38281800140836053, "percentage": 38.28, "elapsed_time": "0:22:48", "remaining_time": "0:36:46", "throughput": 13773.37, "total_tokens": 18846272} +{"current_steps": 5985, "total_steps": 15621, "loss": 0.3747, "lr": 1.5503799901648198e-06, "epoch": 0.3831380833493374, "percentage": 38.31, "elapsed_time": "0:22:48", "remaining_time": "0:36:43", "throughput": 13778.2, "total_tokens": 18860928} +{"current_steps": 5990, "total_steps": 15621, "loss": 0.4553, "lr": 1.5494467400410625e-06, "epoch": 0.38345816529031435, "percentage": 38.35, "elapsed_time": "0:22:49", "remaining_time": "0:36:41", "throughput": 13783.63, "total_tokens": 18877120} +{"current_steps": 5995, "total_steps": 15621, "loss": 0.6017, "lr": 1.5485128039265986e-06, "epoch": 0.3837782472312912, "percentage": 38.38, "elapsed_time": "0:22:50", "remaining_time": "0:36:39", "throughput": 13788.7, "total_tokens": 18892224} +{"current_steps": 6000, "total_steps": 15621, "loss": 0.4408, "lr": 1.547578182987459e-06, "epoch": 0.3840983291722681, "percentage": 38.41, "elapsed_time": "0:22:50", "remaining_time": "0:36:37", "throughput": 13793.59, "total_tokens": 18907008} +{"current_steps": 6005, "total_steps": 15621, "loss": 0.2736, "lr": 1.5466428783905286e-06, "epoch": 0.384418411113245, "percentage": 38.44, "elapsed_time": "0:22:51", "remaining_time": "0:36:35", "throughput": 13798.71, "total_tokens": 18922368} +{"current_steps": 6010, "total_steps": 15621, "loss": 0.4288, "lr": 1.5457068913035463e-06, "epoch": 0.38473849305422186, "percentage": 38.47, "elapsed_time": "0:22:51", "remaining_time": "0:36:33", "throughput": 13803.76, "total_tokens": 18937536} +{"current_steps": 6015, "total_steps": 15621, "loss": 0.4784, "lr": 1.544770222895103e-06, "epoch": 0.38505857499519874, "percentage": 38.51, "elapsed_time": "0:22:52", "remaining_time": "0:36:31", "throughput": 13809.42, "total_tokens": 18954048} +{"current_steps": 6020, "total_steps": 15621, "loss": 0.5188, "lr": 1.5438328743346398e-06, "epoch": 0.3853786569361757, "percentage": 38.54, "elapsed_time": "0:22:53", "remaining_time": "0:36:29", "throughput": 13814.55, "total_tokens": 18969472} +{"current_steps": 6025, "total_steps": 15621, "loss": 0.4098, "lr": 1.5428948467924478e-06, "epoch": 0.38569873887715256, "percentage": 38.57, "elapsed_time": "0:22:53", "remaining_time": "0:36:27", "throughput": 13819.28, "total_tokens": 18983872} +{"current_steps": 6030, "total_steps": 15621, "loss": 0.3223, "lr": 1.5419561414396656e-06, "epoch": 0.38601882081812944, "percentage": 38.6, "elapsed_time": "0:22:54", "remaining_time": "0:36:25", "throughput": 13824.4, "total_tokens": 18999360} +{"current_steps": 6035, "total_steps": 15621, "loss": 0.4888, "lr": 1.541016759448277e-06, "epoch": 0.3863389027591063, "percentage": 38.63, "elapsed_time": "0:22:54", "remaining_time": "0:36:23", "throughput": 13829.77, "total_tokens": 19015424} +{"current_steps": 6040, "total_steps": 15621, "loss": 0.3641, "lr": 1.5400767019911124e-06, "epoch": 0.3866589847000832, "percentage": 38.67, "elapsed_time": "0:22:55", "remaining_time": "0:36:22", "throughput": 13835.19, "total_tokens": 19031616} +{"current_steps": 6045, "total_steps": 15621, "loss": 0.4821, "lr": 1.539135970241844e-06, "epoch": 0.38697906664106013, "percentage": 38.7, "elapsed_time": "0:22:56", "remaining_time": "0:36:20", "throughput": 13840.31, "total_tokens": 19047040} +{"current_steps": 6050, "total_steps": 15621, "loss": 0.479, "lr": 1.5381945653749866e-06, "epoch": 0.387299148582037, "percentage": 38.73, "elapsed_time": "0:22:56", "remaining_time": "0:36:18", "throughput": 13845.62, "total_tokens": 19062848} +{"current_steps": 6055, "total_steps": 15621, "loss": 0.5564, "lr": 1.5372524885658952e-06, "epoch": 0.3876192305230139, "percentage": 38.76, "elapsed_time": "0:22:57", "remaining_time": "0:36:16", "throughput": 13851.1, "total_tokens": 19078976} +{"current_steps": 6060, "total_steps": 15621, "loss": 0.3676, "lr": 1.5363097409907638e-06, "epoch": 0.38793931246399077, "percentage": 38.79, "elapsed_time": "0:22:58", "remaining_time": "0:36:14", "throughput": 13855.9, "total_tokens": 19093632} +{"current_steps": 6065, "total_steps": 15621, "loss": 0.3605, "lr": 1.535366323826624e-06, "epoch": 0.38825939440496765, "percentage": 38.83, "elapsed_time": "0:22:58", "remaining_time": "0:36:12", "throughput": 13861.0, "total_tokens": 19109056} +{"current_steps": 6070, "total_steps": 15621, "loss": 0.3699, "lr": 1.534422238251343e-06, "epoch": 0.3885794763459446, "percentage": 38.86, "elapsed_time": "0:22:59", "remaining_time": "0:36:10", "throughput": 13866.09, "total_tokens": 19124544} +{"current_steps": 6075, "total_steps": 15621, "loss": 0.3834, "lr": 1.5334774854436223e-06, "epoch": 0.38889955828692147, "percentage": 38.89, "elapsed_time": "0:22:59", "remaining_time": "0:36:08", "throughput": 13871.39, "total_tokens": 19140480} +{"current_steps": 6080, "total_steps": 15621, "loss": 0.3776, "lr": 1.5325320665829975e-06, "epoch": 0.38921964022789834, "percentage": 38.92, "elapsed_time": "0:23:00", "remaining_time": "0:36:06", "throughput": 13876.8, "total_tokens": 19156736} +{"current_steps": 6085, "total_steps": 15621, "loss": 0.4455, "lr": 1.5315859828498352e-06, "epoch": 0.3895397221688752, "percentage": 38.95, "elapsed_time": "0:23:01", "remaining_time": "0:36:04", "throughput": 13881.61, "total_tokens": 19171520} +{"current_steps": 6090, "total_steps": 15621, "loss": 0.4921, "lr": 1.5306392354253316e-06, "epoch": 0.3898598041098521, "percentage": 38.99, "elapsed_time": "0:23:01", "remaining_time": "0:36:02", "throughput": 13886.81, "total_tokens": 19187136} +{"current_steps": 6095, "total_steps": 15621, "loss": 0.4377, "lr": 1.5296918254915123e-06, "epoch": 0.39017988605082904, "percentage": 39.02, "elapsed_time": "0:23:02", "remaining_time": "0:36:00", "throughput": 13891.6, "total_tokens": 19201856} +{"current_steps": 6100, "total_steps": 15621, "loss": 0.3869, "lr": 1.5287437542312296e-06, "epoch": 0.3904999679918059, "percentage": 39.05, "elapsed_time": "0:23:02", "remaining_time": "0:35:58", "throughput": 13896.37, "total_tokens": 19216704} +{"current_steps": 6105, "total_steps": 15621, "loss": 0.5316, "lr": 1.5277950228281614e-06, "epoch": 0.3908200499327828, "percentage": 39.08, "elapsed_time": "0:23:03", "remaining_time": "0:35:56", "throughput": 13902.1, "total_tokens": 19233408} +{"current_steps": 6110, "total_steps": 15621, "loss": 0.354, "lr": 1.52684563246681e-06, "epoch": 0.3911401318737597, "percentage": 39.11, "elapsed_time": "0:23:04", "remaining_time": "0:35:54", "throughput": 13907.76, "total_tokens": 19250048} +{"current_steps": 6115, "total_steps": 15621, "loss": 0.4243, "lr": 1.5258955843325015e-06, "epoch": 0.39146021381473656, "percentage": 39.15, "elapsed_time": "0:23:04", "remaining_time": "0:35:52", "throughput": 13913.33, "total_tokens": 19266560} +{"current_steps": 6120, "total_steps": 15621, "loss": 0.4885, "lr": 1.5249448796113804e-06, "epoch": 0.39178029575571344, "percentage": 39.18, "elapsed_time": "0:23:05", "remaining_time": "0:35:50", "throughput": 13918.15, "total_tokens": 19281408} +{"current_steps": 6125, "total_steps": 15621, "loss": 0.4747, "lr": 1.5239935194904141e-06, "epoch": 0.39210037769669037, "percentage": 39.21, "elapsed_time": "0:23:05", "remaining_time": "0:35:48", "throughput": 13923.01, "total_tokens": 19296384} +{"current_steps": 6130, "total_steps": 15621, "loss": 0.3702, "lr": 1.523041505157386e-06, "epoch": 0.39242045963766725, "percentage": 39.24, "elapsed_time": "0:23:06", "remaining_time": "0:35:46", "throughput": 13928.12, "total_tokens": 19312000} +{"current_steps": 6135, "total_steps": 15621, "loss": 0.3909, "lr": 1.5220888378008977e-06, "epoch": 0.39274054157864413, "percentage": 39.27, "elapsed_time": "0:23:07", "remaining_time": "0:35:44", "throughput": 13933.2, "total_tokens": 19327488} +{"current_steps": 6140, "total_steps": 15621, "loss": 0.4661, "lr": 1.5211355186103654e-06, "epoch": 0.393060623519621, "percentage": 39.31, "elapsed_time": "0:23:07", "remaining_time": "0:35:42", "throughput": 13937.92, "total_tokens": 19342080} +{"current_steps": 6145, "total_steps": 15621, "loss": 0.4126, "lr": 1.5201815487760192e-06, "epoch": 0.3933807054605979, "percentage": 39.34, "elapsed_time": "0:23:08", "remaining_time": "0:35:40", "throughput": 13943.31, "total_tokens": 19358336} +{"current_steps": 6150, "total_steps": 15621, "loss": 0.508, "lr": 1.5192269294889019e-06, "epoch": 0.3937007874015748, "percentage": 39.37, "elapsed_time": "0:23:08", "remaining_time": "0:35:38", "throughput": 13948.21, "total_tokens": 19373376} +{"current_steps": 6155, "total_steps": 15621, "loss": 0.4029, "lr": 1.5182716619408666e-06, "epoch": 0.3940208693425517, "percentage": 39.4, "elapsed_time": "0:23:09", "remaining_time": "0:35:37", "throughput": 13953.16, "total_tokens": 19388608} +{"current_steps": 6160, "total_steps": 15621, "loss": 0.5398, "lr": 1.5173157473245764e-06, "epoch": 0.3943409512835286, "percentage": 39.43, "elapsed_time": "0:23:10", "remaining_time": "0:35:35", "throughput": 13957.85, "total_tokens": 19403264} +{"current_steps": 6165, "total_steps": 15621, "loss": 0.4363, "lr": 1.5163591868335016e-06, "epoch": 0.39466103322450546, "percentage": 39.47, "elapsed_time": "0:23:10", "remaining_time": "0:35:33", "throughput": 13962.87, "total_tokens": 19418816} +{"current_steps": 6170, "total_steps": 15621, "loss": 0.5781, "lr": 1.515401981661919e-06, "epoch": 0.39498111516548234, "percentage": 39.5, "elapsed_time": "0:23:11", "remaining_time": "0:35:31", "throughput": 13968.37, "total_tokens": 19435392} +{"current_steps": 6175, "total_steps": 15621, "loss": 0.4592, "lr": 1.514444133004911e-06, "epoch": 0.3953011971064593, "percentage": 39.53, "elapsed_time": "0:23:11", "remaining_time": "0:35:29", "throughput": 13973.05, "total_tokens": 19450048} +{"current_steps": 6180, "total_steps": 15621, "loss": 0.4592, "lr": 1.5134856420583631e-06, "epoch": 0.39562127904743616, "percentage": 39.56, "elapsed_time": "0:23:12", "remaining_time": "0:35:27", "throughput": 13978.5, "total_tokens": 19466368} +{"current_steps": 6185, "total_steps": 15621, "loss": 0.3338, "lr": 1.5125265100189614e-06, "epoch": 0.39594136098841304, "percentage": 39.59, "elapsed_time": "0:23:13", "remaining_time": "0:35:25", "throughput": 13983.82, "total_tokens": 19482624} +{"current_steps": 6190, "total_steps": 15621, "loss": 0.5304, "lr": 1.5115667380841948e-06, "epoch": 0.3962614429293899, "percentage": 39.63, "elapsed_time": "0:23:13", "remaining_time": "0:35:23", "throughput": 13988.85, "total_tokens": 19498048} +{"current_steps": 6195, "total_steps": 15621, "loss": 0.43, "lr": 1.510606327452349e-06, "epoch": 0.3965815248703668, "percentage": 39.66, "elapsed_time": "0:23:14", "remaining_time": "0:35:21", "throughput": 13994.54, "total_tokens": 19515264} +{"current_steps": 6200, "total_steps": 15621, "loss": 0.4319, "lr": 1.5096452793225082e-06, "epoch": 0.3969016068113437, "percentage": 39.69, "elapsed_time": "0:23:15", "remaining_time": "0:35:19", "throughput": 14000.52, "total_tokens": 19533056} +{"current_steps": 6205, "total_steps": 15621, "loss": 0.4003, "lr": 1.5086835948945522e-06, "epoch": 0.3972216887523206, "percentage": 39.72, "elapsed_time": "0:23:15", "remaining_time": "0:35:18", "throughput": 14005.47, "total_tokens": 19548480} +{"current_steps": 6210, "total_steps": 15621, "loss": 0.3271, "lr": 1.5077212753691556e-06, "epoch": 0.3975417706932975, "percentage": 39.75, "elapsed_time": "0:23:16", "remaining_time": "0:35:16", "throughput": 14010.39, "total_tokens": 19563712} +{"current_steps": 6215, "total_steps": 15621, "loss": 0.4049, "lr": 1.5067583219477852e-06, "epoch": 0.39786185263427437, "percentage": 39.79, "elapsed_time": "0:23:16", "remaining_time": "0:35:14", "throughput": 14015.19, "total_tokens": 19578624} +{"current_steps": 6220, "total_steps": 15621, "loss": 0.3916, "lr": 1.5057947358327e-06, "epoch": 0.39818193457525125, "percentage": 39.82, "elapsed_time": "0:23:17", "remaining_time": "0:35:12", "throughput": 14019.87, "total_tokens": 19593408} +{"current_steps": 6225, "total_steps": 15621, "loss": 0.4907, "lr": 1.504830518226948e-06, "epoch": 0.39850201651622813, "percentage": 39.85, "elapsed_time": "0:23:18", "remaining_time": "0:35:10", "throughput": 14025.08, "total_tokens": 19609216} +{"current_steps": 6230, "total_steps": 15621, "loss": 0.449, "lr": 1.5038656703343672e-06, "epoch": 0.39882209845720507, "percentage": 39.88, "elapsed_time": "0:23:18", "remaining_time": "0:35:08", "throughput": 14030.08, "total_tokens": 19624896} +{"current_steps": 6235, "total_steps": 15621, "loss": 0.4925, "lr": 1.5029001933595805e-06, "epoch": 0.39914218039818194, "percentage": 39.91, "elapsed_time": "0:23:19", "remaining_time": "0:35:06", "throughput": 14035.0, "total_tokens": 19640128} +{"current_steps": 6240, "total_steps": 15621, "loss": 0.3433, "lr": 1.501934088507998e-06, "epoch": 0.3994622623391588, "percentage": 39.95, "elapsed_time": "0:23:19", "remaining_time": "0:35:04", "throughput": 14040.1, "total_tokens": 19655680} +{"current_steps": 6245, "total_steps": 15621, "loss": 0.6227, "lr": 1.5009673569858126e-06, "epoch": 0.3997823442801357, "percentage": 39.98, "elapsed_time": "0:23:20", "remaining_time": "0:35:02", "throughput": 14045.49, "total_tokens": 19672192} +{"current_steps": 6250, "total_steps": 15621, "loss": 0.5284, "lr": 1.5e-06, "epoch": 0.4001024262211126, "percentage": 40.01, "elapsed_time": "0:23:21", "remaining_time": "0:35:00", "throughput": 14050.98, "total_tokens": 19688896} +{"current_steps": 6255, "total_steps": 15621, "loss": 0.3547, "lr": 1.4990320187583167e-06, "epoch": 0.4004225081620895, "percentage": 40.04, "elapsed_time": "0:23:21", "remaining_time": "0:34:59", "throughput": 14055.89, "total_tokens": 19704128} +{"current_steps": 6256, "total_steps": 15621, "eval_loss": 0.42333245277404785, "epoch": 0.4004865245502849, "percentage": 40.05, "elapsed_time": "0:24:11", "remaining_time": "0:36:12", "throughput": 13580.8, "total_tokens": 19707456} +{"current_steps": 6260, "total_steps": 15621, "loss": 0.395, "lr": 1.4980634144692986e-06, "epoch": 0.4007425901030664, "percentage": 40.07, "elapsed_time": "0:24:53", "remaining_time": "0:37:13", "throughput": 13204.42, "total_tokens": 19719744} +{"current_steps": 6265, "total_steps": 15621, "loss": 0.3795, "lr": 1.4970941883422599e-06, "epoch": 0.4010626720440433, "percentage": 40.11, "elapsed_time": "0:24:54", "remaining_time": "0:37:11", "throughput": 13209.87, "total_tokens": 19736128} +{"current_steps": 6270, "total_steps": 15621, "loss": 0.4165, "lr": 1.4961243415872901e-06, "epoch": 0.40138275398502016, "percentage": 40.14, "elapsed_time": "0:24:54", "remaining_time": "0:37:09", "throughput": 13214.73, "total_tokens": 19751296} +{"current_steps": 6275, "total_steps": 15621, "loss": 0.4057, "lr": 1.4951538754152551e-06, "epoch": 0.40170283592599704, "percentage": 40.17, "elapsed_time": "0:24:55", "remaining_time": "0:37:06", "throughput": 13219.35, "total_tokens": 19765888} +{"current_steps": 6280, "total_steps": 15621, "loss": 0.4205, "lr": 1.4941827910377925e-06, "epoch": 0.402022917866974, "percentage": 40.2, "elapsed_time": "0:24:55", "remaining_time": "0:37:04", "throughput": 13224.09, "total_tokens": 19780864} +{"current_steps": 6285, "total_steps": 15621, "loss": 0.4014, "lr": 1.4932110896673131e-06, "epoch": 0.40234299980795085, "percentage": 40.23, "elapsed_time": "0:24:56", "remaining_time": "0:37:02", "throughput": 13229.33, "total_tokens": 19796864} +{"current_steps": 6290, "total_steps": 15621, "loss": 0.5395, "lr": 1.4922387725169973e-06, "epoch": 0.40266308174892773, "percentage": 40.27, "elapsed_time": "0:24:57", "remaining_time": "0:37:00", "throughput": 13234.16, "total_tokens": 19811904} +{"current_steps": 6295, "total_steps": 15621, "loss": 0.4049, "lr": 1.4912658408007947e-06, "epoch": 0.4029831636899046, "percentage": 40.3, "elapsed_time": "0:24:57", "remaining_time": "0:36:58", "throughput": 13239.16, "total_tokens": 19827456} +{"current_steps": 6300, "total_steps": 15621, "loss": 0.4269, "lr": 1.4902922957334215e-06, "epoch": 0.4033032456308815, "percentage": 40.33, "elapsed_time": "0:24:58", "remaining_time": "0:36:56", "throughput": 13243.97, "total_tokens": 19842496} +{"current_steps": 6305, "total_steps": 15621, "loss": 0.408, "lr": 1.4893181385303608e-06, "epoch": 0.40362332757185837, "percentage": 40.36, "elapsed_time": "0:24:58", "remaining_time": "0:36:54", "throughput": 13249.02, "total_tokens": 19858240} +{"current_steps": 6310, "total_steps": 15621, "loss": 0.3994, "lr": 1.4883433704078584e-06, "epoch": 0.4039434095128353, "percentage": 40.39, "elapsed_time": "0:24:59", "remaining_time": "0:36:52", "throughput": 13254.33, "total_tokens": 19874368} +{"current_steps": 6315, "total_steps": 15621, "loss": 0.3874, "lr": 1.4873679925829246e-06, "epoch": 0.4042634914538122, "percentage": 40.43, "elapsed_time": "0:25:00", "remaining_time": "0:36:50", "throughput": 13260.06, "total_tokens": 19891904} +{"current_steps": 6320, "total_steps": 15621, "loss": 0.4077, "lr": 1.4863920062733298e-06, "epoch": 0.40458357339478906, "percentage": 40.46, "elapsed_time": "0:25:00", "remaining_time": "0:36:48", "throughput": 13264.99, "total_tokens": 19907392} +{"current_steps": 6325, "total_steps": 15621, "loss": 0.3779, "lr": 1.485415412697604e-06, "epoch": 0.40490365533576594, "percentage": 40.49, "elapsed_time": "0:25:01", "remaining_time": "0:36:46", "throughput": 13269.9, "total_tokens": 19922624} +{"current_steps": 6330, "total_steps": 15621, "loss": 0.4348, "lr": 1.484438213075036e-06, "epoch": 0.4052237372767428, "percentage": 40.52, "elapsed_time": "0:25:01", "remaining_time": "0:36:44", "throughput": 13275.36, "total_tokens": 19939328} +{"current_steps": 6335, "total_steps": 15621, "loss": 0.4465, "lr": 1.4834604086256713e-06, "epoch": 0.40554381921771976, "percentage": 40.55, "elapsed_time": "0:25:02", "remaining_time": "0:36:42", "throughput": 13280.54, "total_tokens": 19955392} +{"current_steps": 6340, "total_steps": 15621, "loss": 0.3818, "lr": 1.4824820005703097e-06, "epoch": 0.40586390115869664, "percentage": 40.59, "elapsed_time": "0:25:03", "remaining_time": "0:36:40", "throughput": 13285.79, "total_tokens": 19971520} +{"current_steps": 6345, "total_steps": 15621, "loss": 0.46, "lr": 1.4815029901305061e-06, "epoch": 0.4061839830996735, "percentage": 40.62, "elapsed_time": "0:25:03", "remaining_time": "0:36:38", "throughput": 13291.3, "total_tokens": 19988352} +{"current_steps": 6350, "total_steps": 15621, "loss": 0.4748, "lr": 1.480523378528565e-06, "epoch": 0.4065040650406504, "percentage": 40.65, "elapsed_time": "0:25:04", "remaining_time": "0:36:36", "throughput": 13296.76, "total_tokens": 20005184} +{"current_steps": 6355, "total_steps": 15621, "loss": 0.4064, "lr": 1.4795431669875441e-06, "epoch": 0.4068241469816273, "percentage": 40.68, "elapsed_time": "0:25:05", "remaining_time": "0:36:34", "throughput": 13301.76, "total_tokens": 20020800} +{"current_steps": 6360, "total_steps": 15621, "loss": 0.472, "lr": 1.478562356731249e-06, "epoch": 0.4071442289226042, "percentage": 40.71, "elapsed_time": "0:25:05", "remaining_time": "0:36:32", "throughput": 13306.76, "total_tokens": 20036416} +{"current_steps": 6365, "total_steps": 15621, "loss": 0.4525, "lr": 1.4775809489842326e-06, "epoch": 0.4074643108635811, "percentage": 40.75, "elapsed_time": "0:25:06", "remaining_time": "0:36:30", "throughput": 13312.21, "total_tokens": 20053184} +{"current_steps": 6370, "total_steps": 15621, "loss": 0.3987, "lr": 1.4765989449717937e-06, "epoch": 0.40778439280455797, "percentage": 40.78, "elapsed_time": "0:25:07", "remaining_time": "0:36:28", "throughput": 13317.62, "total_tokens": 20069888} +{"current_steps": 6375, "total_steps": 15621, "loss": 0.5504, "lr": 1.4756163459199763e-06, "epoch": 0.40810447474553485, "percentage": 40.81, "elapsed_time": "0:25:07", "remaining_time": "0:36:26", "throughput": 13322.66, "total_tokens": 20085760} +{"current_steps": 6380, "total_steps": 15621, "loss": 0.2742, "lr": 1.4746331530555665e-06, "epoch": 0.40842455668651173, "percentage": 40.84, "elapsed_time": "0:25:08", "remaining_time": "0:36:24", "throughput": 13327.55, "total_tokens": 20101056} +{"current_steps": 6385, "total_steps": 15621, "loss": 0.4133, "lr": 1.4736493676060923e-06, "epoch": 0.4087446386274886, "percentage": 40.87, "elapsed_time": "0:25:08", "remaining_time": "0:36:22", "throughput": 13332.3, "total_tokens": 20116352} +{"current_steps": 6390, "total_steps": 15621, "loss": 0.3642, "lr": 1.4726649907998216e-06, "epoch": 0.40906472056846555, "percentage": 40.91, "elapsed_time": "0:25:09", "remaining_time": "0:36:20", "throughput": 13337.14, "total_tokens": 20131712} +{"current_steps": 6395, "total_steps": 15621, "loss": 0.3759, "lr": 1.4716800238657599e-06, "epoch": 0.4093848025094424, "percentage": 40.94, "elapsed_time": "0:25:10", "remaining_time": "0:36:18", "throughput": 13341.91, "total_tokens": 20146880} +{"current_steps": 6400, "total_steps": 15621, "loss": 0.2767, "lr": 1.4706944680336505e-06, "epoch": 0.4097048844504193, "percentage": 40.97, "elapsed_time": "0:25:10", "remaining_time": "0:36:16", "throughput": 13347.27, "total_tokens": 20163520} +{"current_steps": 6405, "total_steps": 15621, "loss": 0.4681, "lr": 1.469708324533971e-06, "epoch": 0.4100249663913962, "percentage": 41.0, "elapsed_time": "0:25:11", "remaining_time": "0:36:14", "throughput": 13351.72, "total_tokens": 20177984} +{"current_steps": 6410, "total_steps": 15621, "loss": 0.3395, "lr": 1.4687215945979335e-06, "epoch": 0.41034504833237306, "percentage": 41.03, "elapsed_time": "0:25:11", "remaining_time": "0:36:12", "throughput": 13356.63, "total_tokens": 20193472} +{"current_steps": 6415, "total_steps": 15621, "loss": 0.4507, "lr": 1.4677342794574815e-06, "epoch": 0.41066513027335, "percentage": 41.07, "elapsed_time": "0:25:12", "remaining_time": "0:36:10", "throughput": 13362.25, "total_tokens": 20210624} +{"current_steps": 6420, "total_steps": 15621, "loss": 0.4199, "lr": 1.4667463803452902e-06, "epoch": 0.4109852122143269, "percentage": 41.1, "elapsed_time": "0:25:13", "remaining_time": "0:36:08", "throughput": 13367.36, "total_tokens": 20226688} +{"current_steps": 6425, "total_steps": 15621, "loss": 0.4472, "lr": 1.4657578984947627e-06, "epoch": 0.41130529415530376, "percentage": 41.13, "elapsed_time": "0:25:13", "remaining_time": "0:36:06", "throughput": 13373.26, "total_tokens": 20244608} +{"current_steps": 6430, "total_steps": 15621, "loss": 0.3699, "lr": 1.4647688351400303e-06, "epoch": 0.41162537609628064, "percentage": 41.16, "elapsed_time": "0:25:14", "remaining_time": "0:36:04", "throughput": 13378.53, "total_tokens": 20261184} +{"current_steps": 6435, "total_steps": 15621, "loss": 0.3348, "lr": 1.46377919151595e-06, "epoch": 0.4119454580372575, "percentage": 41.19, "elapsed_time": "0:25:15", "remaining_time": "0:36:02", "throughput": 13383.37, "total_tokens": 20276736} +{"current_steps": 6440, "total_steps": 15621, "loss": 0.4651, "lr": 1.462788968858104e-06, "epoch": 0.41226553997823445, "percentage": 41.23, "elapsed_time": "0:25:15", "remaining_time": "0:36:00", "throughput": 13388.87, "total_tokens": 20293888} +{"current_steps": 6445, "total_steps": 15621, "loss": 0.482, "lr": 1.4617981684027966e-06, "epoch": 0.41258562191921133, "percentage": 41.26, "elapsed_time": "0:25:16", "remaining_time": "0:35:58", "throughput": 13393.83, "total_tokens": 20309696} +{"current_steps": 6450, "total_steps": 15621, "loss": 0.4013, "lr": 1.4608067913870536e-06, "epoch": 0.4129057038601882, "percentage": 41.29, "elapsed_time": "0:25:16", "remaining_time": "0:35:56", "throughput": 13398.85, "total_tokens": 20325632} +{"current_steps": 6455, "total_steps": 15621, "loss": 0.3968, "lr": 1.4598148390486213e-06, "epoch": 0.4132257858011651, "percentage": 41.32, "elapsed_time": "0:25:17", "remaining_time": "0:35:54", "throughput": 13404.0, "total_tokens": 20341888} +{"current_steps": 6460, "total_steps": 15621, "loss": 0.5073, "lr": 1.4588223126259639e-06, "epoch": 0.41354586774214197, "percentage": 41.35, "elapsed_time": "0:25:18", "remaining_time": "0:35:53", "throughput": 13409.39, "total_tokens": 20358656} +{"current_steps": 6465, "total_steps": 15621, "loss": 0.3245, "lr": 1.4578292133582615e-06, "epoch": 0.4138659496831189, "percentage": 41.39, "elapsed_time": "0:25:18", "remaining_time": "0:35:51", "throughput": 13413.69, "total_tokens": 20372864} +{"current_steps": 6470, "total_steps": 15621, "loss": 0.3954, "lr": 1.456835542485411e-06, "epoch": 0.4141860316240958, "percentage": 41.42, "elapsed_time": "0:25:19", "remaining_time": "0:35:49", "throughput": 13418.29, "total_tokens": 20387840} +{"current_steps": 6475, "total_steps": 15621, "loss": 0.4092, "lr": 1.4558413012480215e-06, "epoch": 0.41450611356507266, "percentage": 41.45, "elapsed_time": "0:25:20", "remaining_time": "0:35:47", "throughput": 13423.68, "total_tokens": 20404736} +{"current_steps": 6480, "total_steps": 15621, "loss": 0.5673, "lr": 1.4548464908874156e-06, "epoch": 0.41482619550604954, "percentage": 41.48, "elapsed_time": "0:25:20", "remaining_time": "0:35:45", "throughput": 13429.6, "total_tokens": 20422848} +{"current_steps": 6485, "total_steps": 15621, "loss": 0.3996, "lr": 1.4538511126456255e-06, "epoch": 0.4151462774470264, "percentage": 41.51, "elapsed_time": "0:25:21", "remaining_time": "0:35:43", "throughput": 13434.27, "total_tokens": 20438016} +{"current_steps": 6490, "total_steps": 15621, "loss": 0.5913, "lr": 1.452855167765392e-06, "epoch": 0.4154663593880033, "percentage": 41.55, "elapsed_time": "0:25:21", "remaining_time": "0:35:41", "throughput": 13439.47, "total_tokens": 20454464} +{"current_steps": 6495, "total_steps": 15621, "loss": 0.4487, "lr": 1.4518586574901647e-06, "epoch": 0.41578644132898024, "percentage": 41.58, "elapsed_time": "0:25:22", "remaining_time": "0:35:39", "throughput": 13444.51, "total_tokens": 20470464} +{"current_steps": 6500, "total_steps": 15621, "loss": 0.4617, "lr": 1.450861583064098e-06, "epoch": 0.4161065232699571, "percentage": 41.61, "elapsed_time": "0:25:23", "remaining_time": "0:35:37", "throughput": 13449.25, "total_tokens": 20485696} +{"current_steps": 6505, "total_steps": 15621, "loss": 0.3642, "lr": 1.4498639457320515e-06, "epoch": 0.416426605210934, "percentage": 41.64, "elapsed_time": "0:25:23", "remaining_time": "0:35:35", "throughput": 13453.8, "total_tokens": 20500608} +{"current_steps": 6510, "total_steps": 15621, "loss": 0.4686, "lr": 1.4488657467395865e-06, "epoch": 0.4167466871519109, "percentage": 41.67, "elapsed_time": "0:25:24", "remaining_time": "0:35:33", "throughput": 13458.47, "total_tokens": 20515776} +{"current_steps": 6515, "total_steps": 15621, "loss": 0.5078, "lr": 1.4478669873329663e-06, "epoch": 0.41706676909288776, "percentage": 41.71, "elapsed_time": "0:25:24", "remaining_time": "0:35:31", "throughput": 13463.35, "total_tokens": 20531456} +{"current_steps": 6520, "total_steps": 15621, "loss": 0.386, "lr": 1.4468676687591536e-06, "epoch": 0.4173868510338647, "percentage": 41.74, "elapsed_time": "0:25:25", "remaining_time": "0:35:29", "throughput": 13468.18, "total_tokens": 20547200} +{"current_steps": 6525, "total_steps": 15621, "loss": 0.4358, "lr": 1.4458677922658104e-06, "epoch": 0.41770693297484157, "percentage": 41.77, "elapsed_time": "0:25:26", "remaining_time": "0:35:27", "throughput": 13472.88, "total_tokens": 20562560} +{"current_steps": 6530, "total_steps": 15621, "loss": 0.2798, "lr": 1.444867359101293e-06, "epoch": 0.41802701491581845, "percentage": 41.8, "elapsed_time": "0:25:26", "remaining_time": "0:35:25", "throughput": 13477.37, "total_tokens": 20577344} +{"current_steps": 6535, "total_steps": 15621, "loss": 0.3529, "lr": 1.4438663705146545e-06, "epoch": 0.41834709685679533, "percentage": 41.83, "elapsed_time": "0:25:27", "remaining_time": "0:35:23", "throughput": 13482.2, "total_tokens": 20593088} +{"current_steps": 6540, "total_steps": 15621, "loss": 0.3589, "lr": 1.442864827755641e-06, "epoch": 0.4186671787977722, "percentage": 41.87, "elapsed_time": "0:25:28", "remaining_time": "0:35:21", "throughput": 13487.49, "total_tokens": 20609792} +{"current_steps": 6545, "total_steps": 15621, "loss": 0.4407, "lr": 1.4418627320746901e-06, "epoch": 0.41898726073874915, "percentage": 41.9, "elapsed_time": "0:25:28", "remaining_time": "0:35:19", "throughput": 13492.27, "total_tokens": 20625280} +{"current_steps": 6550, "total_steps": 15621, "loss": 0.3854, "lr": 1.4408600847229304e-06, "epoch": 0.419307342679726, "percentage": 41.93, "elapsed_time": "0:25:29", "remaining_time": "0:35:17", "throughput": 13497.53, "total_tokens": 20641984} +{"current_steps": 6555, "total_steps": 15621, "loss": 0.5281, "lr": 1.4398568869521782e-06, "epoch": 0.4196274246207029, "percentage": 41.96, "elapsed_time": "0:25:29", "remaining_time": "0:35:16", "throughput": 13502.59, "total_tokens": 20658240} +{"current_steps": 6560, "total_steps": 15621, "loss": 0.3645, "lr": 1.4388531400149384e-06, "epoch": 0.4199475065616798, "percentage": 41.99, "elapsed_time": "0:25:30", "remaining_time": "0:35:14", "throughput": 13507.27, "total_tokens": 20673408} +{"current_steps": 6565, "total_steps": 15621, "loss": 0.3866, "lr": 1.4378488451644007e-06, "epoch": 0.42026758850265666, "percentage": 42.03, "elapsed_time": "0:25:31", "remaining_time": "0:35:12", "throughput": 13512.04, "total_tokens": 20688960} +{"current_steps": 6570, "total_steps": 15621, "loss": 0.4049, "lr": 1.4368440036544386e-06, "epoch": 0.42058767044363354, "percentage": 42.06, "elapsed_time": "0:25:31", "remaining_time": "0:35:10", "throughput": 13516.87, "total_tokens": 20704768} +{"current_steps": 6575, "total_steps": 15621, "loss": 0.4199, "lr": 1.435838616739609e-06, "epoch": 0.4209077523846105, "percentage": 42.09, "elapsed_time": "0:25:32", "remaining_time": "0:35:08", "throughput": 13521.45, "total_tokens": 20719808} +{"current_steps": 6580, "total_steps": 15621, "loss": 0.5392, "lr": 1.4348326856751493e-06, "epoch": 0.42122783432558736, "percentage": 42.12, "elapsed_time": "0:25:32", "remaining_time": "0:35:06", "throughput": 13526.36, "total_tokens": 20735680} +{"current_steps": 6585, "total_steps": 15621, "loss": 0.3422, "lr": 1.433826211716976e-06, "epoch": 0.42154791626656424, "percentage": 42.15, "elapsed_time": "0:25:33", "remaining_time": "0:35:04", "throughput": 13530.7, "total_tokens": 20750144} +{"current_steps": 6590, "total_steps": 15621, "loss": 0.3966, "lr": 1.4328191961216835e-06, "epoch": 0.4218679982075411, "percentage": 42.19, "elapsed_time": "0:25:34", "remaining_time": "0:35:02", "throughput": 13535.58, "total_tokens": 20766016} +{"current_steps": 6595, "total_steps": 15621, "loss": 0.4812, "lr": 1.4318116401465427e-06, "epoch": 0.422188080148518, "percentage": 42.22, "elapsed_time": "0:25:34", "remaining_time": "0:35:00", "throughput": 13540.8, "total_tokens": 20782720} +{"current_steps": 6600, "total_steps": 15621, "loss": 0.388, "lr": 1.430803545049499e-06, "epoch": 0.42250816208949493, "percentage": 42.25, "elapsed_time": "0:25:35", "remaining_time": "0:34:58", "throughput": 13545.52, "total_tokens": 20798208} +{"current_steps": 6605, "total_steps": 15621, "loss": 0.5652, "lr": 1.4297949120891716e-06, "epoch": 0.4228282440304718, "percentage": 42.28, "elapsed_time": "0:25:36", "remaining_time": "0:34:56", "throughput": 13550.02, "total_tokens": 20813056} +{"current_steps": 6610, "total_steps": 15621, "loss": 0.4121, "lr": 1.4287857425248497e-06, "epoch": 0.4231483259714487, "percentage": 42.31, "elapsed_time": "0:25:36", "remaining_time": "0:34:54", "throughput": 13554.87, "total_tokens": 20828800} +{"current_steps": 6615, "total_steps": 15621, "loss": 0.4974, "lr": 1.427776037616494e-06, "epoch": 0.42346840791242557, "percentage": 42.35, "elapsed_time": "0:25:37", "remaining_time": "0:34:52", "throughput": 13559.74, "total_tokens": 20844736} +{"current_steps": 6620, "total_steps": 15621, "loss": 0.3527, "lr": 1.4267657986247326e-06, "epoch": 0.42378848985340245, "percentage": 42.38, "elapsed_time": "0:25:37", "remaining_time": "0:34:50", "throughput": 13564.67, "total_tokens": 20860672} +{"current_steps": 6625, "total_steps": 15621, "loss": 0.3746, "lr": 1.425755026810861e-06, "epoch": 0.4241085717943794, "percentage": 42.41, "elapsed_time": "0:25:38", "remaining_time": "0:34:49", "throughput": 13569.74, "total_tokens": 20877184} +{"current_steps": 6630, "total_steps": 15621, "loss": 0.4095, "lr": 1.4247437234368394e-06, "epoch": 0.42442865373535626, "percentage": 42.44, "elapsed_time": "0:25:39", "remaining_time": "0:34:47", "throughput": 13575.04, "total_tokens": 20894208} +{"current_steps": 6635, "total_steps": 15621, "loss": 0.4001, "lr": 1.423731889765292e-06, "epoch": 0.42474873567633314, "percentage": 42.47, "elapsed_time": "0:25:39", "remaining_time": "0:34:45", "throughput": 13579.74, "total_tokens": 20909696} +{"current_steps": 6640, "total_steps": 15621, "loss": 0.3504, "lr": 1.422719527059505e-06, "epoch": 0.42506881761731, "percentage": 42.51, "elapsed_time": "0:25:40", "remaining_time": "0:34:43", "throughput": 13584.8, "total_tokens": 20926016} +{"current_steps": 6645, "total_steps": 15621, "loss": 0.3636, "lr": 1.4217066365834253e-06, "epoch": 0.4253888995582869, "percentage": 42.54, "elapsed_time": "0:25:41", "remaining_time": "0:34:41", "throughput": 13589.47, "total_tokens": 20941440} +{"current_steps": 6650, "total_steps": 15621, "loss": 0.4406, "lr": 1.4206932196016586e-06, "epoch": 0.42570898149926384, "percentage": 42.57, "elapsed_time": "0:25:41", "remaining_time": "0:34:39", "throughput": 13593.92, "total_tokens": 20956352} +{"current_steps": 6655, "total_steps": 15621, "loss": 0.3928, "lr": 1.4196792773794672e-06, "epoch": 0.4260290634402407, "percentage": 42.6, "elapsed_time": "0:25:42", "remaining_time": "0:34:37", "throughput": 13599.1, "total_tokens": 20973056} +{"current_steps": 6660, "total_steps": 15621, "loss": 0.438, "lr": 1.418664811182771e-06, "epoch": 0.4263491453812176, "percentage": 42.63, "elapsed_time": "0:25:42", "remaining_time": "0:34:35", "throughput": 13604.01, "total_tokens": 20989248} +{"current_steps": 6665, "total_steps": 15621, "loss": 0.4836, "lr": 1.417649822278142e-06, "epoch": 0.4266692273221945, "percentage": 42.67, "elapsed_time": "0:25:43", "remaining_time": "0:34:33", "throughput": 13608.51, "total_tokens": 21004096} +{"current_steps": 6670, "total_steps": 15621, "loss": 0.4722, "lr": 1.4166343119328064e-06, "epoch": 0.42698930926317136, "percentage": 42.7, "elapsed_time": "0:25:44", "remaining_time": "0:34:32", "throughput": 13613.51, "total_tokens": 21020224} +{"current_steps": 6675, "total_steps": 15621, "loss": 0.4616, "lr": 1.4156182814146404e-06, "epoch": 0.42730939120414824, "percentage": 42.73, "elapsed_time": "0:25:44", "remaining_time": "0:34:30", "throughput": 13618.08, "total_tokens": 21035264} +{"current_steps": 6680, "total_steps": 15621, "loss": 0.3497, "lr": 1.4146017319921701e-06, "epoch": 0.42762947314512517, "percentage": 42.76, "elapsed_time": "0:25:45", "remaining_time": "0:34:28", "throughput": 13623.2, "total_tokens": 21051904} +{"current_steps": 6685, "total_steps": 15621, "loss": 0.4215, "lr": 1.4135846649345695e-06, "epoch": 0.42794955508610205, "percentage": 42.79, "elapsed_time": "0:25:45", "remaining_time": "0:34:26", "throughput": 13628.68, "total_tokens": 21069504} +{"current_steps": 6690, "total_steps": 15621, "loss": 0.427, "lr": 1.4125670815116589e-06, "epoch": 0.42826963702707893, "percentage": 42.83, "elapsed_time": "0:25:46", "remaining_time": "0:34:24", "throughput": 13633.15, "total_tokens": 21084288} +{"current_steps": 6695, "total_steps": 15621, "loss": 0.2926, "lr": 1.4115489829939025e-06, "epoch": 0.4285897189680558, "percentage": 42.86, "elapsed_time": "0:25:47", "remaining_time": "0:34:22", "throughput": 13638.14, "total_tokens": 21100544} +{"current_steps": 6700, "total_steps": 15621, "loss": 0.4407, "lr": 1.4105303706524093e-06, "epoch": 0.4289098009090327, "percentage": 42.89, "elapsed_time": "0:25:47", "remaining_time": "0:34:20", "throughput": 13642.98, "total_tokens": 21116608} +{"current_steps": 6705, "total_steps": 15621, "loss": 0.5926, "lr": 1.4095112457589276e-06, "epoch": 0.4292298828500096, "percentage": 42.92, "elapsed_time": "0:25:48", "remaining_time": "0:34:18", "throughput": 13647.55, "total_tokens": 21131776} +{"current_steps": 6710, "total_steps": 15621, "loss": 0.3962, "lr": 1.4084916095858477e-06, "epoch": 0.4295499647909865, "percentage": 42.95, "elapsed_time": "0:25:48", "remaining_time": "0:34:17", "throughput": 13651.81, "total_tokens": 21146368} +{"current_steps": 6715, "total_steps": 15621, "loss": 0.4951, "lr": 1.407471463406197e-06, "epoch": 0.4298700467319634, "percentage": 42.99, "elapsed_time": "0:25:49", "remaining_time": "0:34:15", "throughput": 13656.66, "total_tokens": 21162368} +{"current_steps": 6720, "total_steps": 15621, "loss": 0.4329, "lr": 1.4064508084936399e-06, "epoch": 0.43019012867294026, "percentage": 43.02, "elapsed_time": "0:25:50", "remaining_time": "0:34:13", "throughput": 13661.79, "total_tokens": 21179008} +{"current_steps": 6725, "total_steps": 15621, "loss": 0.5761, "lr": 1.405429646122476e-06, "epoch": 0.43051021061391714, "percentage": 43.05, "elapsed_time": "0:25:50", "remaining_time": "0:34:11", "throughput": 13667.07, "total_tokens": 21196160} +{"current_steps": 6730, "total_steps": 15621, "loss": 0.5175, "lr": 1.4044079775676392e-06, "epoch": 0.4308302925548941, "percentage": 43.08, "elapsed_time": "0:25:51", "remaining_time": "0:34:09", "throughput": 13671.89, "total_tokens": 21212032} +{"current_steps": 6735, "total_steps": 15621, "loss": 0.3659, "lr": 1.4033858041046936e-06, "epoch": 0.43115037449587096, "percentage": 43.12, "elapsed_time": "0:25:52", "remaining_time": "0:34:07", "throughput": 13677.64, "total_tokens": 21230272} +{"current_steps": 6740, "total_steps": 15621, "loss": 0.3926, "lr": 1.4023631270098352e-06, "epoch": 0.43147045643684784, "percentage": 43.15, "elapsed_time": "0:25:52", "remaining_time": "0:34:06", "throughput": 13682.28, "total_tokens": 21245760} +{"current_steps": 6745, "total_steps": 15621, "loss": 0.3411, "lr": 1.4013399475598888e-06, "epoch": 0.4317905383778247, "percentage": 43.18, "elapsed_time": "0:25:53", "remaining_time": "0:34:04", "throughput": 13686.78, "total_tokens": 21260992} +{"current_steps": 6750, "total_steps": 15621, "loss": 0.2807, "lr": 1.4003162670323056e-06, "epoch": 0.4321106203188016, "percentage": 43.21, "elapsed_time": "0:25:53", "remaining_time": "0:34:02", "throughput": 13690.88, "total_tokens": 21275136} +{"current_steps": 6755, "total_steps": 15621, "loss": 0.5292, "lr": 1.3992920867051627e-06, "epoch": 0.4324307022597785, "percentage": 43.24, "elapsed_time": "0:25:54", "remaining_time": "0:34:00", "throughput": 13695.49, "total_tokens": 21290560} +{"current_steps": 6760, "total_steps": 15621, "loss": 0.3525, "lr": 1.3982674078571614e-06, "epoch": 0.4327507842007554, "percentage": 43.28, "elapsed_time": "0:25:55", "remaining_time": "0:33:58", "throughput": 13699.94, "total_tokens": 21305536} +{"current_steps": 6765, "total_steps": 15621, "loss": 0.3785, "lr": 1.3972422317676252e-06, "epoch": 0.4330708661417323, "percentage": 43.31, "elapsed_time": "0:25:55", "remaining_time": "0:33:56", "throughput": 13704.41, "total_tokens": 21320576} +{"current_steps": 6770, "total_steps": 15621, "loss": 0.367, "lr": 1.3962165597164985e-06, "epoch": 0.43339094808270917, "percentage": 43.34, "elapsed_time": "0:25:56", "remaining_time": "0:33:54", "throughput": 13708.84, "total_tokens": 21335680} +{"current_steps": 6775, "total_steps": 15621, "loss": 0.3496, "lr": 1.395190392984345e-06, "epoch": 0.43371103002368605, "percentage": 43.37, "elapsed_time": "0:25:56", "remaining_time": "0:33:52", "throughput": 13713.63, "total_tokens": 21351808} +{"current_steps": 6780, "total_steps": 15621, "loss": 0.4482, "lr": 1.3941637328523452e-06, "epoch": 0.43403111196466293, "percentage": 43.4, "elapsed_time": "0:25:57", "remaining_time": "0:33:51", "throughput": 13717.97, "total_tokens": 21366464} +{"current_steps": 6785, "total_steps": 15621, "loss": 0.3094, "lr": 1.3931365806022978e-06, "epoch": 0.43435119390563987, "percentage": 43.44, "elapsed_time": "0:25:58", "remaining_time": "0:33:49", "throughput": 13723.04, "total_tokens": 21383296} +{"current_steps": 6790, "total_steps": 15621, "loss": 0.3178, "lr": 1.3921089375166131e-06, "epoch": 0.43467127584661674, "percentage": 43.47, "elapsed_time": "0:25:58", "remaining_time": "0:33:47", "throughput": 13727.96, "total_tokens": 21399616} +{"current_steps": 6795, "total_steps": 15621, "loss": 0.4475, "lr": 1.391080804878316e-06, "epoch": 0.4349913577875936, "percentage": 43.5, "elapsed_time": "0:25:59", "remaining_time": "0:33:45", "throughput": 13732.43, "total_tokens": 21414848} +{"current_steps": 6800, "total_steps": 15621, "loss": 0.3748, "lr": 1.3900521839710427e-06, "epoch": 0.4353114397285705, "percentage": 43.53, "elapsed_time": "0:26:00", "remaining_time": "0:33:43", "throughput": 13736.95, "total_tokens": 21430144} +{"current_steps": 6805, "total_steps": 15621, "loss": 0.3516, "lr": 1.3890230760790373e-06, "epoch": 0.4356315216695474, "percentage": 43.56, "elapsed_time": "0:26:00", "remaining_time": "0:33:41", "throughput": 13741.36, "total_tokens": 21445248} +{"current_steps": 6810, "total_steps": 15621, "loss": 0.5972, "lr": 1.3879934824871544e-06, "epoch": 0.4359516036105243, "percentage": 43.6, "elapsed_time": "0:26:01", "remaining_time": "0:33:39", "throughput": 13745.87, "total_tokens": 21460544} +{"current_steps": 6815, "total_steps": 15621, "loss": 0.4871, "lr": 1.3869634044808526e-06, "epoch": 0.4362716855515012, "percentage": 43.63, "elapsed_time": "0:26:01", "remaining_time": "0:33:38", "throughput": 13750.55, "total_tokens": 21476224} +{"current_steps": 6820, "total_steps": 15621, "loss": 0.5996, "lr": 1.3859328433461971e-06, "epoch": 0.4365917674924781, "percentage": 43.66, "elapsed_time": "0:26:02", "remaining_time": "0:33:36", "throughput": 13755.18, "total_tokens": 21491712} +{"current_steps": 6825, "total_steps": 15621, "loss": 0.5784, "lr": 1.3849018003698553e-06, "epoch": 0.43691184943345496, "percentage": 43.69, "elapsed_time": "0:26:03", "remaining_time": "0:33:34", "throughput": 13760.4, "total_tokens": 21508928} +{"current_steps": 6830, "total_steps": 15621, "loss": 0.415, "lr": 1.3838702768390964e-06, "epoch": 0.43723193137443184, "percentage": 43.72, "elapsed_time": "0:26:03", "remaining_time": "0:33:32", "throughput": 13764.68, "total_tokens": 21523648} +{"current_steps": 6835, "total_steps": 15621, "loss": 0.4777, "lr": 1.38283827404179e-06, "epoch": 0.43755201331540877, "percentage": 43.76, "elapsed_time": "0:26:04", "remaining_time": "0:33:30", "throughput": 13769.28, "total_tokens": 21539264} +{"current_steps": 6840, "total_steps": 15621, "loss": 0.3776, "lr": 1.381805793266403e-06, "epoch": 0.43787209525638565, "percentage": 43.79, "elapsed_time": "0:26:04", "remaining_time": "0:33:29", "throughput": 13774.06, "total_tokens": 21555520} +{"current_steps": 6845, "total_steps": 15621, "loss": 0.4517, "lr": 1.3807728358020009e-06, "epoch": 0.43819217719736253, "percentage": 43.82, "elapsed_time": "0:26:05", "remaining_time": "0:33:27", "throughput": 13778.32, "total_tokens": 21570112} +{"current_steps": 6850, "total_steps": 15621, "loss": 0.3386, "lr": 1.3797394029382416e-06, "epoch": 0.4385122591383394, "percentage": 43.85, "elapsed_time": "0:26:06", "remaining_time": "0:33:25", "throughput": 13782.55, "total_tokens": 21584768} +{"current_steps": 6855, "total_steps": 15621, "loss": 0.2963, "lr": 1.37870549596538e-06, "epoch": 0.4388323410793163, "percentage": 43.88, "elapsed_time": "0:26:06", "remaining_time": "0:33:23", "throughput": 13786.98, "total_tokens": 21599872} +{"current_steps": 6860, "total_steps": 15621, "loss": 0.5262, "lr": 1.3776711161742595e-06, "epoch": 0.43915242302029317, "percentage": 43.92, "elapsed_time": "0:26:07", "remaining_time": "0:33:21", "throughput": 13791.68, "total_tokens": 21615808} +{"current_steps": 6865, "total_steps": 15621, "loss": 0.4639, "lr": 1.3766362648563166e-06, "epoch": 0.4394725049612701, "percentage": 43.95, "elapsed_time": "0:26:07", "remaining_time": "0:33:19", "throughput": 13795.99, "total_tokens": 21630656} +{"current_steps": 6870, "total_steps": 15621, "loss": 0.4073, "lr": 1.3756009433035744e-06, "epoch": 0.439792586902247, "percentage": 43.98, "elapsed_time": "0:26:08", "remaining_time": "0:33:17", "throughput": 13800.84, "total_tokens": 21646976} +{"current_steps": 6875, "total_steps": 15621, "loss": 0.5615, "lr": 1.3745651528086447e-06, "epoch": 0.44011266884322386, "percentage": 44.01, "elapsed_time": "0:26:09", "remaining_time": "0:33:16", "throughput": 13806.38, "total_tokens": 21665024} +{"current_steps": 6880, "total_steps": 15621, "loss": 0.4486, "lr": 1.373528894664724e-06, "epoch": 0.44043275078420074, "percentage": 44.04, "elapsed_time": "0:26:09", "remaining_time": "0:33:14", "throughput": 13810.79, "total_tokens": 21680128} +{"current_steps": 6885, "total_steps": 15621, "loss": 0.3509, "lr": 1.3724921701655924e-06, "epoch": 0.4407528327251776, "percentage": 44.08, "elapsed_time": "0:26:10", "remaining_time": "0:33:12", "throughput": 13815.35, "total_tokens": 21695808} +{"current_steps": 6890, "total_steps": 15621, "loss": 0.3155, "lr": 1.3714549806056125e-06, "epoch": 0.44107291466615456, "percentage": 44.11, "elapsed_time": "0:26:11", "remaining_time": "0:33:10", "throughput": 13819.98, "total_tokens": 21711936} +{"current_steps": 6895, "total_steps": 15621, "loss": 0.4241, "lr": 1.3704173272797283e-06, "epoch": 0.44139299660713144, "percentage": 44.14, "elapsed_time": "0:26:11", "remaining_time": "0:33:09", "throughput": 13824.49, "total_tokens": 21727488} +{"current_steps": 6900, "total_steps": 15621, "loss": 0.4386, "lr": 1.3693792114834619e-06, "epoch": 0.4417130785481083, "percentage": 44.17, "elapsed_time": "0:26:12", "remaining_time": "0:33:07", "throughput": 13829.92, "total_tokens": 21745280} +{"current_steps": 6905, "total_steps": 15621, "loss": 0.4684, "lr": 1.3683406345129129e-06, "epoch": 0.4420331604890852, "percentage": 44.2, "elapsed_time": "0:26:12", "remaining_time": "0:33:05", "throughput": 13834.15, "total_tokens": 21760000} +{"current_steps": 6910, "total_steps": 15621, "loss": 0.4025, "lr": 1.3673015976647567e-06, "epoch": 0.4423532424300621, "percentage": 44.24, "elapsed_time": "0:26:13", "remaining_time": "0:33:03", "throughput": 13838.59, "total_tokens": 21775232} +{"current_steps": 6915, "total_steps": 15621, "loss": 0.3967, "lr": 1.3662621022362435e-06, "epoch": 0.442673324371039, "percentage": 44.27, "elapsed_time": "0:26:14", "remaining_time": "0:33:01", "throughput": 13843.14, "total_tokens": 21790656} +{"current_steps": 6920, "total_steps": 15621, "loss": 0.4654, "lr": 1.3652221495251952e-06, "epoch": 0.4429934063120159, "percentage": 44.3, "elapsed_time": "0:26:14", "remaining_time": "0:33:00", "throughput": 13847.68, "total_tokens": 21806336} +{"current_steps": 6925, "total_steps": 15621, "loss": 0.3204, "lr": 1.3641817408300049e-06, "epoch": 0.44331348825299277, "percentage": 44.33, "elapsed_time": "0:26:15", "remaining_time": "0:32:58", "throughput": 13852.96, "total_tokens": 21823744} +{"current_steps": 6930, "total_steps": 15621, "loss": 0.5579, "lr": 1.3631408774496352e-06, "epoch": 0.44363357019396965, "percentage": 44.36, "elapsed_time": "0:26:15", "remaining_time": "0:32:56", "throughput": 13857.38, "total_tokens": 21839104} +{"current_steps": 6935, "total_steps": 15621, "loss": 0.3566, "lr": 1.3620995606836165e-06, "epoch": 0.44395365213494653, "percentage": 44.4, "elapsed_time": "0:26:16", "remaining_time": "0:32:54", "throughput": 13861.86, "total_tokens": 21854528} +{"current_steps": 6940, "total_steps": 15621, "loss": 0.6023, "lr": 1.3610577918320446e-06, "epoch": 0.4442737340759234, "percentage": 44.43, "elapsed_time": "0:26:17", "remaining_time": "0:32:52", "throughput": 13866.57, "total_tokens": 21870592} +{"current_steps": 6945, "total_steps": 15621, "loss": 0.3743, "lr": 1.3600155721955802e-06, "epoch": 0.44459381601690035, "percentage": 44.46, "elapsed_time": "0:26:17", "remaining_time": "0:32:51", "throughput": 13870.89, "total_tokens": 21885696} +{"current_steps": 6950, "total_steps": 15621, "loss": 0.3819, "lr": 1.3589729030754468e-06, "epoch": 0.4449138979578772, "percentage": 44.49, "elapsed_time": "0:26:18", "remaining_time": "0:32:49", "throughput": 13875.41, "total_tokens": 21901248} +{"current_steps": 6955, "total_steps": 15621, "loss": 0.4341, "lr": 1.3579297857734293e-06, "epoch": 0.4452339798988541, "percentage": 44.52, "elapsed_time": "0:26:19", "remaining_time": "0:32:47", "throughput": 13879.69, "total_tokens": 21916352} +{"current_steps": 6960, "total_steps": 15621, "loss": 0.3365, "lr": 1.3568862215918717e-06, "epoch": 0.445554061839831, "percentage": 44.56, "elapsed_time": "0:26:19", "remaining_time": "0:32:45", "throughput": 13883.84, "total_tokens": 21931072} +{"current_steps": 6965, "total_steps": 15621, "loss": 0.4944, "lr": 1.3558422118336762e-06, "epoch": 0.44587414378080786, "percentage": 44.59, "elapsed_time": "0:26:20", "remaining_time": "0:32:43", "throughput": 13888.39, "total_tokens": 21946752} +{"current_steps": 6970, "total_steps": 15621, "loss": 0.4804, "lr": 1.354797757802301e-06, "epoch": 0.4461942257217848, "percentage": 44.62, "elapsed_time": "0:26:20", "remaining_time": "0:32:42", "throughput": 13892.84, "total_tokens": 21962176} +{"current_steps": 6975, "total_steps": 15621, "loss": 0.392, "lr": 1.3537528608017596e-06, "epoch": 0.4465143076627617, "percentage": 44.65, "elapsed_time": "0:26:21", "remaining_time": "0:32:40", "throughput": 13897.61, "total_tokens": 21978496} +{"current_steps": 6980, "total_steps": 15621, "loss": 0.3973, "lr": 1.352707522136618e-06, "epoch": 0.44683438960373856, "percentage": 44.68, "elapsed_time": "0:26:22", "remaining_time": "0:32:38", "throughput": 13901.51, "total_tokens": 21992576} +{"current_steps": 6985, "total_steps": 15621, "loss": 0.3998, "lr": 1.3516617431119934e-06, "epoch": 0.44715447154471544, "percentage": 44.72, "elapsed_time": "0:26:22", "remaining_time": "0:32:36", "throughput": 13905.95, "total_tokens": 22008000} +{"current_steps": 6990, "total_steps": 15621, "loss": 0.53, "lr": 1.350615525033554e-06, "epoch": 0.4474745534856923, "percentage": 44.75, "elapsed_time": "0:26:23", "remaining_time": "0:32:34", "throughput": 13910.22, "total_tokens": 22022976} +{"current_steps": 6995, "total_steps": 15621, "loss": 0.4027, "lr": 1.3495688692075144e-06, "epoch": 0.44779463542666925, "percentage": 44.78, "elapsed_time": "0:26:23", "remaining_time": "0:32:33", "throughput": 13914.6, "total_tokens": 22038144} +{"current_steps": 7000, "total_steps": 15621, "loss": 0.3435, "lr": 1.3485217769406376e-06, "epoch": 0.44811471736764613, "percentage": 44.81, "elapsed_time": "0:26:24", "remaining_time": "0:32:31", "throughput": 13919.22, "total_tokens": 22054016} +{"current_steps": 7005, "total_steps": 15621, "loss": 0.3605, "lr": 1.3474742495402303e-06, "epoch": 0.448434799308623, "percentage": 44.84, "elapsed_time": "0:26:25", "remaining_time": "0:32:29", "throughput": 13925.21, "total_tokens": 22073920} +{"current_steps": 7010, "total_steps": 15621, "loss": 0.4297, "lr": 1.3464262883141425e-06, "epoch": 0.4487548812495999, "percentage": 44.88, "elapsed_time": "0:26:25", "remaining_time": "0:32:27", "throughput": 13929.85, "total_tokens": 22089728} +{"current_steps": 7015, "total_steps": 15621, "loss": 0.5687, "lr": 1.3453778945707663e-06, "epoch": 0.44907496319057677, "percentage": 44.91, "elapsed_time": "0:26:26", "remaining_time": "0:32:26", "throughput": 13934.25, "total_tokens": 22105344} +{"current_steps": 7020, "total_steps": 15621, "loss": 0.4471, "lr": 1.3443290696190332e-06, "epoch": 0.4493950451315537, "percentage": 44.94, "elapsed_time": "0:26:27", "remaining_time": "0:32:24", "throughput": 13939.06, "total_tokens": 22121792} +{"current_steps": 7025, "total_steps": 15621, "loss": 0.4034, "lr": 1.343279814768414e-06, "epoch": 0.4497151270725306, "percentage": 44.97, "elapsed_time": "0:26:27", "remaining_time": "0:32:22", "throughput": 13943.05, "total_tokens": 22136128} +{"current_steps": 7030, "total_steps": 15621, "loss": 0.38, "lr": 1.3422301313289156e-06, "epoch": 0.45003520901350746, "percentage": 45.0, "elapsed_time": "0:26:28", "remaining_time": "0:32:20", "throughput": 13947.6, "total_tokens": 22151936} +{"current_steps": 7035, "total_steps": 15621, "loss": 0.3794, "lr": 1.34118002061108e-06, "epoch": 0.45035529095448434, "percentage": 45.04, "elapsed_time": "0:26:28", "remaining_time": "0:32:19", "throughput": 13952.27, "total_tokens": 22168128} +{"current_steps": 7038, "total_steps": 15621, "eval_loss": 0.43158382177352905, "epoch": 0.4505473401190705, "percentage": 45.05, "elapsed_time": "0:27:18", "remaining_time": "0:33:18", "throughput": 13536.59, "total_tokens": 22178432} +{"current_steps": 7040, "total_steps": 15621, "loss": 0.4309, "lr": 1.3401294839259828e-06, "epoch": 0.4506753728954612, "percentage": 45.07, "elapsed_time": "0:27:47", "remaining_time": "0:33:51", "throughput": 13307.35, "total_tokens": 22184512} +{"current_steps": 7045, "total_steps": 15621, "loss": 0.54, "lr": 1.3390785225852312e-06, "epoch": 0.4509954548364381, "percentage": 45.1, "elapsed_time": "0:27:47", "remaining_time": "0:33:50", "throughput": 13311.77, "total_tokens": 22199872} +{"current_steps": 7050, "total_steps": 15621, "loss": 0.4411, "lr": 1.3380271379009631e-06, "epoch": 0.45131553677741504, "percentage": 45.13, "elapsed_time": "0:27:48", "remaining_time": "0:33:48", "throughput": 13316.8, "total_tokens": 22216960} +{"current_steps": 7055, "total_steps": 15621, "loss": 0.2615, "lr": 1.3369753311858442e-06, "epoch": 0.4516356187183919, "percentage": 45.16, "elapsed_time": "0:27:48", "remaining_time": "0:33:46", "throughput": 13320.92, "total_tokens": 22231488} +{"current_steps": 7060, "total_steps": 15621, "loss": 0.4584, "lr": 1.3359231037530682e-06, "epoch": 0.4519557006593688, "percentage": 45.2, "elapsed_time": "0:27:49", "remaining_time": "0:33:44", "throughput": 13325.35, "total_tokens": 22246976} +{"current_steps": 7065, "total_steps": 15621, "loss": 0.4139, "lr": 1.3348704569163527e-06, "epoch": 0.4522757826003457, "percentage": 45.23, "elapsed_time": "0:27:50", "remaining_time": "0:33:42", "throughput": 13330.16, "total_tokens": 22263680} +{"current_steps": 7070, "total_steps": 15621, "loss": 0.3347, "lr": 1.33381739198994e-06, "epoch": 0.45259586454132256, "percentage": 45.26, "elapsed_time": "0:27:50", "remaining_time": "0:33:40", "throughput": 13334.74, "total_tokens": 22279552} +{"current_steps": 7075, "total_steps": 15621, "loss": 0.4436, "lr": 1.3327639102885938e-06, "epoch": 0.4529159464822995, "percentage": 45.29, "elapsed_time": "0:27:51", "remaining_time": "0:33:38", "throughput": 13339.3, "total_tokens": 22295296} +{"current_steps": 7080, "total_steps": 15621, "loss": 0.3973, "lr": 1.3317100131275986e-06, "epoch": 0.45323602842327637, "percentage": 45.32, "elapsed_time": "0:27:52", "remaining_time": "0:33:37", "throughput": 13343.53, "total_tokens": 22310400} +{"current_steps": 7085, "total_steps": 15621, "loss": 0.492, "lr": 1.3306557018227576e-06, "epoch": 0.45355611036425325, "percentage": 45.36, "elapsed_time": "0:27:52", "remaining_time": "0:33:35", "throughput": 13348.35, "total_tokens": 22326848} +{"current_steps": 7090, "total_steps": 15621, "loss": 0.47, "lr": 1.3296009776903903e-06, "epoch": 0.45387619230523013, "percentage": 45.39, "elapsed_time": "0:27:53", "remaining_time": "0:33:33", "throughput": 13352.86, "total_tokens": 22342592} +{"current_steps": 7095, "total_steps": 15621, "loss": 0.4386, "lr": 1.3285458420473323e-06, "epoch": 0.454196274246207, "percentage": 45.42, "elapsed_time": "0:27:53", "remaining_time": "0:33:31", "throughput": 13357.6, "total_tokens": 22358912} +{"current_steps": 7100, "total_steps": 15621, "loss": 0.3744, "lr": 1.3274902962109332e-06, "epoch": 0.45451635618718395, "percentage": 45.45, "elapsed_time": "0:27:54", "remaining_time": "0:33:29", "throughput": 13362.08, "total_tokens": 22374528} +{"current_steps": 7105, "total_steps": 15621, "loss": 0.3686, "lr": 1.3264343414990539e-06, "epoch": 0.4548364381281608, "percentage": 45.48, "elapsed_time": "0:27:55", "remaining_time": "0:33:27", "throughput": 13366.43, "total_tokens": 22389824} +{"current_steps": 7110, "total_steps": 15621, "loss": 0.4148, "lr": 1.3253779792300663e-06, "epoch": 0.4551565200691377, "percentage": 45.52, "elapsed_time": "0:27:55", "remaining_time": "0:33:25", "throughput": 13370.83, "total_tokens": 22405376} +{"current_steps": 7115, "total_steps": 15621, "loss": 0.3551, "lr": 1.3243212107228518e-06, "epoch": 0.4554766020101146, "percentage": 45.55, "elapsed_time": "0:27:56", "remaining_time": "0:33:23", "throughput": 13374.97, "total_tokens": 22420032} +{"current_steps": 7120, "total_steps": 15621, "loss": 0.3909, "lr": 1.3232640372967974e-06, "epoch": 0.45579668395109146, "percentage": 45.58, "elapsed_time": "0:27:56", "remaining_time": "0:33:22", "throughput": 13379.08, "total_tokens": 22434688} +{"current_steps": 7125, "total_steps": 15621, "loss": 0.4645, "lr": 1.3222064602717974e-06, "epoch": 0.45611676589206834, "percentage": 45.61, "elapsed_time": "0:27:57", "remaining_time": "0:33:20", "throughput": 13383.79, "total_tokens": 22451072} +{"current_steps": 7130, "total_steps": 15621, "loss": 0.3488, "lr": 1.321148480968248e-06, "epoch": 0.4564368478330453, "percentage": 45.64, "elapsed_time": "0:27:58", "remaining_time": "0:33:18", "throughput": 13388.19, "total_tokens": 22466688} +{"current_steps": 7135, "total_steps": 15621, "loss": 0.4609, "lr": 1.3200901007070495e-06, "epoch": 0.45675692977402216, "percentage": 45.68, "elapsed_time": "0:27:58", "remaining_time": "0:33:16", "throughput": 13392.65, "total_tokens": 22482432} +{"current_steps": 7140, "total_steps": 15621, "loss": 0.4616, "lr": 1.3190313208096022e-06, "epoch": 0.45707701171499904, "percentage": 45.71, "elapsed_time": "0:27:59", "remaining_time": "0:33:14", "throughput": 13396.7, "total_tokens": 22496960} +{"current_steps": 7145, "total_steps": 15621, "loss": 0.3617, "lr": 1.3179721425978048e-06, "epoch": 0.4573970936559759, "percentage": 45.74, "elapsed_time": "0:27:59", "remaining_time": "0:33:12", "throughput": 13401.03, "total_tokens": 22512256} +{"current_steps": 7150, "total_steps": 15621, "loss": 0.4002, "lr": 1.3169125673940541e-06, "epoch": 0.4577171755969528, "percentage": 45.77, "elapsed_time": "0:28:00", "remaining_time": "0:33:10", "throughput": 13405.62, "total_tokens": 22528192} +{"current_steps": 7155, "total_steps": 15621, "loss": 0.4126, "lr": 1.3158525965212422e-06, "epoch": 0.45803725753792973, "percentage": 45.8, "elapsed_time": "0:28:01", "remaining_time": "0:33:09", "throughput": 13410.6, "total_tokens": 22545408} +{"current_steps": 7160, "total_steps": 15621, "loss": 0.5063, "lr": 1.3147922313027548e-06, "epoch": 0.4583573394789066, "percentage": 45.84, "elapsed_time": "0:28:01", "remaining_time": "0:33:07", "throughput": 13414.95, "total_tokens": 22560832} +{"current_steps": 7165, "total_steps": 15621, "loss": 0.3456, "lr": 1.3137314730624707e-06, "epoch": 0.4586774214198835, "percentage": 45.87, "elapsed_time": "0:28:02", "remaining_time": "0:33:05", "throughput": 13419.86, "total_tokens": 22577728} +{"current_steps": 7170, "total_steps": 15621, "loss": 0.4722, "lr": 1.3126703231247588e-06, "epoch": 0.45899750336086037, "percentage": 45.9, "elapsed_time": "0:28:03", "remaining_time": "0:33:03", "throughput": 13424.56, "total_tokens": 22594112} +{"current_steps": 7175, "total_steps": 15621, "loss": 0.3917, "lr": 1.3116087828144772e-06, "epoch": 0.45931758530183725, "percentage": 45.93, "elapsed_time": "0:28:03", "remaining_time": "0:33:01", "throughput": 13428.93, "total_tokens": 22609728} +{"current_steps": 7180, "total_steps": 15621, "loss": 0.4692, "lr": 1.310546853456972e-06, "epoch": 0.4596376672428142, "percentage": 45.96, "elapsed_time": "0:28:04", "remaining_time": "0:33:00", "throughput": 13433.08, "total_tokens": 22624704} +{"current_steps": 7185, "total_steps": 15621, "loss": 0.3145, "lr": 1.3094845363780737e-06, "epoch": 0.45995774918379106, "percentage": 46.0, "elapsed_time": "0:28:04", "remaining_time": "0:32:58", "throughput": 13437.52, "total_tokens": 22640448} +{"current_steps": 7190, "total_steps": 15621, "loss": 0.2277, "lr": 1.3084218329040976e-06, "epoch": 0.46027783112476794, "percentage": 46.03, "elapsed_time": "0:28:05", "remaining_time": "0:32:56", "throughput": 13441.8, "total_tokens": 22655680} +{"current_steps": 7195, "total_steps": 15621, "loss": 0.3769, "lr": 1.3073587443618425e-06, "epoch": 0.4605979130657448, "percentage": 46.06, "elapsed_time": "0:28:06", "remaining_time": "0:32:54", "throughput": 13446.57, "total_tokens": 22672128} +{"current_steps": 7200, "total_steps": 15621, "loss": 0.5418, "lr": 1.3062952720785861e-06, "epoch": 0.4609179950067217, "percentage": 46.09, "elapsed_time": "0:28:06", "remaining_time": "0:32:52", "throughput": 13450.71, "total_tokens": 22687104} +{"current_steps": 7205, "total_steps": 15621, "loss": 0.3724, "lr": 1.305231417382086e-06, "epoch": 0.4612380769476986, "percentage": 46.12, "elapsed_time": "0:28:07", "remaining_time": "0:32:50", "throughput": 13455.19, "total_tokens": 22702976} +{"current_steps": 7210, "total_steps": 15621, "loss": 0.3522, "lr": 1.3041671816005777e-06, "epoch": 0.4615581588886755, "percentage": 46.16, "elapsed_time": "0:28:07", "remaining_time": "0:32:49", "throughput": 13459.52, "total_tokens": 22718464} +{"current_steps": 7215, "total_steps": 15621, "loss": 0.3783, "lr": 1.3031025660627718e-06, "epoch": 0.4618782408296524, "percentage": 46.19, "elapsed_time": "0:28:08", "remaining_time": "0:32:47", "throughput": 13464.07, "total_tokens": 22734656} +{"current_steps": 7220, "total_steps": 15621, "loss": 0.4376, "lr": 1.3020375720978534e-06, "epoch": 0.4621983227706293, "percentage": 46.22, "elapsed_time": "0:28:09", "remaining_time": "0:32:45", "throughput": 13468.41, "total_tokens": 22750016} +{"current_steps": 7225, "total_steps": 15621, "loss": 0.3855, "lr": 1.3009722010354799e-06, "epoch": 0.46251840471160616, "percentage": 46.25, "elapsed_time": "0:28:09", "remaining_time": "0:32:43", "throughput": 13472.77, "total_tokens": 22765632} +{"current_steps": 7230, "total_steps": 15621, "loss": 0.4528, "lr": 1.2999064542057794e-06, "epoch": 0.46283848665258304, "percentage": 46.28, "elapsed_time": "0:28:10", "remaining_time": "0:32:41", "throughput": 13477.12, "total_tokens": 22781184} +{"current_steps": 7235, "total_steps": 15621, "loss": 0.4842, "lr": 1.2988403329393495e-06, "epoch": 0.46315856859355997, "percentage": 46.32, "elapsed_time": "0:28:10", "remaining_time": "0:32:40", "throughput": 13481.62, "total_tokens": 22797248} +{"current_steps": 7240, "total_steps": 15621, "loss": 0.4177, "lr": 1.2977738385672557e-06, "epoch": 0.46347865053453685, "percentage": 46.35, "elapsed_time": "0:28:11", "remaining_time": "0:32:38", "throughput": 13485.94, "total_tokens": 22812800} +{"current_steps": 7245, "total_steps": 15621, "loss": 0.4087, "lr": 1.2967069724210278e-06, "epoch": 0.46379873247551373, "percentage": 46.38, "elapsed_time": "0:28:12", "remaining_time": "0:32:36", "throughput": 13489.89, "total_tokens": 22827200} +{"current_steps": 7250, "total_steps": 15621, "loss": 0.5265, "lr": 1.2956397358326609e-06, "epoch": 0.4641188144164906, "percentage": 46.41, "elapsed_time": "0:28:12", "remaining_time": "0:32:34", "throughput": 13494.39, "total_tokens": 22843264} +{"current_steps": 7255, "total_steps": 15621, "loss": 0.3799, "lr": 1.294572130134613e-06, "epoch": 0.4644388963574675, "percentage": 46.44, "elapsed_time": "0:28:13", "remaining_time": "0:32:32", "throughput": 13498.67, "total_tokens": 22858624} +{"current_steps": 7260, "total_steps": 15621, "loss": 0.5557, "lr": 1.2935041566598016e-06, "epoch": 0.4647589782984444, "percentage": 46.48, "elapsed_time": "0:28:13", "remaining_time": "0:32:30", "throughput": 13502.92, "total_tokens": 22873856} +{"current_steps": 7265, "total_steps": 15621, "loss": 0.356, "lr": 1.2924358167416049e-06, "epoch": 0.4650790602394213, "percentage": 46.51, "elapsed_time": "0:28:14", "remaining_time": "0:32:29", "throughput": 13507.3, "total_tokens": 22889600} +{"current_steps": 7270, "total_steps": 15621, "loss": 0.4007, "lr": 1.2913671117138572e-06, "epoch": 0.4653991421803982, "percentage": 46.54, "elapsed_time": "0:28:15", "remaining_time": "0:32:27", "throughput": 13511.45, "total_tokens": 22904704} +{"current_steps": 7275, "total_steps": 15621, "loss": 0.3471, "lr": 1.29029804291085e-06, "epoch": 0.46571922412137506, "percentage": 46.57, "elapsed_time": "0:28:15", "remaining_time": "0:32:25", "throughput": 13515.77, "total_tokens": 22920384} +{"current_steps": 7280, "total_steps": 15621, "loss": 0.3475, "lr": 1.2892286116673269e-06, "epoch": 0.46603930606235194, "percentage": 46.6, "elapsed_time": "0:28:16", "remaining_time": "0:32:23", "throughput": 13520.44, "total_tokens": 22937024} +{"current_steps": 7285, "total_steps": 15621, "loss": 0.4934, "lr": 1.2881588193184865e-06, "epoch": 0.4663593880033289, "percentage": 46.64, "elapsed_time": "0:28:17", "remaining_time": "0:32:21", "throughput": 13525.56, "total_tokens": 22954816} +{"current_steps": 7290, "total_steps": 15621, "loss": 0.2918, "lr": 1.287088667199977e-06, "epoch": 0.46667946994430576, "percentage": 46.67, "elapsed_time": "0:28:17", "remaining_time": "0:32:20", "throughput": 13529.59, "total_tokens": 22969472} +{"current_steps": 7295, "total_steps": 15621, "loss": 0.4681, "lr": 1.2860181566478956e-06, "epoch": 0.46699955188528264, "percentage": 46.7, "elapsed_time": "0:28:18", "remaining_time": "0:32:18", "throughput": 13533.58, "total_tokens": 22984192} +{"current_steps": 7300, "total_steps": 15621, "loss": 0.3868, "lr": 1.2849472889987874e-06, "epoch": 0.4673196338262595, "percentage": 46.73, "elapsed_time": "0:28:18", "remaining_time": "0:32:16", "throughput": 13537.88, "total_tokens": 22999680} +{"current_steps": 7305, "total_steps": 15621, "loss": 0.3784, "lr": 1.2838760655896431e-06, "epoch": 0.4676397157672364, "percentage": 46.76, "elapsed_time": "0:28:19", "remaining_time": "0:32:14", "throughput": 13541.98, "total_tokens": 23014720} +{"current_steps": 7310, "total_steps": 15621, "loss": 0.4544, "lr": 1.2828044877578983e-06, "epoch": 0.4679597977082133, "percentage": 46.8, "elapsed_time": "0:28:20", "remaining_time": "0:32:12", "throughput": 13546.42, "total_tokens": 23030528} +{"current_steps": 7315, "total_steps": 15621, "loss": 0.5205, "lr": 1.2817325568414297e-06, "epoch": 0.4682798796491902, "percentage": 46.83, "elapsed_time": "0:28:20", "remaining_time": "0:32:11", "throughput": 13550.99, "total_tokens": 23046784} +{"current_steps": 7320, "total_steps": 15621, "loss": 0.3379, "lr": 1.2806602741785562e-06, "epoch": 0.4685999615901671, "percentage": 46.86, "elapsed_time": "0:28:21", "remaining_time": "0:32:09", "throughput": 13555.03, "total_tokens": 23061632} +{"current_steps": 7325, "total_steps": 15621, "loss": 0.3202, "lr": 1.2795876411080346e-06, "epoch": 0.46892004353114397, "percentage": 46.89, "elapsed_time": "0:28:21", "remaining_time": "0:32:07", "throughput": 13559.55, "total_tokens": 23077888} +{"current_steps": 7330, "total_steps": 15621, "loss": 0.3308, "lr": 1.278514658969061e-06, "epoch": 0.46924012547212085, "percentage": 46.92, "elapsed_time": "0:28:22", "remaining_time": "0:32:05", "throughput": 13563.82, "total_tokens": 23093568} +{"current_steps": 7335, "total_steps": 15621, "loss": 0.5047, "lr": 1.2774413291012648e-06, "epoch": 0.46956020741309773, "percentage": 46.96, "elapsed_time": "0:28:23", "remaining_time": "0:32:04", "throughput": 13568.01, "total_tokens": 23108992} +{"current_steps": 7340, "total_steps": 15621, "loss": 0.4191, "lr": 1.2763676528447122e-06, "epoch": 0.46988028935407467, "percentage": 46.99, "elapsed_time": "0:28:23", "remaining_time": "0:32:02", "throughput": 13572.43, "total_tokens": 23124992} +{"current_steps": 7345, "total_steps": 15621, "loss": 0.3417, "lr": 1.2752936315399003e-06, "epoch": 0.47020037129505154, "percentage": 47.02, "elapsed_time": "0:28:24", "remaining_time": "0:32:00", "throughput": 13577.16, "total_tokens": 23141888} +{"current_steps": 7350, "total_steps": 15621, "loss": 0.3346, "lr": 1.2742192665277566e-06, "epoch": 0.4705204532360284, "percentage": 47.05, "elapsed_time": "0:28:25", "remaining_time": "0:31:58", "throughput": 13581.57, "total_tokens": 23157888} +{"current_steps": 7355, "total_steps": 15621, "loss": 0.2813, "lr": 1.2731445591496393e-06, "epoch": 0.4708405351770053, "percentage": 47.08, "elapsed_time": "0:28:25", "remaining_time": "0:31:56", "throughput": 13585.63, "total_tokens": 23172864} +{"current_steps": 7360, "total_steps": 15621, "loss": 0.4622, "lr": 1.2720695107473325e-06, "epoch": 0.4711606171179822, "percentage": 47.12, "elapsed_time": "0:28:26", "remaining_time": "0:31:55", "throughput": 13589.91, "total_tokens": 23188352} +{"current_steps": 7365, "total_steps": 15621, "loss": 0.3897, "lr": 1.2709941226630475e-06, "epoch": 0.4714806990589591, "percentage": 47.15, "elapsed_time": "0:28:26", "remaining_time": "0:31:53", "throughput": 13594.17, "total_tokens": 23204096} +{"current_steps": 7370, "total_steps": 15621, "loss": 0.3513, "lr": 1.2699183962394182e-06, "epoch": 0.471800780999936, "percentage": 47.18, "elapsed_time": "0:28:27", "remaining_time": "0:31:51", "throughput": 13598.21, "total_tokens": 23219072} +{"current_steps": 7375, "total_steps": 15621, "loss": 0.4198, "lr": 1.2688423328195021e-06, "epoch": 0.4721208629409129, "percentage": 47.21, "elapsed_time": "0:28:28", "remaining_time": "0:31:49", "throughput": 13602.52, "total_tokens": 23234560} +{"current_steps": 7380, "total_steps": 15621, "loss": 0.3426, "lr": 1.267765933746777e-06, "epoch": 0.47244094488188976, "percentage": 47.24, "elapsed_time": "0:28:28", "remaining_time": "0:31:48", "throughput": 13606.9, "total_tokens": 23250304} +{"current_steps": 7385, "total_steps": 15621, "loss": 0.6245, "lr": 1.2666892003651397e-06, "epoch": 0.47276102682286664, "percentage": 47.28, "elapsed_time": "0:28:29", "remaining_time": "0:31:46", "throughput": 13611.08, "total_tokens": 23265664} +{"current_steps": 7390, "total_steps": 15621, "loss": 0.442, "lr": 1.2656121340189043e-06, "epoch": 0.4730811087638435, "percentage": 47.31, "elapsed_time": "0:28:29", "remaining_time": "0:31:44", "throughput": 13615.46, "total_tokens": 23281472} +{"current_steps": 7395, "total_steps": 15621, "loss": 0.411, "lr": 1.264534736052801e-06, "epoch": 0.47340119070482045, "percentage": 47.34, "elapsed_time": "0:28:30", "remaining_time": "0:31:42", "throughput": 13619.68, "total_tokens": 23297024} +{"current_steps": 7400, "total_steps": 15621, "loss": 0.4385, "lr": 1.2634570078119739e-06, "epoch": 0.47372127264579733, "percentage": 47.37, "elapsed_time": "0:28:31", "remaining_time": "0:31:41", "throughput": 13624.14, "total_tokens": 23313344} +{"current_steps": 7405, "total_steps": 15621, "loss": 0.5213, "lr": 1.262378950641979e-06, "epoch": 0.4740413545867742, "percentage": 47.4, "elapsed_time": "0:28:31", "remaining_time": "0:31:39", "throughput": 13628.23, "total_tokens": 23328512} +{"current_steps": 7410, "total_steps": 15621, "loss": 0.4465, "lr": 1.2613005658887836e-06, "epoch": 0.4743614365277511, "percentage": 47.44, "elapsed_time": "0:28:32", "remaining_time": "0:31:37", "throughput": 13631.84, "total_tokens": 23342400} +{"current_steps": 7415, "total_steps": 15621, "loss": 0.4134, "lr": 1.2602218548987637e-06, "epoch": 0.47468151846872797, "percentage": 47.47, "elapsed_time": "0:28:32", "remaining_time": "0:31:35", "throughput": 13636.24, "total_tokens": 23358400} +{"current_steps": 7420, "total_steps": 15621, "loss": 0.4102, "lr": 1.2591428190187029e-06, "epoch": 0.4750016004097049, "percentage": 47.5, "elapsed_time": "0:28:33", "remaining_time": "0:31:33", "throughput": 13640.29, "total_tokens": 23373376} +{"current_steps": 7425, "total_steps": 15621, "loss": 0.5013, "lr": 1.2580634595957898e-06, "epoch": 0.4753216823506818, "percentage": 47.53, "elapsed_time": "0:28:34", "remaining_time": "0:31:32", "throughput": 13645.07, "total_tokens": 23390400} +{"current_steps": 7430, "total_steps": 15621, "loss": 0.3705, "lr": 1.2569837779776172e-06, "epoch": 0.47564176429165866, "percentage": 47.56, "elapsed_time": "0:28:34", "remaining_time": "0:31:30", "throughput": 13649.45, "total_tokens": 23406400} +{"current_steps": 7435, "total_steps": 15621, "loss": 0.3131, "lr": 1.2559037755121804e-06, "epoch": 0.47596184623263554, "percentage": 47.6, "elapsed_time": "0:28:35", "remaining_time": "0:31:28", "throughput": 13653.62, "total_tokens": 23421824} +{"current_steps": 7440, "total_steps": 15621, "loss": 0.4512, "lr": 1.2548234535478754e-06, "epoch": 0.4762819281736124, "percentage": 47.63, "elapsed_time": "0:28:36", "remaining_time": "0:31:26", "throughput": 13658.21, "total_tokens": 23438272} +{"current_steps": 7445, "total_steps": 15621, "loss": 0.4216, "lr": 1.2537428134334968e-06, "epoch": 0.47660201011458936, "percentage": 47.66, "elapsed_time": "0:28:36", "remaining_time": "0:31:25", "throughput": 13662.8, "total_tokens": 23454976} +{"current_steps": 7450, "total_steps": 15621, "loss": 0.5189, "lr": 1.252661856518236e-06, "epoch": 0.47692209205556624, "percentage": 47.69, "elapsed_time": "0:28:37", "remaining_time": "0:31:23", "throughput": 13667.31, "total_tokens": 23471168} +{"current_steps": 7455, "total_steps": 15621, "loss": 0.3564, "lr": 1.251580584151681e-06, "epoch": 0.4772421739965431, "percentage": 47.72, "elapsed_time": "0:28:37", "remaining_time": "0:31:21", "throughput": 13671.54, "total_tokens": 23486720} +{"current_steps": 7460, "total_steps": 15621, "loss": 0.3059, "lr": 1.2504989976838129e-06, "epoch": 0.47756225593752, "percentage": 47.76, "elapsed_time": "0:28:38", "remaining_time": "0:31:20", "throughput": 13675.99, "total_tokens": 23502912} +{"current_steps": 7465, "total_steps": 15621, "loss": 0.3667, "lr": 1.2494170984650048e-06, "epoch": 0.4778823378784969, "percentage": 47.79, "elapsed_time": "0:28:39", "remaining_time": "0:31:18", "throughput": 13680.67, "total_tokens": 23519552} +{"current_steps": 7470, "total_steps": 15621, "loss": 0.4019, "lr": 1.248334887846021e-06, "epoch": 0.4782024198194738, "percentage": 47.82, "elapsed_time": "0:28:39", "remaining_time": "0:31:16", "throughput": 13685.16, "total_tokens": 23535936} +{"current_steps": 7475, "total_steps": 15621, "loss": 0.4373, "lr": 1.2472523671780135e-06, "epoch": 0.4785225017604507, "percentage": 47.85, "elapsed_time": "0:28:40", "remaining_time": "0:31:14", "throughput": 13689.19, "total_tokens": 23551040} +{"current_steps": 7480, "total_steps": 15621, "loss": 0.3115, "lr": 1.2461695378125233e-06, "epoch": 0.47884258370142757, "percentage": 47.88, "elapsed_time": "0:28:41", "remaining_time": "0:31:13", "throughput": 13693.22, "total_tokens": 23566208} +{"current_steps": 7485, "total_steps": 15621, "loss": 0.4197, "lr": 1.245086401101474e-06, "epoch": 0.47916266564240445, "percentage": 47.92, "elapsed_time": "0:28:41", "remaining_time": "0:31:11", "throughput": 13697.43, "total_tokens": 23581696} +{"current_steps": 7490, "total_steps": 15621, "loss": 0.4454, "lr": 1.2440029583971757e-06, "epoch": 0.47948274758338133, "percentage": 47.95, "elapsed_time": "0:28:42", "remaining_time": "0:31:09", "throughput": 13701.59, "total_tokens": 23597248} +{"current_steps": 7495, "total_steps": 15621, "loss": 0.4913, "lr": 1.2429192110523188e-06, "epoch": 0.4798028295243582, "percentage": 47.98, "elapsed_time": "0:28:42", "remaining_time": "0:31:07", "throughput": 13705.75, "total_tokens": 23612800} +{"current_steps": 7500, "total_steps": 15621, "loss": 0.3338, "lr": 1.2418351604199746e-06, "epoch": 0.48012291146533514, "percentage": 48.01, "elapsed_time": "0:28:43", "remaining_time": "0:31:06", "throughput": 13710.17, "total_tokens": 23629056} +{"current_steps": 7505, "total_steps": 15621, "loss": 0.4447, "lr": 1.2407508078535934e-06, "epoch": 0.480442993406312, "percentage": 48.04, "elapsed_time": "0:28:44", "remaining_time": "0:31:04", "throughput": 13714.26, "total_tokens": 23644352} +{"current_steps": 7510, "total_steps": 15621, "loss": 0.2785, "lr": 1.2396661547070017e-06, "epoch": 0.4807630753472889, "percentage": 48.08, "elapsed_time": "0:28:44", "remaining_time": "0:31:02", "throughput": 13718.89, "total_tokens": 23661120} +{"current_steps": 7515, "total_steps": 15621, "loss": 0.3347, "lr": 1.238581202334402e-06, "epoch": 0.4810831572882658, "percentage": 48.11, "elapsed_time": "0:28:45", "remaining_time": "0:31:01", "throughput": 13723.41, "total_tokens": 23677632} +{"current_steps": 7520, "total_steps": 15621, "loss": 0.3673, "lr": 1.2374959520903699e-06, "epoch": 0.48140323922924266, "percentage": 48.14, "elapsed_time": "0:28:45", "remaining_time": "0:30:59", "throughput": 13727.89, "total_tokens": 23693952} +{"current_steps": 7525, "total_steps": 15621, "loss": 0.3341, "lr": 1.2364104053298531e-06, "epoch": 0.4817233211702196, "percentage": 48.17, "elapsed_time": "0:28:46", "remaining_time": "0:30:57", "throughput": 13731.84, "total_tokens": 23708736} +{"current_steps": 7530, "total_steps": 15621, "loss": 0.3913, "lr": 1.2353245634081692e-06, "epoch": 0.4820434031111965, "percentage": 48.2, "elapsed_time": "0:28:47", "remaining_time": "0:30:55", "throughput": 13736.24, "total_tokens": 23724864} +{"current_steps": 7535, "total_steps": 15621, "loss": 0.4148, "lr": 1.2342384276810053e-06, "epoch": 0.48236348505217336, "percentage": 48.24, "elapsed_time": "0:28:47", "remaining_time": "0:30:54", "throughput": 13740.31, "total_tokens": 23740160} +{"current_steps": 7540, "total_steps": 15621, "loss": 0.423, "lr": 1.233151999504414e-06, "epoch": 0.48268356699315024, "percentage": 48.27, "elapsed_time": "0:28:48", "remaining_time": "0:30:52", "throughput": 13744.33, "total_tokens": 23755264} +{"current_steps": 7545, "total_steps": 15621, "loss": 0.3317, "lr": 1.232065280234814e-06, "epoch": 0.4830036489341271, "percentage": 48.3, "elapsed_time": "0:28:48", "remaining_time": "0:30:50", "throughput": 13748.19, "total_tokens": 23770112} +{"current_steps": 7550, "total_steps": 15621, "loss": 0.4189, "lr": 1.2309782712289867e-06, "epoch": 0.48332373087510405, "percentage": 48.33, "elapsed_time": "0:28:49", "remaining_time": "0:30:48", "throughput": 13752.37, "total_tokens": 23785536} +{"current_steps": 7555, "total_steps": 15621, "loss": 0.4307, "lr": 1.2298909738440758e-06, "epoch": 0.48364381281608093, "percentage": 48.36, "elapsed_time": "0:28:50", "remaining_time": "0:30:47", "throughput": 13756.64, "total_tokens": 23801280} +{"current_steps": 7560, "total_steps": 15621, "loss": 0.371, "lr": 1.2288033894375847e-06, "epoch": 0.4839638947570578, "percentage": 48.4, "elapsed_time": "0:28:50", "remaining_time": "0:30:45", "throughput": 13760.65, "total_tokens": 23816448} +{"current_steps": 7565, "total_steps": 15621, "loss": 0.5539, "lr": 1.2277155193673755e-06, "epoch": 0.4842839766980347, "percentage": 48.43, "elapsed_time": "0:28:51", "remaining_time": "0:30:43", "throughput": 13764.98, "total_tokens": 23832512} +{"current_steps": 7570, "total_steps": 15621, "loss": 0.3968, "lr": 1.2266273649916668e-06, "epoch": 0.48460405863901157, "percentage": 48.46, "elapsed_time": "0:28:51", "remaining_time": "0:30:42", "throughput": 13769.24, "total_tokens": 23848192} +{"current_steps": 7575, "total_steps": 15621, "loss": 0.4249, "lr": 1.2255389276690318e-06, "epoch": 0.48492414057998845, "percentage": 48.49, "elapsed_time": "0:28:52", "remaining_time": "0:30:40", "throughput": 13773.42, "total_tokens": 23863808} +{"current_steps": 7580, "total_steps": 15621, "loss": 0.2927, "lr": 1.2244502087583978e-06, "epoch": 0.4852442225209654, "percentage": 48.52, "elapsed_time": "0:28:53", "remaining_time": "0:30:38", "throughput": 13778.1, "total_tokens": 23880960} +{"current_steps": 7585, "total_steps": 15621, "loss": 0.3969, "lr": 1.2233612096190426e-06, "epoch": 0.48556430446194226, "percentage": 48.56, "elapsed_time": "0:28:53", "remaining_time": "0:30:36", "throughput": 13782.15, "total_tokens": 23896256} +{"current_steps": 7590, "total_steps": 15621, "loss": 0.5189, "lr": 1.222271931610595e-06, "epoch": 0.48588438640291914, "percentage": 48.59, "elapsed_time": "0:28:54", "remaining_time": "0:30:35", "throughput": 13786.66, "total_tokens": 23912832} +{"current_steps": 7595, "total_steps": 15621, "loss": 0.4929, "lr": 1.2211823760930306e-06, "epoch": 0.486204468343896, "percentage": 48.62, "elapsed_time": "0:28:55", "remaining_time": "0:30:33", "throughput": 13790.96, "total_tokens": 23928768} +{"current_steps": 7600, "total_steps": 15621, "loss": 0.4206, "lr": 1.2200925444266726e-06, "epoch": 0.4865245502848729, "percentage": 48.65, "elapsed_time": "0:28:55", "remaining_time": "0:30:31", "throughput": 13795.37, "total_tokens": 23945088} +{"current_steps": 7605, "total_steps": 15621, "loss": 0.5087, "lr": 1.219002437972189e-06, "epoch": 0.48684463222584984, "percentage": 48.68, "elapsed_time": "0:28:56", "remaining_time": "0:30:30", "throughput": 13799.33, "total_tokens": 23960192} +{"current_steps": 7610, "total_steps": 15621, "loss": 0.4208, "lr": 1.21791205809059e-06, "epoch": 0.4871647141668267, "percentage": 48.72, "elapsed_time": "0:28:56", "remaining_time": "0:30:28", "throughput": 13803.92, "total_tokens": 23977152} +{"current_steps": 7615, "total_steps": 15621, "loss": 0.3611, "lr": 1.2168214061432283e-06, "epoch": 0.4874847961078036, "percentage": 48.75, "elapsed_time": "0:28:57", "remaining_time": "0:30:26", "throughput": 13807.9, "total_tokens": 23992448} +{"current_steps": 7620, "total_steps": 15621, "loss": 0.4276, "lr": 1.2157304834917947e-06, "epoch": 0.4878048780487805, "percentage": 48.78, "elapsed_time": "0:28:58", "remaining_time": "0:30:25", "throughput": 13812.12, "total_tokens": 24008384} +{"current_steps": 7625, "total_steps": 15621, "loss": 0.6241, "lr": 1.2146392914983202e-06, "epoch": 0.48812495998975736, "percentage": 48.81, "elapsed_time": "0:28:58", "remaining_time": "0:30:23", "throughput": 13816.82, "total_tokens": 24025728} +{"current_steps": 7630, "total_steps": 15621, "loss": 0.5169, "lr": 1.2135478315251694e-06, "epoch": 0.4884450419307343, "percentage": 48.84, "elapsed_time": "0:28:59", "remaining_time": "0:30:21", "throughput": 13820.65, "total_tokens": 24040448} +{"current_steps": 7635, "total_steps": 15621, "loss": 0.3428, "lr": 1.2124561049350442e-06, "epoch": 0.48876512387171117, "percentage": 48.88, "elapsed_time": "0:29:00", "remaining_time": "0:30:20", "throughput": 13824.46, "total_tokens": 24055168} +{"current_steps": 7640, "total_steps": 15621, "loss": 0.453, "lr": 1.2113641130909772e-06, "epoch": 0.48908520581268805, "percentage": 48.91, "elapsed_time": "0:29:00", "remaining_time": "0:30:18", "throughput": 13828.31, "total_tokens": 24070016} +{"current_steps": 7645, "total_steps": 15621, "loss": 0.3108, "lr": 1.2102718573563334e-06, "epoch": 0.48940528775366493, "percentage": 48.94, "elapsed_time": "0:29:01", "remaining_time": "0:30:16", "throughput": 13832.14, "total_tokens": 24084800} +{"current_steps": 7650, "total_steps": 15621, "loss": 0.4842, "lr": 1.2091793390948066e-06, "epoch": 0.4897253696946418, "percentage": 48.97, "elapsed_time": "0:29:01", "remaining_time": "0:30:14", "throughput": 13836.28, "total_tokens": 24100416} +{"current_steps": 7655, "total_steps": 15621, "loss": 0.2906, "lr": 1.2080865596704191e-06, "epoch": 0.49004545163561875, "percentage": 49.0, "elapsed_time": "0:29:02", "remaining_time": "0:30:13", "throughput": 13840.75, "total_tokens": 24117120} +{"current_steps": 7660, "total_steps": 15621, "loss": 0.4391, "lr": 1.2069935204475187e-06, "epoch": 0.4903655335765956, "percentage": 49.04, "elapsed_time": "0:29:03", "remaining_time": "0:30:11", "throughput": 13844.68, "total_tokens": 24132224} +{"current_steps": 7665, "total_steps": 15621, "loss": 0.3992, "lr": 1.2059002227907776e-06, "epoch": 0.4906856155175725, "percentage": 49.07, "elapsed_time": "0:29:03", "remaining_time": "0:30:09", "throughput": 13848.7, "total_tokens": 24147712} +{"current_steps": 7670, "total_steps": 15621, "loss": 0.4121, "lr": 1.2048066680651908e-06, "epoch": 0.4910056974585494, "percentage": 49.1, "elapsed_time": "0:29:04", "remaining_time": "0:30:08", "throughput": 13853.16, "total_tokens": 24164288} +{"current_steps": 7675, "total_steps": 15621, "loss": 0.5577, "lr": 1.2037128576360743e-06, "epoch": 0.49132577939952626, "percentage": 49.13, "elapsed_time": "0:29:05", "remaining_time": "0:30:07", "throughput": 13861.27, "total_tokens": 24193728} +{"current_steps": 7680, "total_steps": 15621, "loss": 0.4148, "lr": 1.2026187928690627e-06, "epoch": 0.49164586134050314, "percentage": 49.16, "elapsed_time": "0:29:06", "remaining_time": "0:30:05", "throughput": 13865.22, "total_tokens": 24208832} +{"current_steps": 7685, "total_steps": 15621, "loss": 0.5085, "lr": 1.2015244751301098e-06, "epoch": 0.4919659432814801, "percentage": 49.2, "elapsed_time": "0:29:06", "remaining_time": "0:30:03", "throughput": 13869.0, "total_tokens": 24223424} +{"current_steps": 7690, "total_steps": 15621, "loss": 0.43, "lr": 1.2004299057854832e-06, "epoch": 0.49228602522245696, "percentage": 49.23, "elapsed_time": "0:29:07", "remaining_time": "0:30:01", "throughput": 13873.06, "total_tokens": 24238976} +{"current_steps": 7695, "total_steps": 15621, "loss": 0.3893, "lr": 1.1993350862017661e-06, "epoch": 0.49260610716343384, "percentage": 49.26, "elapsed_time": "0:29:07", "remaining_time": "0:30:00", "throughput": 13876.84, "total_tokens": 24253632} +{"current_steps": 7700, "total_steps": 15621, "loss": 0.3968, "lr": 1.1982400177458534e-06, "epoch": 0.4929261891044107, "percentage": 49.29, "elapsed_time": "0:29:08", "remaining_time": "0:29:58", "throughput": 13881.51, "total_tokens": 24270720} +{"current_steps": 7705, "total_steps": 15621, "loss": 0.4284, "lr": 1.197144701784951e-06, "epoch": 0.4932462710453876, "percentage": 49.32, "elapsed_time": "0:29:09", "remaining_time": "0:29:56", "throughput": 13885.21, "total_tokens": 24285312} +{"current_steps": 7710, "total_steps": 15621, "loss": 0.3926, "lr": 1.1960491396865735e-06, "epoch": 0.49356635298636453, "percentage": 49.36, "elapsed_time": "0:29:09", "remaining_time": "0:29:55", "throughput": 13889.09, "total_tokens": 24300352} +{"current_steps": 7715, "total_steps": 15621, "loss": 0.3458, "lr": 1.1949533328185435e-06, "epoch": 0.4938864349273414, "percentage": 49.39, "elapsed_time": "0:29:10", "remaining_time": "0:29:53", "throughput": 13893.56, "total_tokens": 24317056} +{"current_steps": 7720, "total_steps": 15621, "loss": 0.3741, "lr": 1.1938572825489883e-06, "epoch": 0.4942065168683183, "percentage": 49.42, "elapsed_time": "0:29:10", "remaining_time": "0:29:51", "throughput": 13897.84, "total_tokens": 24333184} +{"current_steps": 7725, "total_steps": 15621, "loss": 0.409, "lr": 1.1927609902463394e-06, "epoch": 0.49452659880929517, "percentage": 49.45, "elapsed_time": "0:29:11", "remaining_time": "0:29:50", "throughput": 13901.82, "total_tokens": 24348672} +{"current_steps": 7730, "total_steps": 15621, "loss": 0.4346, "lr": 1.1916644572793314e-06, "epoch": 0.49484668075027205, "percentage": 49.48, "elapsed_time": "0:29:12", "remaining_time": "0:29:48", "throughput": 13905.66, "total_tokens": 24363648} +{"current_steps": 7735, "total_steps": 15621, "loss": 0.4964, "lr": 1.190567685016998e-06, "epoch": 0.495166762691249, "percentage": 49.52, "elapsed_time": "0:29:12", "remaining_time": "0:29:46", "throughput": 13910.35, "total_tokens": 24380992} +{"current_steps": 7740, "total_steps": 15621, "loss": 0.4107, "lr": 1.189470674828672e-06, "epoch": 0.49548684463222586, "percentage": 49.55, "elapsed_time": "0:29:13", "remaining_time": "0:29:45", "throughput": 13914.12, "total_tokens": 24395776} +{"current_steps": 7745, "total_steps": 15621, "loss": 0.3878, "lr": 1.188373428083984e-06, "epoch": 0.49580692657320274, "percentage": 49.58, "elapsed_time": "0:29:13", "remaining_time": "0:29:43", "throughput": 13918.28, "total_tokens": 24411584} +{"current_steps": 7750, "total_steps": 15621, "loss": 0.5219, "lr": 1.1872759461528596e-06, "epoch": 0.4961270085141796, "percentage": 49.61, "elapsed_time": "0:29:14", "remaining_time": "0:29:41", "throughput": 13922.14, "total_tokens": 24426560} +{"current_steps": 7755, "total_steps": 15621, "loss": 0.39, "lr": 1.1861782304055174e-06, "epoch": 0.4964470904551565, "percentage": 49.64, "elapsed_time": "0:29:15", "remaining_time": "0:29:40", "throughput": 13926.12, "total_tokens": 24441856} +{"current_steps": 7760, "total_steps": 15621, "loss": 0.3345, "lr": 1.1850802822124686e-06, "epoch": 0.4967671723961334, "percentage": 49.68, "elapsed_time": "0:29:15", "remaining_time": "0:29:38", "throughput": 13930.2, "total_tokens": 24457472} +{"current_steps": 7765, "total_steps": 15621, "loss": 0.5005, "lr": 1.1839821029445143e-06, "epoch": 0.4970872543371103, "percentage": 49.71, "elapsed_time": "0:29:16", "remaining_time": "0:29:36", "throughput": 13933.9, "total_tokens": 24471936} +{"current_steps": 7770, "total_steps": 15621, "loss": 0.3195, "lr": 1.1828836939727442e-06, "epoch": 0.4974073362780872, "percentage": 49.74, "elapsed_time": "0:29:16", "remaining_time": "0:29:35", "throughput": 13938.0, "total_tokens": 24487616} +{"current_steps": 7775, "total_steps": 15621, "loss": 0.433, "lr": 1.181785056668535e-06, "epoch": 0.4977274182190641, "percentage": 49.77, "elapsed_time": "0:29:17", "remaining_time": "0:29:33", "throughput": 13942.25, "total_tokens": 24503936} +{"current_steps": 7780, "total_steps": 15621, "loss": 0.4212, "lr": 1.180686192403548e-06, "epoch": 0.49804750016004096, "percentage": 49.8, "elapsed_time": "0:29:18", "remaining_time": "0:29:31", "throughput": 13945.95, "total_tokens": 24518464} +{"current_steps": 7785, "total_steps": 15621, "loss": 0.3439, "lr": 1.1795871025497285e-06, "epoch": 0.49836758210101784, "percentage": 49.84, "elapsed_time": "0:29:18", "remaining_time": "0:29:30", "throughput": 13949.68, "total_tokens": 24533184} +{"current_steps": 7790, "total_steps": 15621, "loss": 0.4122, "lr": 1.1784877884793029e-06, "epoch": 0.49868766404199477, "percentage": 49.87, "elapsed_time": "0:29:19", "remaining_time": "0:29:28", "throughput": 13953.81, "total_tokens": 24548992} +{"current_steps": 7795, "total_steps": 15621, "loss": 0.3627, "lr": 1.1773882515647776e-06, "epoch": 0.49900774598297165, "percentage": 49.9, "elapsed_time": "0:29:19", "remaining_time": "0:29:26", "throughput": 13958.48, "total_tokens": 24566592} +{"current_steps": 7800, "total_steps": 15621, "loss": 0.4811, "lr": 1.1762884931789376e-06, "epoch": 0.49932782792394853, "percentage": 49.93, "elapsed_time": "0:29:20", "remaining_time": "0:29:25", "throughput": 13963.01, "total_tokens": 24583552} +{"current_steps": 7805, "total_steps": 15621, "loss": 0.4548, "lr": 1.1751885146948436e-06, "epoch": 0.4996479098649254, "percentage": 49.96, "elapsed_time": "0:29:21", "remaining_time": "0:29:23", "throughput": 13967.15, "total_tokens": 24599552} +{"current_steps": 7810, "total_steps": 15621, "loss": 0.3633, "lr": 1.1740883174858327e-06, "epoch": 0.4999679918059023, "percentage": 50.0, "elapsed_time": "0:29:21", "remaining_time": "0:29:22", "throughput": 13971.07, "total_tokens": 24614912} +{"current_steps": 7815, "total_steps": 15621, "loss": 0.3649, "lr": 1.1729879029255127e-06, "epoch": 0.5002880737468792, "percentage": 50.03, "elapsed_time": "0:29:22", "remaining_time": "0:29:20", "throughput": 13974.85, "total_tokens": 24629696} +{"current_steps": 7820, "total_steps": 15621, "loss": 0.3939, "lr": 1.171887272387765e-06, "epoch": 0.5006081556878561, "percentage": 50.06, "elapsed_time": "0:29:23", "remaining_time": "0:29:18", "throughput": 13979.17, "total_tokens": 24646208} +{"current_steps": 7820, "total_steps": 15621, "eval_loss": 0.4134162962436676, "epoch": 0.5006081556878561, "percentage": 50.06, "elapsed_time": "0:30:12", "remaining_time": "0:30:07", "throughput": 13600.06, "total_tokens": 24646208} +{"current_steps": 7825, "total_steps": 15621, "loss": 0.4985, "lr": 1.1707864272467397e-06, "epoch": 0.500928237628833, "percentage": 50.09, "elapsed_time": "0:30:43", "remaining_time": "0:30:36", "throughput": 13379.2, "total_tokens": 24661120} +{"current_steps": 7830, "total_steps": 15621, "loss": 0.423, "lr": 1.169685368876855e-06, "epoch": 0.5012483195698099, "percentage": 50.12, "elapsed_time": "0:30:43", "remaining_time": "0:30:34", "throughput": 13383.83, "total_tokens": 24678336} +{"current_steps": 7835, "total_steps": 15621, "loss": 0.5534, "lr": 1.1685840986527946e-06, "epoch": 0.5015684015107867, "percentage": 50.16, "elapsed_time": "0:30:44", "remaining_time": "0:30:32", "throughput": 13387.99, "total_tokens": 24694336} +{"current_steps": 7840, "total_steps": 15621, "loss": 0.4044, "lr": 1.1674826179495076e-06, "epoch": 0.5018884834517636, "percentage": 50.19, "elapsed_time": "0:30:45", "remaining_time": "0:30:31", "throughput": 13391.6, "total_tokens": 24708608} +{"current_steps": 7845, "total_steps": 15621, "loss": 0.415, "lr": 1.1663809281422056e-06, "epoch": 0.5022085653927405, "percentage": 50.22, "elapsed_time": "0:30:45", "remaining_time": "0:30:29", "throughput": 13395.76, "total_tokens": 24724672} +{"current_steps": 7850, "total_steps": 15621, "loss": 0.4562, "lr": 1.1652790306063615e-06, "epoch": 0.5025286473337174, "percentage": 50.25, "elapsed_time": "0:30:46", "remaining_time": "0:30:27", "throughput": 13399.84, "total_tokens": 24740608} +{"current_steps": 7855, "total_steps": 15621, "loss": 0.416, "lr": 1.164176926717707e-06, "epoch": 0.5028487292746944, "percentage": 50.28, "elapsed_time": "0:30:47", "remaining_time": "0:30:26", "throughput": 13404.62, "total_tokens": 24758528} +{"current_steps": 7860, "total_steps": 15621, "loss": 0.3702, "lr": 1.1630746178522315e-06, "epoch": 0.5031688112156713, "percentage": 50.32, "elapsed_time": "0:30:47", "remaining_time": "0:30:24", "throughput": 13408.28, "total_tokens": 24772992} +{"current_steps": 7865, "total_steps": 15621, "loss": 0.4398, "lr": 1.1619721053861816e-06, "epoch": 0.5034888931566481, "percentage": 50.35, "elapsed_time": "0:30:48", "remaining_time": "0:30:22", "throughput": 13412.14, "total_tokens": 24788160} +{"current_steps": 7870, "total_steps": 15621, "loss": 0.4093, "lr": 1.1608693906960558e-06, "epoch": 0.503808975097625, "percentage": 50.38, "elapsed_time": "0:30:48", "remaining_time": "0:30:20", "throughput": 13416.31, "total_tokens": 24804224} +{"current_steps": 7875, "total_steps": 15621, "loss": 0.4426, "lr": 1.1597664751586069e-06, "epoch": 0.5041290570386019, "percentage": 50.41, "elapsed_time": "0:30:49", "remaining_time": "0:30:19", "throughput": 13420.67, "total_tokens": 24820928} +{"current_steps": 7880, "total_steps": 15621, "loss": 0.3837, "lr": 1.1586633601508382e-06, "epoch": 0.5044491389795788, "percentage": 50.44, "elapsed_time": "0:30:50", "remaining_time": "0:30:17", "throughput": 13424.43, "total_tokens": 24835776} +{"current_steps": 7885, "total_steps": 15621, "loss": 0.3858, "lr": 1.1575600470500014e-06, "epoch": 0.5047692209205557, "percentage": 50.48, "elapsed_time": "0:30:50", "remaining_time": "0:30:15", "throughput": 13428.53, "total_tokens": 24851648} +{"current_steps": 7890, "total_steps": 15621, "loss": 0.42, "lr": 1.1564565372335957e-06, "epoch": 0.5050893028615325, "percentage": 50.51, "elapsed_time": "0:30:51", "remaining_time": "0:30:13", "throughput": 13432.45, "total_tokens": 24866880} +{"current_steps": 7895, "total_steps": 15621, "loss": 0.3162, "lr": 1.1553528320793663e-06, "epoch": 0.5054093848025094, "percentage": 50.54, "elapsed_time": "0:30:51", "remaining_time": "0:30:12", "throughput": 13436.26, "total_tokens": 24881856} +{"current_steps": 7900, "total_steps": 15621, "loss": 0.4364, "lr": 1.1542489329653022e-06, "epoch": 0.5057294667434863, "percentage": 50.57, "elapsed_time": "0:30:52", "remaining_time": "0:30:10", "throughput": 13440.67, "total_tokens": 24898560} +{"current_steps": 7905, "total_steps": 15621, "loss": 0.3754, "lr": 1.1531448412696343e-06, "epoch": 0.5060495486844632, "percentage": 50.6, "elapsed_time": "0:30:53", "remaining_time": "0:30:08", "throughput": 13444.36, "total_tokens": 24913216} +{"current_steps": 7910, "total_steps": 15621, "loss": 0.4913, "lr": 1.1520405583708337e-06, "epoch": 0.5063696306254402, "percentage": 50.64, "elapsed_time": "0:30:53", "remaining_time": "0:30:07", "throughput": 13448.35, "total_tokens": 24928832} +{"current_steps": 7915, "total_steps": 15621, "loss": 0.4917, "lr": 1.1509360856476109e-06, "epoch": 0.506689712566417, "percentage": 50.67, "elapsed_time": "0:30:54", "remaining_time": "0:30:05", "throughput": 13452.37, "total_tokens": 24944512} +{"current_steps": 7920, "total_steps": 15621, "loss": 0.4612, "lr": 1.149831424478913e-06, "epoch": 0.5070097945073939, "percentage": 50.7, "elapsed_time": "0:30:54", "remaining_time": "0:30:03", "throughput": 13456.22, "total_tokens": 24959744} +{"current_steps": 7925, "total_steps": 15621, "loss": 0.3948, "lr": 1.1487265762439224e-06, "epoch": 0.5073298764483708, "percentage": 50.73, "elapsed_time": "0:30:55", "remaining_time": "0:30:01", "throughput": 13460.24, "total_tokens": 24975488} +{"current_steps": 7930, "total_steps": 15621, "loss": 0.362, "lr": 1.1476215423220547e-06, "epoch": 0.5076499583893477, "percentage": 50.76, "elapsed_time": "0:30:56", "remaining_time": "0:30:00", "throughput": 13463.95, "total_tokens": 24990272} +{"current_steps": 7935, "total_steps": 15621, "loss": 0.3761, "lr": 1.146516324092959e-06, "epoch": 0.5079700403303246, "percentage": 50.8, "elapsed_time": "0:30:56", "remaining_time": "0:29:58", "throughput": 13468.09, "total_tokens": 25006272} +{"current_steps": 7940, "total_steps": 15621, "loss": 0.2954, "lr": 1.1454109229365117e-06, "epoch": 0.5082901222713014, "percentage": 50.83, "elapsed_time": "0:30:57", "remaining_time": "0:29:56", "throughput": 13472.26, "total_tokens": 25022464} +{"current_steps": 7945, "total_steps": 15621, "loss": 0.3132, "lr": 1.14430534023282e-06, "epoch": 0.5086102042122783, "percentage": 50.86, "elapsed_time": "0:30:57", "remaining_time": "0:29:55", "throughput": 13476.01, "total_tokens": 25037376} +{"current_steps": 7950, "total_steps": 15621, "loss": 0.4736, "lr": 1.1431995773622167e-06, "epoch": 0.5089302861532552, "percentage": 50.89, "elapsed_time": "0:30:58", "remaining_time": "0:29:53", "throughput": 13480.14, "total_tokens": 25053440} +{"current_steps": 7955, "total_steps": 15621, "loss": 0.4369, "lr": 1.1420936357052597e-06, "epoch": 0.5092503680942321, "percentage": 50.93, "elapsed_time": "0:30:59", "remaining_time": "0:29:51", "throughput": 13484.13, "total_tokens": 25069120} +{"current_steps": 7960, "total_steps": 15621, "loss": 0.3078, "lr": 1.1409875166427303e-06, "epoch": 0.5095704500352091, "percentage": 50.96, "elapsed_time": "0:30:59", "remaining_time": "0:29:49", "throughput": 13487.97, "total_tokens": 25084224} +{"current_steps": 7965, "total_steps": 15621, "loss": 0.4996, "lr": 1.1398812215556308e-06, "epoch": 0.509890531976186, "percentage": 50.99, "elapsed_time": "0:31:00", "remaining_time": "0:29:48", "throughput": 13491.86, "total_tokens": 25099520} +{"current_steps": 7970, "total_steps": 15621, "loss": 0.362, "lr": 1.1387747518251837e-06, "epoch": 0.5102106139171628, "percentage": 51.02, "elapsed_time": "0:31:00", "remaining_time": "0:29:46", "throughput": 13495.85, "total_tokens": 25115200} +{"current_steps": 7975, "total_steps": 15621, "loss": 0.3266, "lr": 1.13766810883283e-06, "epoch": 0.5105306958581397, "percentage": 51.05, "elapsed_time": "0:31:01", "remaining_time": "0:29:44", "throughput": 13500.03, "total_tokens": 25131520} +{"current_steps": 7980, "total_steps": 15621, "loss": 0.5172, "lr": 1.1365612939602255e-06, "epoch": 0.5108507777991166, "percentage": 51.09, "elapsed_time": "0:31:02", "remaining_time": "0:29:43", "throughput": 13504.17, "total_tokens": 25147776} +{"current_steps": 7985, "total_steps": 15621, "loss": 0.3683, "lr": 1.1354543085892423e-06, "epoch": 0.5111708597400935, "percentage": 51.12, "elapsed_time": "0:31:02", "remaining_time": "0:29:41", "throughput": 13507.92, "total_tokens": 25162816} +{"current_steps": 7990, "total_steps": 15621, "loss": 0.3333, "lr": 1.1343471541019646e-06, "epoch": 0.5114909416810703, "percentage": 51.15, "elapsed_time": "0:31:03", "remaining_time": "0:29:39", "throughput": 13511.98, "total_tokens": 25178752} +{"current_steps": 7995, "total_steps": 15621, "loss": 0.3719, "lr": 1.1332398318806872e-06, "epoch": 0.5118110236220472, "percentage": 51.18, "elapsed_time": "0:31:04", "remaining_time": "0:29:38", "throughput": 13515.79, "total_tokens": 25194048} +{"current_steps": 8000, "total_steps": 15621, "loss": 0.3796, "lr": 1.1321323433079158e-06, "epoch": 0.5121311055630241, "percentage": 51.21, "elapsed_time": "0:31:04", "remaining_time": "0:29:36", "throughput": 13519.6, "total_tokens": 25209216} +{"current_steps": 8005, "total_steps": 15621, "loss": 0.379, "lr": 1.1310246897663623e-06, "epoch": 0.512451187504001, "percentage": 51.25, "elapsed_time": "0:31:05", "remaining_time": "0:29:34", "throughput": 13523.47, "total_tokens": 25224640} +{"current_steps": 8010, "total_steps": 15621, "loss": 0.408, "lr": 1.1299168726389447e-06, "epoch": 0.5127712694449779, "percentage": 51.28, "elapsed_time": "0:31:05", "remaining_time": "0:29:32", "throughput": 13527.21, "total_tokens": 25239808} +{"current_steps": 8015, "total_steps": 15621, "loss": 0.3354, "lr": 1.1288088933087868e-06, "epoch": 0.5130913513859549, "percentage": 51.31, "elapsed_time": "0:31:06", "remaining_time": "0:29:31", "throughput": 13531.77, "total_tokens": 25257344} +{"current_steps": 8020, "total_steps": 15621, "loss": 0.3365, "lr": 1.1277007531592127e-06, "epoch": 0.5134114333269317, "percentage": 51.34, "elapsed_time": "0:31:07", "remaining_time": "0:29:29", "throughput": 13535.39, "total_tokens": 25272064} +{"current_steps": 8025, "total_steps": 15621, "loss": 0.3619, "lr": 1.1265924535737492e-06, "epoch": 0.5137315152679086, "percentage": 51.37, "elapsed_time": "0:31:07", "remaining_time": "0:29:27", "throughput": 13539.4, "total_tokens": 25287936} +{"current_steps": 8030, "total_steps": 15621, "loss": 0.3007, "lr": 1.125483995936121e-06, "epoch": 0.5140515972088855, "percentage": 51.41, "elapsed_time": "0:31:08", "remaining_time": "0:29:26", "throughput": 13543.19, "total_tokens": 25303232} +{"current_steps": 8035, "total_steps": 15621, "loss": 0.376, "lr": 1.1243753816302507e-06, "epoch": 0.5143716791498624, "percentage": 51.44, "elapsed_time": "0:31:08", "remaining_time": "0:29:24", "throughput": 13547.02, "total_tokens": 25318656} +{"current_steps": 8040, "total_steps": 15621, "loss": 0.417, "lr": 1.1232666120402558e-06, "epoch": 0.5146917610908393, "percentage": 51.47, "elapsed_time": "0:31:09", "remaining_time": "0:29:22", "throughput": 13550.73, "total_tokens": 25333760} +{"current_steps": 8045, "total_steps": 15621, "loss": 0.3827, "lr": 1.1221576885504487e-06, "epoch": 0.5150118430318161, "percentage": 51.5, "elapsed_time": "0:31:10", "remaining_time": "0:29:21", "throughput": 13554.77, "total_tokens": 25349824} +{"current_steps": 8050, "total_steps": 15621, "loss": 0.4027, "lr": 1.121048612545333e-06, "epoch": 0.515331924972793, "percentage": 51.53, "elapsed_time": "0:31:10", "remaining_time": "0:29:19", "throughput": 13558.64, "total_tokens": 25365376} +{"current_steps": 8055, "total_steps": 15621, "loss": 0.4599, "lr": 1.1199393854096034e-06, "epoch": 0.5156520069137699, "percentage": 51.57, "elapsed_time": "0:31:11", "remaining_time": "0:29:17", "throughput": 13562.52, "total_tokens": 25380928} +{"current_steps": 8060, "total_steps": 15621, "loss": 0.3487, "lr": 1.118830008528143e-06, "epoch": 0.5159720888547468, "percentage": 51.6, "elapsed_time": "0:31:12", "remaining_time": "0:29:16", "throughput": 13566.33, "total_tokens": 25396352} +{"current_steps": 8065, "total_steps": 15621, "loss": 0.3159, "lr": 1.1177204832860212e-06, "epoch": 0.5162921707957238, "percentage": 51.63, "elapsed_time": "0:31:12", "remaining_time": "0:29:14", "throughput": 13570.07, "total_tokens": 25411456} +{"current_steps": 8070, "total_steps": 15621, "loss": 0.4322, "lr": 1.1166108110684947e-06, "epoch": 0.5166122527367006, "percentage": 51.66, "elapsed_time": "0:31:13", "remaining_time": "0:29:12", "throughput": 13574.48, "total_tokens": 25428544} +{"current_steps": 8075, "total_steps": 15621, "loss": 0.3988, "lr": 1.1155009932610003e-06, "epoch": 0.5169323346776775, "percentage": 51.69, "elapsed_time": "0:31:13", "remaining_time": "0:29:11", "throughput": 13578.33, "total_tokens": 25443968} +{"current_steps": 8080, "total_steps": 15621, "loss": 0.3273, "lr": 1.1143910312491605e-06, "epoch": 0.5172524166186544, "percentage": 51.73, "elapsed_time": "0:31:14", "remaining_time": "0:29:09", "throughput": 13582.03, "total_tokens": 25458880} +{"current_steps": 8085, "total_steps": 15621, "loss": 0.3196, "lr": 1.1132809264187748e-06, "epoch": 0.5175724985596313, "percentage": 51.76, "elapsed_time": "0:31:15", "remaining_time": "0:29:07", "throughput": 13585.84, "total_tokens": 25474304} +{"current_steps": 8090, "total_steps": 15621, "loss": 0.3884, "lr": 1.1121706801558226e-06, "epoch": 0.5178925805006082, "percentage": 51.79, "elapsed_time": "0:31:15", "remaining_time": "0:29:06", "throughput": 13589.53, "total_tokens": 25489472} +{"current_steps": 8095, "total_steps": 15621, "loss": 0.3827, "lr": 1.111060293846459e-06, "epoch": 0.518212662441585, "percentage": 51.82, "elapsed_time": "0:31:16", "remaining_time": "0:29:04", "throughput": 13593.33, "total_tokens": 25504896} +{"current_steps": 8100, "total_steps": 15621, "loss": 0.4807, "lr": 1.1099497688770148e-06, "epoch": 0.5185327443825619, "percentage": 51.85, "elapsed_time": "0:31:16", "remaining_time": "0:29:02", "throughput": 13596.84, "total_tokens": 25519360} +{"current_steps": 8105, "total_steps": 15621, "loss": 0.4418, "lr": 1.1088391066339928e-06, "epoch": 0.5188528263235388, "percentage": 51.89, "elapsed_time": "0:31:17", "remaining_time": "0:29:01", "throughput": 13600.93, "total_tokens": 25535680} +{"current_steps": 8110, "total_steps": 15621, "loss": 0.5327, "lr": 1.1077283085040684e-06, "epoch": 0.5191729082645157, "percentage": 51.92, "elapsed_time": "0:31:18", "remaining_time": "0:28:59", "throughput": 13604.57, "total_tokens": 25550592} +{"current_steps": 8115, "total_steps": 15621, "loss": 0.4083, "lr": 1.1066173758740863e-06, "epoch": 0.5194929902054926, "percentage": 51.95, "elapsed_time": "0:31:18", "remaining_time": "0:28:57", "throughput": 13608.24, "total_tokens": 25565696} +{"current_steps": 8120, "total_steps": 15621, "loss": 0.3485, "lr": 1.105506310131058e-06, "epoch": 0.5198130721464695, "percentage": 51.98, "elapsed_time": "0:31:19", "remaining_time": "0:28:56", "throughput": 13612.22, "total_tokens": 25581568} +{"current_steps": 8125, "total_steps": 15621, "loss": 0.466, "lr": 1.1043951126621634e-06, "epoch": 0.5201331540874464, "percentage": 52.01, "elapsed_time": "0:31:19", "remaining_time": "0:28:54", "throughput": 13616.22, "total_tokens": 25597760} +{"current_steps": 8130, "total_steps": 15621, "loss": 0.4111, "lr": 1.1032837848547445e-06, "epoch": 0.5204532360284233, "percentage": 52.05, "elapsed_time": "0:31:20", "remaining_time": "0:28:52", "throughput": 13620.72, "total_tokens": 25615424} +{"current_steps": 8135, "total_steps": 15621, "loss": 0.4094, "lr": 1.1021723280963074e-06, "epoch": 0.5207733179694002, "percentage": 52.08, "elapsed_time": "0:31:21", "remaining_time": "0:28:51", "throughput": 13624.49, "total_tokens": 25630720} +{"current_steps": 8140, "total_steps": 15621, "loss": 0.4886, "lr": 1.1010607437745194e-06, "epoch": 0.5210933999103771, "percentage": 52.11, "elapsed_time": "0:31:21", "remaining_time": "0:28:49", "throughput": 13629.27, "total_tokens": 25649280} +{"current_steps": 8145, "total_steps": 15621, "loss": 0.5002, "lr": 1.0999490332772057e-06, "epoch": 0.5214134818513539, "percentage": 52.14, "elapsed_time": "0:31:22", "remaining_time": "0:28:47", "throughput": 13632.97, "total_tokens": 25664576} +{"current_steps": 8150, "total_steps": 15621, "loss": 0.4193, "lr": 1.0988371979923507e-06, "epoch": 0.5217335637923308, "percentage": 52.17, "elapsed_time": "0:31:23", "remaining_time": "0:28:46", "throughput": 13636.91, "total_tokens": 25680384} +{"current_steps": 8155, "total_steps": 15621, "loss": 0.4017, "lr": 1.097725239308094e-06, "epoch": 0.5220536457333077, "percentage": 52.21, "elapsed_time": "0:31:23", "remaining_time": "0:28:44", "throughput": 13640.82, "total_tokens": 25696128} +{"current_steps": 8160, "total_steps": 15621, "loss": 0.2794, "lr": 1.0966131586127278e-06, "epoch": 0.5223737276742846, "percentage": 52.24, "elapsed_time": "0:31:24", "remaining_time": "0:28:42", "throughput": 13645.0, "total_tokens": 25712768} +{"current_steps": 8165, "total_steps": 15621, "loss": 0.4033, "lr": 1.0955009572946992e-06, "epoch": 0.5226938096152615, "percentage": 52.27, "elapsed_time": "0:31:24", "remaining_time": "0:28:41", "throughput": 13648.62, "total_tokens": 25727616} +{"current_steps": 8170, "total_steps": 15621, "loss": 0.4149, "lr": 1.094388636742604e-06, "epoch": 0.5230138915562383, "percentage": 52.3, "elapsed_time": "0:31:25", "remaining_time": "0:28:39", "throughput": 13652.89, "total_tokens": 25744384} +{"current_steps": 8175, "total_steps": 15621, "loss": 0.3376, "lr": 1.0932761983451878e-06, "epoch": 0.5233339734972153, "percentage": 52.33, "elapsed_time": "0:31:26", "remaining_time": "0:28:38", "throughput": 13656.91, "total_tokens": 25760640} +{"current_steps": 8180, "total_steps": 15621, "loss": 0.3116, "lr": 1.0921636434913425e-06, "epoch": 0.5236540554381922, "percentage": 52.37, "elapsed_time": "0:31:26", "remaining_time": "0:28:36", "throughput": 13660.89, "total_tokens": 25776640} +{"current_steps": 8185, "total_steps": 15621, "loss": 0.2977, "lr": 1.091050973570106e-06, "epoch": 0.5239741373791691, "percentage": 52.4, "elapsed_time": "0:31:27", "remaining_time": "0:28:34", "throughput": 13664.59, "total_tokens": 25791744} +{"current_steps": 8190, "total_steps": 15621, "loss": 0.5531, "lr": 1.08993818997066e-06, "epoch": 0.524294219320146, "percentage": 52.43, "elapsed_time": "0:31:28", "remaining_time": "0:28:33", "throughput": 13668.74, "total_tokens": 25808256} +{"current_steps": 8195, "total_steps": 15621, "loss": 0.4378, "lr": 1.0888252940823283e-06, "epoch": 0.5246143012611229, "percentage": 52.46, "elapsed_time": "0:31:28", "remaining_time": "0:28:31", "throughput": 13672.65, "total_tokens": 25824128} +{"current_steps": 8200, "total_steps": 15621, "loss": 0.4676, "lr": 1.0877122872945737e-06, "epoch": 0.5249343832020997, "percentage": 52.49, "elapsed_time": "0:31:29", "remaining_time": "0:28:29", "throughput": 13676.81, "total_tokens": 25840576} +{"current_steps": 8205, "total_steps": 15621, "loss": 0.317, "lr": 1.0865991709969983e-06, "epoch": 0.5252544651430766, "percentage": 52.53, "elapsed_time": "0:31:29", "remaining_time": "0:28:28", "throughput": 13680.67, "total_tokens": 25856256} +{"current_steps": 8210, "total_steps": 15621, "loss": 0.4482, "lr": 1.0854859465793416e-06, "epoch": 0.5255745470840535, "percentage": 52.56, "elapsed_time": "0:31:30", "remaining_time": "0:28:26", "throughput": 13684.4, "total_tokens": 25871424} +{"current_steps": 8215, "total_steps": 15621, "loss": 0.4974, "lr": 1.0843726154314767e-06, "epoch": 0.5258946290250304, "percentage": 52.59, "elapsed_time": "0:31:31", "remaining_time": "0:28:24", "throughput": 13688.01, "total_tokens": 25886272} +{"current_steps": 8220, "total_steps": 15621, "loss": 0.4376, "lr": 1.083259178943411e-06, "epoch": 0.5262147109660072, "percentage": 52.62, "elapsed_time": "0:31:31", "remaining_time": "0:28:23", "throughput": 13691.88, "total_tokens": 25901952} +{"current_steps": 8225, "total_steps": 15621, "loss": 0.3694, "lr": 1.0821456385052822e-06, "epoch": 0.5265347929069842, "percentage": 52.65, "elapsed_time": "0:31:32", "remaining_time": "0:28:21", "throughput": 13695.77, "total_tokens": 25917888} +{"current_steps": 8230, "total_steps": 15621, "loss": 0.4199, "lr": 1.0810319955073598e-06, "epoch": 0.5268548748479611, "percentage": 52.69, "elapsed_time": "0:31:33", "remaining_time": "0:28:20", "throughput": 13699.69, "total_tokens": 25933824} +{"current_steps": 8235, "total_steps": 15621, "loss": 0.3888, "lr": 1.0799182513400393e-06, "epoch": 0.527174956788938, "percentage": 52.72, "elapsed_time": "0:31:33", "remaining_time": "0:28:18", "throughput": 13704.11, "total_tokens": 25951360} +{"current_steps": 8240, "total_steps": 15621, "loss": 0.3594, "lr": 1.0788044073938438e-06, "epoch": 0.5274950387299149, "percentage": 52.75, "elapsed_time": "0:31:34", "remaining_time": "0:28:16", "throughput": 13708.04, "total_tokens": 25967232} +{"current_steps": 8245, "total_steps": 15621, "loss": 0.4146, "lr": 1.0776904650594205e-06, "epoch": 0.5278151206708918, "percentage": 52.78, "elapsed_time": "0:31:34", "remaining_time": "0:28:15", "throughput": 13711.79, "total_tokens": 25982592} +{"current_steps": 8250, "total_steps": 15621, "loss": 0.4094, "lr": 1.0765764257275394e-06, "epoch": 0.5281352026118686, "percentage": 52.81, "elapsed_time": "0:31:35", "remaining_time": "0:28:13", "throughput": 13715.51, "total_tokens": 25997824} +{"current_steps": 8255, "total_steps": 15621, "loss": 0.4292, "lr": 1.0754622907890914e-06, "epoch": 0.5284552845528455, "percentage": 52.85, "elapsed_time": "0:31:36", "remaining_time": "0:28:11", "throughput": 13719.42, "total_tokens": 26013632} +{"current_steps": 8260, "total_steps": 15621, "loss": 0.3249, "lr": 1.0743480616350873e-06, "epoch": 0.5287753664938224, "percentage": 52.88, "elapsed_time": "0:31:36", "remaining_time": "0:28:10", "throughput": 13723.07, "total_tokens": 26028800} +{"current_steps": 8265, "total_steps": 15621, "loss": 0.339, "lr": 1.0732337396566558e-06, "epoch": 0.5290954484347993, "percentage": 52.91, "elapsed_time": "0:31:37", "remaining_time": "0:28:08", "throughput": 13726.92, "total_tokens": 26044672} +{"current_steps": 8270, "total_steps": 15621, "loss": 0.396, "lr": 1.07211932624504e-06, "epoch": 0.5294155303757762, "percentage": 52.94, "elapsed_time": "0:31:37", "remaining_time": "0:28:07", "throughput": 13730.81, "total_tokens": 26060544} +{"current_steps": 8275, "total_steps": 15621, "loss": 0.3786, "lr": 1.0710048227915988e-06, "epoch": 0.529735612316753, "percentage": 52.97, "elapsed_time": "0:31:38", "remaining_time": "0:28:05", "throughput": 13734.62, "total_tokens": 26076160} +{"current_steps": 8280, "total_steps": 15621, "loss": 0.4186, "lr": 1.0698902306878024e-06, "epoch": 0.53005569425773, "percentage": 53.01, "elapsed_time": "0:31:39", "remaining_time": "0:28:03", "throughput": 13738.64, "total_tokens": 26092352} +{"current_steps": 8285, "total_steps": 15621, "loss": 0.3024, "lr": 1.0687755513252325e-06, "epoch": 0.5303757761987069, "percentage": 53.04, "elapsed_time": "0:31:39", "remaining_time": "0:28:02", "throughput": 13742.33, "total_tokens": 26107776} +{"current_steps": 8290, "total_steps": 15621, "loss": 0.31, "lr": 1.0676607860955794e-06, "epoch": 0.5306958581396838, "percentage": 53.07, "elapsed_time": "0:31:40", "remaining_time": "0:28:00", "throughput": 13746.26, "total_tokens": 26123712} +{"current_steps": 8295, "total_steps": 15621, "loss": 0.386, "lr": 1.0665459363906404e-06, "epoch": 0.5310159400806607, "percentage": 53.1, "elapsed_time": "0:31:41", "remaining_time": "0:27:58", "throughput": 13750.05, "total_tokens": 26139200} +{"current_steps": 8300, "total_steps": 15621, "loss": 0.4355, "lr": 1.0654310036023185e-06, "epoch": 0.5313360220216375, "percentage": 53.13, "elapsed_time": "0:31:41", "remaining_time": "0:27:57", "throughput": 13753.47, "total_tokens": 26153600} +{"current_steps": 8305, "total_steps": 15621, "loss": 0.4206, "lr": 1.0643159891226203e-06, "epoch": 0.5316561039626144, "percentage": 53.17, "elapsed_time": "0:31:42", "remaining_time": "0:27:55", "throughput": 13757.4, "total_tokens": 26169600} +{"current_steps": 8310, "total_steps": 15621, "loss": 0.3398, "lr": 1.0632008943436545e-06, "epoch": 0.5319761859035913, "percentage": 53.2, "elapsed_time": "0:31:42", "remaining_time": "0:27:54", "throughput": 13761.38, "total_tokens": 26185536} +{"current_steps": 8315, "total_steps": 15621, "loss": 0.453, "lr": 1.0620857206576299e-06, "epoch": 0.5322962678445682, "percentage": 53.23, "elapsed_time": "0:31:43", "remaining_time": "0:27:52", "throughput": 13765.27, "total_tokens": 26201536} +{"current_steps": 8320, "total_steps": 15621, "loss": 0.2888, "lr": 1.0609704694568546e-06, "epoch": 0.5326163497855451, "percentage": 53.26, "elapsed_time": "0:31:44", "remaining_time": "0:27:50", "throughput": 13768.87, "total_tokens": 26216576} +{"current_steps": 8325, "total_steps": 15621, "loss": 0.2904, "lr": 1.0598551421337318e-06, "epoch": 0.5329364317265219, "percentage": 53.29, "elapsed_time": "0:31:44", "remaining_time": "0:27:49", "throughput": 13772.79, "total_tokens": 26232640} +{"current_steps": 8330, "total_steps": 15621, "loss": 0.5146, "lr": 1.0587397400807617e-06, "epoch": 0.5332565136674989, "percentage": 53.33, "elapsed_time": "0:31:45", "remaining_time": "0:27:47", "throughput": 13776.61, "total_tokens": 26248448} +{"current_steps": 8335, "total_steps": 15621, "loss": 0.519, "lr": 1.057624264690536e-06, "epoch": 0.5335765956084758, "percentage": 53.36, "elapsed_time": "0:31:45", "remaining_time": "0:27:46", "throughput": 13780.32, "total_tokens": 26263872} +{"current_steps": 8340, "total_steps": 15621, "loss": 0.4598, "lr": 1.0565087173557394e-06, "epoch": 0.5338966775494527, "percentage": 53.39, "elapsed_time": "0:31:46", "remaining_time": "0:27:44", "throughput": 13784.24, "total_tokens": 26279872} +{"current_steps": 8345, "total_steps": 15621, "loss": 0.3428, "lr": 1.055393099469146e-06, "epoch": 0.5342167594904296, "percentage": 53.42, "elapsed_time": "0:31:47", "remaining_time": "0:27:42", "throughput": 13788.15, "total_tokens": 26295680} +{"current_steps": 8350, "total_steps": 15621, "loss": 0.4057, "lr": 1.054277412423617e-06, "epoch": 0.5345368414314065, "percentage": 53.45, "elapsed_time": "0:31:47", "remaining_time": "0:27:41", "throughput": 13791.84, "total_tokens": 26311040} +{"current_steps": 8355, "total_steps": 15621, "loss": 0.4603, "lr": 1.0531616576121017e-06, "epoch": 0.5348569233723833, "percentage": 53.49, "elapsed_time": "0:31:48", "remaining_time": "0:27:39", "throughput": 13795.5, "total_tokens": 26326144} +{"current_steps": 8360, "total_steps": 15621, "loss": 0.3347, "lr": 1.0520458364276325e-06, "epoch": 0.5351770053133602, "percentage": 53.52, "elapsed_time": "0:31:48", "remaining_time": "0:27:37", "throughput": 13799.29, "total_tokens": 26341952} +{"current_steps": 8365, "total_steps": 15621, "loss": 0.3565, "lr": 1.0509299502633256e-06, "epoch": 0.5354970872543371, "percentage": 53.55, "elapsed_time": "0:31:49", "remaining_time": "0:27:36", "throughput": 13802.77, "total_tokens": 26356672} +{"current_steps": 8370, "total_steps": 15621, "loss": 0.4493, "lr": 1.0498140005123777e-06, "epoch": 0.535817169195314, "percentage": 53.58, "elapsed_time": "0:31:50", "remaining_time": "0:27:34", "throughput": 13806.74, "total_tokens": 26373056} +{"current_steps": 8375, "total_steps": 15621, "loss": 0.426, "lr": 1.0486979885680653e-06, "epoch": 0.5361372511362908, "percentage": 53.61, "elapsed_time": "0:31:50", "remaining_time": "0:27:33", "throughput": 13810.29, "total_tokens": 26388032} +{"current_steps": 8380, "total_steps": 15621, "loss": 0.4115, "lr": 1.0475819158237424e-06, "epoch": 0.5364573330772677, "percentage": 53.65, "elapsed_time": "0:31:51", "remaining_time": "0:27:31", "throughput": 13813.81, "total_tokens": 26402880} +{"current_steps": 8385, "total_steps": 15621, "loss": 0.4713, "lr": 1.0464657836728389e-06, "epoch": 0.5367774150182447, "percentage": 53.68, "elapsed_time": "0:31:51", "remaining_time": "0:27:29", "throughput": 13817.8, "total_tokens": 26419328} +{"current_steps": 8390, "total_steps": 15621, "loss": 0.3981, "lr": 1.045349593508859e-06, "epoch": 0.5370974969592216, "percentage": 53.71, "elapsed_time": "0:31:52", "remaining_time": "0:27:28", "throughput": 13821.33, "total_tokens": 26434112} +{"current_steps": 8395, "total_steps": 15621, "loss": 0.297, "lr": 1.0442333467253788e-06, "epoch": 0.5374175789001985, "percentage": 53.74, "elapsed_time": "0:31:53", "remaining_time": "0:27:26", "throughput": 13825.38, "total_tokens": 26450688} +{"current_steps": 8400, "total_steps": 15621, "loss": 0.3602, "lr": 1.0431170447160463e-06, "epoch": 0.5377376608411754, "percentage": 53.77, "elapsed_time": "0:31:53", "remaining_time": "0:27:25", "throughput": 13829.13, "total_tokens": 26466368} +{"current_steps": 8405, "total_steps": 15621, "loss": 0.3495, "lr": 1.0420006888745767e-06, "epoch": 0.5380577427821522, "percentage": 53.81, "elapsed_time": "0:31:54", "remaining_time": "0:27:23", "throughput": 13833.07, "total_tokens": 26482624} +{"current_steps": 8410, "total_steps": 15621, "loss": 0.3668, "lr": 1.0408842805947543e-06, "epoch": 0.5383778247231291, "percentage": 53.84, "elapsed_time": "0:31:55", "remaining_time": "0:27:22", "throughput": 13837.1, "total_tokens": 26499200} +{"current_steps": 8415, "total_steps": 15621, "loss": 0.5119, "lr": 1.0397678212704276e-06, "epoch": 0.538697906664106, "percentage": 53.87, "elapsed_time": "0:31:55", "remaining_time": "0:27:20", "throughput": 13840.59, "total_tokens": 26514048} +{"current_steps": 8420, "total_steps": 15621, "loss": 0.4034, "lr": 1.038651312295509e-06, "epoch": 0.5390179886050829, "percentage": 53.9, "elapsed_time": "0:31:56", "remaining_time": "0:27:18", "throughput": 13844.2, "total_tokens": 26529216} +{"current_steps": 8425, "total_steps": 15621, "loss": 0.4192, "lr": 1.037534755063973e-06, "epoch": 0.5393380705460598, "percentage": 53.93, "elapsed_time": "0:31:56", "remaining_time": "0:27:17", "throughput": 13848.04, "total_tokens": 26545152} +{"current_steps": 8430, "total_steps": 15621, "loss": 0.4147, "lr": 1.0364181509698548e-06, "epoch": 0.5396581524870366, "percentage": 53.97, "elapsed_time": "0:31:57", "remaining_time": "0:27:15", "throughput": 13851.7, "total_tokens": 26560512} +{"current_steps": 8435, "total_steps": 15621, "loss": 0.35, "lr": 1.0353015014072476e-06, "epoch": 0.5399782344280136, "percentage": 54.0, "elapsed_time": "0:31:58", "remaining_time": "0:27:14", "throughput": 13855.23, "total_tokens": 26575488} +{"current_steps": 8440, "total_steps": 15621, "loss": 0.405, "lr": 1.0341848077703013e-06, "epoch": 0.5402983163689905, "percentage": 54.03, "elapsed_time": "0:31:58", "remaining_time": "0:27:12", "throughput": 13858.99, "total_tokens": 26591040} +{"current_steps": 8445, "total_steps": 15621, "loss": 0.3229, "lr": 1.033068071453221e-06, "epoch": 0.5406183983099674, "percentage": 54.06, "elapsed_time": "0:31:59", "remaining_time": "0:27:10", "throughput": 13862.87, "total_tokens": 26606976} +{"current_steps": 8450, "total_steps": 15621, "loss": 0.3623, "lr": 1.0319512938502653e-06, "epoch": 0.5409384802509443, "percentage": 54.09, "elapsed_time": "0:31:59", "remaining_time": "0:27:09", "throughput": 13866.85, "total_tokens": 26623296} +{"current_steps": 8455, "total_steps": 15621, "loss": 0.3123, "lr": 1.0308344763557444e-06, "epoch": 0.5412585621919211, "percentage": 54.13, "elapsed_time": "0:32:00", "remaining_time": "0:27:07", "throughput": 13870.41, "total_tokens": 26638336} +{"current_steps": 8460, "total_steps": 15621, "loss": 0.2841, "lr": 1.0297176203640175e-06, "epoch": 0.541578644132898, "percentage": 54.16, "elapsed_time": "0:32:01", "remaining_time": "0:27:06", "throughput": 13874.35, "total_tokens": 26654400} +{"current_steps": 8465, "total_steps": 15621, "loss": 0.3482, "lr": 1.0286007272694924e-06, "epoch": 0.5418987260738749, "percentage": 54.19, "elapsed_time": "0:32:01", "remaining_time": "0:27:04", "throughput": 13877.93, "total_tokens": 26669568} +{"current_steps": 8470, "total_steps": 15621, "loss": 0.4695, "lr": 1.0274837984666239e-06, "epoch": 0.5422188080148518, "percentage": 54.22, "elapsed_time": "0:32:02", "remaining_time": "0:27:02", "throughput": 13881.89, "total_tokens": 26686016} +{"current_steps": 8475, "total_steps": 15621, "loss": 0.4184, "lr": 1.02636683534991e-06, "epoch": 0.5425388899558287, "percentage": 54.25, "elapsed_time": "0:32:02", "remaining_time": "0:27:01", "throughput": 13885.57, "total_tokens": 26701504} +{"current_steps": 8480, "total_steps": 15621, "loss": 0.5884, "lr": 1.0252498393138928e-06, "epoch": 0.5428589718968055, "percentage": 54.29, "elapsed_time": "0:32:03", "remaining_time": "0:26:59", "throughput": 13889.3, "total_tokens": 26717120} +{"current_steps": 8485, "total_steps": 15621, "loss": 0.4193, "lr": 1.0241328117531546e-06, "epoch": 0.5431790538377824, "percentage": 54.32, "elapsed_time": "0:32:04", "remaining_time": "0:26:58", "throughput": 13893.03, "total_tokens": 26732736} +{"current_steps": 8490, "total_steps": 15621, "loss": 0.4126, "lr": 1.0230157540623174e-06, "epoch": 0.5434991357787594, "percentage": 54.35, "elapsed_time": "0:32:04", "remaining_time": "0:26:56", "throughput": 13896.4, "total_tokens": 26747392} +{"current_steps": 8495, "total_steps": 15621, "loss": 0.4462, "lr": 1.0218986676360415e-06, "epoch": 0.5438192177197363, "percentage": 54.38, "elapsed_time": "0:32:05", "remaining_time": "0:26:55", "throughput": 13899.83, "total_tokens": 26762112} +{"current_steps": 8500, "total_steps": 15621, "loss": 0.3709, "lr": 1.0207815538690216e-06, "epoch": 0.5441392996607132, "percentage": 54.41, "elapsed_time": "0:32:05", "remaining_time": "0:26:53", "throughput": 13903.55, "total_tokens": 26777856} +{"current_steps": 8505, "total_steps": 15621, "loss": 0.3055, "lr": 1.0196644141559877e-06, "epoch": 0.54445938160169, "percentage": 54.45, "elapsed_time": "0:32:06", "remaining_time": "0:26:51", "throughput": 13907.44, "total_tokens": 26794048} +{"current_steps": 8510, "total_steps": 15621, "loss": 0.3509, "lr": 1.0185472498917021e-06, "epoch": 0.5447794635426669, "percentage": 54.48, "elapsed_time": "0:32:07", "remaining_time": "0:26:50", "throughput": 13911.21, "total_tokens": 26809792} +{"current_steps": 8515, "total_steps": 15621, "loss": 0.4421, "lr": 1.017430062470957e-06, "epoch": 0.5450995454836438, "percentage": 54.51, "elapsed_time": "0:32:07", "remaining_time": "0:26:48", "throughput": 13914.8, "total_tokens": 26825024} +{"current_steps": 8520, "total_steps": 15621, "loss": 0.3472, "lr": 1.016312853288574e-06, "epoch": 0.5454196274246207, "percentage": 54.54, "elapsed_time": "0:32:08", "remaining_time": "0:26:47", "throughput": 13918.85, "total_tokens": 26841536} +{"current_steps": 8525, "total_steps": 15621, "loss": 0.395, "lr": 1.0151956237394027e-06, "epoch": 0.5457397093655976, "percentage": 54.57, "elapsed_time": "0:32:09", "remaining_time": "0:26:45", "throughput": 13922.69, "total_tokens": 26857600} +{"current_steps": 8530, "total_steps": 15621, "loss": 0.3942, "lr": 1.0140783752183164e-06, "epoch": 0.5460597913065744, "percentage": 54.61, "elapsed_time": "0:32:09", "remaining_time": "0:26:44", "throughput": 13926.76, "total_tokens": 26874176} +{"current_steps": 8535, "total_steps": 15621, "loss": 0.4162, "lr": 1.0129611091202138e-06, "epoch": 0.5463798732475513, "percentage": 54.64, "elapsed_time": "0:32:10", "remaining_time": "0:26:42", "throughput": 13930.61, "total_tokens": 26890176} +{"current_steps": 8540, "total_steps": 15621, "loss": 0.2897, "lr": 1.0118438268400135e-06, "epoch": 0.5466999551885282, "percentage": 54.67, "elapsed_time": "0:32:10", "remaining_time": "0:26:41", "throughput": 13934.23, "total_tokens": 26905728} +{"current_steps": 8545, "total_steps": 15621, "loss": 0.4655, "lr": 1.0107265297726568e-06, "epoch": 0.5470200371295052, "percentage": 54.7, "elapsed_time": "0:32:11", "remaining_time": "0:26:39", "throughput": 13937.85, "total_tokens": 26921280} +{"current_steps": 8550, "total_steps": 15621, "loss": 0.4065, "lr": 1.009609219313102e-06, "epoch": 0.5473401190704821, "percentage": 54.73, "elapsed_time": "0:32:12", "remaining_time": "0:26:37", "throughput": 13941.45, "total_tokens": 26936704} +{"current_steps": 8555, "total_steps": 15621, "loss": 0.4008, "lr": 1.0084918968563236e-06, "epoch": 0.547660201011459, "percentage": 54.77, "elapsed_time": "0:32:12", "remaining_time": "0:26:36", "throughput": 13945.2, "total_tokens": 26952448} +{"current_steps": 8560, "total_steps": 15621, "loss": 0.3928, "lr": 1.0073745637973124e-06, "epoch": 0.5479802829524358, "percentage": 54.8, "elapsed_time": "0:32:13", "remaining_time": "0:26:34", "throughput": 13948.76, "total_tokens": 26967680} +{"current_steps": 8565, "total_steps": 15621, "loss": 0.3489, "lr": 1.0062572215310718e-06, "epoch": 0.5483003648934127, "percentage": 54.83, "elapsed_time": "0:32:13", "remaining_time": "0:26:33", "throughput": 13952.14, "total_tokens": 26982400} +{"current_steps": 8570, "total_steps": 15621, "loss": 0.313, "lr": 1.0051398714526165e-06, "epoch": 0.5486204468343896, "percentage": 54.86, "elapsed_time": "0:32:14", "remaining_time": "0:26:31", "throughput": 13955.96, "total_tokens": 26998400} +{"current_steps": 8575, "total_steps": 15621, "loss": 0.3506, "lr": 1.0040225149569712e-06, "epoch": 0.5489405287753665, "percentage": 54.89, "elapsed_time": "0:32:15", "remaining_time": "0:26:30", "throughput": 13960.21, "total_tokens": 27015936} +{"current_steps": 8580, "total_steps": 15621, "loss": 0.3263, "lr": 1.0029051534391693e-06, "epoch": 0.5492606107163434, "percentage": 54.93, "elapsed_time": "0:32:15", "remaining_time": "0:26:28", "throughput": 13963.6, "total_tokens": 27030528} +{"current_steps": 8585, "total_steps": 15621, "loss": 0.3621, "lr": 1.001787788294249e-06, "epoch": 0.5495806926573202, "percentage": 54.96, "elapsed_time": "0:32:16", "remaining_time": "0:26:27", "throughput": 13967.25, "total_tokens": 27046080} +{"current_steps": 8590, "total_steps": 15621, "loss": 0.4206, "lr": 1.0006704209172537e-06, "epoch": 0.5499007745982971, "percentage": 54.99, "elapsed_time": "0:32:16", "remaining_time": "0:26:25", "throughput": 13970.85, "total_tokens": 27061504} +{"current_steps": 8595, "total_steps": 15621, "loss": 0.4297, "lr": 9.995530527032301e-07, "epoch": 0.5502208565392741, "percentage": 55.02, "elapsed_time": "0:32:17", "remaining_time": "0:26:23", "throughput": 13974.49, "total_tokens": 27077056} +{"current_steps": 8600, "total_steps": 15621, "loss": 0.3382, "lr": 9.984356850472257e-07, "epoch": 0.550540938480251, "percentage": 55.05, "elapsed_time": "0:32:18", "remaining_time": "0:26:22", "throughput": 13978.95, "total_tokens": 27095168} +{"current_steps": 8602, "total_steps": 15621, "eval_loss": 0.3985471725463867, "epoch": 0.5506689712566417, "percentage": 55.07, "elapsed_time": "0:33:07", "remaining_time": "0:27:01", "throughput": 13634.54, "total_tokens": 27101056} +{"current_steps": 8605, "total_steps": 15621, "loss": 0.3698, "lr": 9.97318319344287e-07, "epoch": 0.5508610204212279, "percentage": 55.09, "elapsed_time": "0:33:29", "remaining_time": "0:27:18", "throughput": 13487.95, "total_tokens": 27110144} +{"current_steps": 8610, "total_steps": 15621, "loss": 0.5311, "lr": 9.962009569894577e-07, "epoch": 0.5511811023622047, "percentage": 55.12, "elapsed_time": "0:33:30", "remaining_time": "0:27:17", "throughput": 13491.37, "total_tokens": 27124864} +{"current_steps": 8615, "total_steps": 15621, "loss": 0.3769, "lr": 9.95083599377778e-07, "epoch": 0.5515011843031816, "percentage": 55.15, "elapsed_time": "0:33:31", "remaining_time": "0:27:15", "throughput": 13494.93, "total_tokens": 27140160} +{"current_steps": 8620, "total_steps": 15621, "loss": 0.374, "lr": 9.939662479042828e-07, "epoch": 0.5518212662441585, "percentage": 55.18, "elapsed_time": "0:33:31", "remaining_time": "0:27:13", "throughput": 13498.55, "total_tokens": 27155712} +{"current_steps": 8625, "total_steps": 15621, "loss": 0.4573, "lr": 9.92848903963998e-07, "epoch": 0.5521413481851354, "percentage": 55.21, "elapsed_time": "0:33:32", "remaining_time": "0:27:12", "throughput": 13502.33, "total_tokens": 27171520} +{"current_steps": 8630, "total_steps": 15621, "loss": 0.4487, "lr": 9.9173156895194e-07, "epoch": 0.5524614301261123, "percentage": 55.25, "elapsed_time": "0:33:32", "remaining_time": "0:27:10", "throughput": 13505.9, "total_tokens": 27186752} +{"current_steps": 8635, "total_steps": 15621, "loss": 0.3823, "lr": 9.906142442631154e-07, "epoch": 0.5527815120670891, "percentage": 55.28, "elapsed_time": "0:33:33", "remaining_time": "0:27:09", "throughput": 13509.37, "total_tokens": 27201664} +{"current_steps": 8640, "total_steps": 15621, "loss": 0.3804, "lr": 9.894969312925171e-07, "epoch": 0.553101594008066, "percentage": 55.31, "elapsed_time": "0:33:34", "remaining_time": "0:27:07", "throughput": 13513.51, "total_tokens": 27218880} +{"current_steps": 8645, "total_steps": 15621, "loss": 0.3448, "lr": 9.883796314351234e-07, "epoch": 0.5534216759490429, "percentage": 55.34, "elapsed_time": "0:33:34", "remaining_time": "0:27:05", "throughput": 13517.51, "total_tokens": 27235648} +{"current_steps": 8650, "total_steps": 15621, "loss": 0.3997, "lr": 9.872623460858966e-07, "epoch": 0.5537417578900199, "percentage": 55.37, "elapsed_time": "0:33:35", "remaining_time": "0:27:04", "throughput": 13521.04, "total_tokens": 27250880} +{"current_steps": 8655, "total_steps": 15621, "loss": 0.3163, "lr": 9.861450766397799e-07, "epoch": 0.5540618398309968, "percentage": 55.41, "elapsed_time": "0:33:36", "remaining_time": "0:27:02", "throughput": 13524.81, "total_tokens": 27266880} +{"current_steps": 8660, "total_steps": 15621, "loss": 0.411, "lr": 9.850278244916976e-07, "epoch": 0.5543819217719737, "percentage": 55.44, "elapsed_time": "0:33:36", "remaining_time": "0:27:01", "throughput": 13528.56, "total_tokens": 27282816} +{"current_steps": 8665, "total_steps": 15621, "loss": 0.4309, "lr": 9.839105910365524e-07, "epoch": 0.5547020037129505, "percentage": 55.47, "elapsed_time": "0:33:37", "remaining_time": "0:26:59", "throughput": 13532.22, "total_tokens": 27298496} +{"current_steps": 8670, "total_steps": 15621, "loss": 0.331, "lr": 9.827933776692235e-07, "epoch": 0.5550220856539274, "percentage": 55.5, "elapsed_time": "0:33:37", "remaining_time": "0:26:57", "throughput": 13535.79, "total_tokens": 27313856} +{"current_steps": 8675, "total_steps": 15621, "loss": 0.34, "lr": 9.81676185784564e-07, "epoch": 0.5553421675949043, "percentage": 55.53, "elapsed_time": "0:33:38", "remaining_time": "0:26:56", "throughput": 13539.17, "total_tokens": 27328448} +{"current_steps": 8680, "total_steps": 15621, "loss": 0.3916, "lr": 9.805590167774021e-07, "epoch": 0.5556622495358812, "percentage": 55.57, "elapsed_time": "0:33:39", "remaining_time": "0:26:54", "throughput": 13542.76, "total_tokens": 27343872} +{"current_steps": 8685, "total_steps": 15621, "loss": 0.5632, "lr": 9.79441872042536e-07, "epoch": 0.555982331476858, "percentage": 55.6, "elapsed_time": "0:33:39", "remaining_time": "0:26:52", "throughput": 13546.19, "total_tokens": 27358720} +{"current_steps": 8690, "total_steps": 15621, "loss": 0.3856, "lr": 9.783247529747338e-07, "epoch": 0.5563024134178349, "percentage": 55.63, "elapsed_time": "0:33:40", "remaining_time": "0:26:51", "throughput": 13549.5, "total_tokens": 27373312} +{"current_steps": 8695, "total_steps": 15621, "loss": 0.3571, "lr": 9.772076609687323e-07, "epoch": 0.5566224953588118, "percentage": 55.66, "elapsed_time": "0:33:40", "remaining_time": "0:26:49", "throughput": 13553.01, "total_tokens": 27388544} +{"current_steps": 8700, "total_steps": 15621, "loss": 0.3259, "lr": 9.760905974192334e-07, "epoch": 0.5569425772997888, "percentage": 55.69, "elapsed_time": "0:33:41", "remaining_time": "0:26:48", "throughput": 13556.93, "total_tokens": 27405120} +{"current_steps": 8705, "total_steps": 15621, "loss": 0.4078, "lr": 9.749735637209044e-07, "epoch": 0.5572626592407657, "percentage": 55.73, "elapsed_time": "0:33:42", "remaining_time": "0:26:46", "throughput": 13560.48, "total_tokens": 27420544} +{"current_steps": 8710, "total_steps": 15621, "loss": 0.3137, "lr": 9.738565612683754e-07, "epoch": 0.5575827411817426, "percentage": 55.76, "elapsed_time": "0:33:42", "remaining_time": "0:26:44", "throughput": 13563.95, "total_tokens": 27435456} +{"current_steps": 8715, "total_steps": 15621, "loss": 0.3477, "lr": 9.727395914562363e-07, "epoch": 0.5579028231227194, "percentage": 55.79, "elapsed_time": "0:33:43", "remaining_time": "0:26:43", "throughput": 13567.85, "total_tokens": 27452032} +{"current_steps": 8720, "total_steps": 15621, "loss": 0.4159, "lr": 9.716226556790372e-07, "epoch": 0.5582229050636963, "percentage": 55.82, "elapsed_time": "0:33:43", "remaining_time": "0:26:41", "throughput": 13571.48, "total_tokens": 27467520} +{"current_steps": 8725, "total_steps": 15621, "loss": 0.312, "lr": 9.705057553312855e-07, "epoch": 0.5585429870046732, "percentage": 55.85, "elapsed_time": "0:33:44", "remaining_time": "0:26:40", "throughput": 13575.01, "total_tokens": 27482816} +{"current_steps": 8730, "total_steps": 15621, "loss": 0.374, "lr": 9.693888918074452e-07, "epoch": 0.5588630689456501, "percentage": 55.89, "elapsed_time": "0:33:45", "remaining_time": "0:26:38", "throughput": 13578.39, "total_tokens": 27497600} +{"current_steps": 8735, "total_steps": 15621, "loss": 0.4861, "lr": 9.682720665019325e-07, "epoch": 0.559183150886627, "percentage": 55.92, "elapsed_time": "0:33:45", "remaining_time": "0:26:36", "throughput": 13582.02, "total_tokens": 27513344} +{"current_steps": 8740, "total_steps": 15621, "loss": 0.4204, "lr": 9.671552808091172e-07, "epoch": 0.5595032328276038, "percentage": 55.95, "elapsed_time": "0:33:46", "remaining_time": "0:26:35", "throughput": 13586.05, "total_tokens": 27530304} +{"current_steps": 8745, "total_steps": 15621, "loss": 0.3409, "lr": 9.660385361233195e-07, "epoch": 0.5598233147685807, "percentage": 55.98, "elapsed_time": "0:33:46", "remaining_time": "0:26:33", "throughput": 13589.55, "total_tokens": 27545664} +{"current_steps": 8750, "total_steps": 15621, "loss": 0.2987, "lr": 9.649218338388084e-07, "epoch": 0.5601433967095576, "percentage": 56.01, "elapsed_time": "0:33:47", "remaining_time": "0:26:32", "throughput": 13593.01, "total_tokens": 27560704} +{"current_steps": 8755, "total_steps": 15621, "loss": 0.4353, "lr": 9.638051753497994e-07, "epoch": 0.5604634786505346, "percentage": 56.05, "elapsed_time": "0:33:48", "remaining_time": "0:26:30", "throughput": 13596.9, "total_tokens": 27577472} +{"current_steps": 8760, "total_steps": 15621, "loss": 0.3597, "lr": 9.62688562050454e-07, "epoch": 0.5607835605915115, "percentage": 56.08, "elapsed_time": "0:33:48", "remaining_time": "0:26:29", "throughput": 13600.46, "total_tokens": 27592960} +{"current_steps": 8765, "total_steps": 15621, "loss": 0.4033, "lr": 9.615719953348772e-07, "epoch": 0.5611036425324883, "percentage": 56.11, "elapsed_time": "0:33:49", "remaining_time": "0:26:27", "throughput": 13604.58, "total_tokens": 27610304} +{"current_steps": 8770, "total_steps": 15621, "loss": 0.5574, "lr": 9.604554765971148e-07, "epoch": 0.5614237244734652, "percentage": 56.14, "elapsed_time": "0:33:50", "remaining_time": "0:26:25", "throughput": 13608.88, "total_tokens": 27628288} +{"current_steps": 8775, "total_steps": 15621, "loss": 0.4069, "lr": 9.593390072311549e-07, "epoch": 0.5617438064144421, "percentage": 56.17, "elapsed_time": "0:33:50", "remaining_time": "0:26:24", "throughput": 13612.47, "total_tokens": 27643904} +{"current_steps": 8780, "total_steps": 15621, "loss": 0.3576, "lr": 9.582225886309216e-07, "epoch": 0.562063888355419, "percentage": 56.21, "elapsed_time": "0:33:51", "remaining_time": "0:26:22", "throughput": 13616.25, "total_tokens": 27660224} +{"current_steps": 8785, "total_steps": 15621, "loss": 0.3015, "lr": 9.571062221902767e-07, "epoch": 0.5623839702963959, "percentage": 56.24, "elapsed_time": "0:33:51", "remaining_time": "0:26:21", "throughput": 13619.67, "total_tokens": 27675136} +{"current_steps": 8790, "total_steps": 15621, "loss": 0.3485, "lr": 9.559899093030175e-07, "epoch": 0.5627040522373727, "percentage": 56.27, "elapsed_time": "0:33:52", "remaining_time": "0:26:19", "throughput": 13623.08, "total_tokens": 27690176} +{"current_steps": 8795, "total_steps": 15621, "loss": 0.3061, "lr": 9.54873651362873e-07, "epoch": 0.5630241341783496, "percentage": 56.3, "elapsed_time": "0:33:53", "remaining_time": "0:26:17", "throughput": 13626.27, "total_tokens": 27704512} +{"current_steps": 8800, "total_steps": 15621, "loss": 0.46, "lr": 9.537574497635043e-07, "epoch": 0.5633442161193265, "percentage": 56.33, "elapsed_time": "0:33:53", "remaining_time": "0:26:16", "throughput": 13629.94, "total_tokens": 27720448} +{"current_steps": 8805, "total_steps": 15621, "loss": 0.4966, "lr": 9.52641305898503e-07, "epoch": 0.5636642980603035, "percentage": 56.37, "elapsed_time": "0:33:54", "remaining_time": "0:26:14", "throughput": 13633.47, "total_tokens": 27735808} +{"current_steps": 8810, "total_steps": 15621, "loss": 0.3122, "lr": 9.515252211613873e-07, "epoch": 0.5639843800012804, "percentage": 56.4, "elapsed_time": "0:33:54", "remaining_time": "0:26:13", "throughput": 13636.73, "total_tokens": 27750464} +{"current_steps": 8815, "total_steps": 15621, "loss": 0.4586, "lr": 9.504091969456021e-07, "epoch": 0.5643044619422573, "percentage": 56.43, "elapsed_time": "0:33:55", "remaining_time": "0:26:11", "throughput": 13639.78, "total_tokens": 27764352} +{"current_steps": 8820, "total_steps": 15621, "loss": 0.338, "lr": 9.492932346445165e-07, "epoch": 0.5646245438832341, "percentage": 56.46, "elapsed_time": "0:33:56", "remaining_time": "0:26:10", "throughput": 13643.27, "total_tokens": 27779840} +{"current_steps": 8825, "total_steps": 15621, "loss": 0.27, "lr": 9.48177335651423e-07, "epoch": 0.564944625824211, "percentage": 56.49, "elapsed_time": "0:33:56", "remaining_time": "0:26:08", "throughput": 13647.09, "total_tokens": 27796352} +{"current_steps": 8830, "total_steps": 15621, "loss": 0.3325, "lr": 9.470615013595346e-07, "epoch": 0.5652647077651879, "percentage": 56.53, "elapsed_time": "0:33:57", "remaining_time": "0:26:06", "throughput": 13650.25, "total_tokens": 27810624} +{"current_steps": 8835, "total_steps": 15621, "loss": 0.4447, "lr": 9.459457331619829e-07, "epoch": 0.5655847897061648, "percentage": 56.56, "elapsed_time": "0:33:57", "remaining_time": "0:26:05", "throughput": 13653.51, "total_tokens": 27825152} +{"current_steps": 8840, "total_steps": 15621, "loss": 0.4076, "lr": 9.448300324518182e-07, "epoch": 0.5659048716471416, "percentage": 56.59, "elapsed_time": "0:33:58", "remaining_time": "0:26:03", "throughput": 13657.0, "total_tokens": 27840384} +{"current_steps": 8845, "total_steps": 15621, "loss": 0.3017, "lr": 9.437144006220058e-07, "epoch": 0.5662249535881185, "percentage": 56.62, "elapsed_time": "0:33:59", "remaining_time": "0:26:02", "throughput": 13660.71, "total_tokens": 27856640} +{"current_steps": 8850, "total_steps": 15621, "loss": 0.2027, "lr": 9.425988390654249e-07, "epoch": 0.5665450355290954, "percentage": 56.65, "elapsed_time": "0:33:59", "remaining_time": "0:26:00", "throughput": 13664.42, "total_tokens": 27872768} +{"current_steps": 8855, "total_steps": 15621, "loss": 0.4955, "lr": 9.414833491748677e-07, "epoch": 0.5668651174700723, "percentage": 56.69, "elapsed_time": "0:34:00", "remaining_time": "0:25:59", "throughput": 13667.7, "total_tokens": 27887488} +{"current_steps": 8860, "total_steps": 15621, "loss": 0.3024, "lr": 9.40367932343036e-07, "epoch": 0.5671851994110493, "percentage": 56.72, "elapsed_time": "0:34:00", "remaining_time": "0:25:57", "throughput": 13671.1, "total_tokens": 27902720} +{"current_steps": 8865, "total_steps": 15621, "loss": 0.374, "lr": 9.392525899625407e-07, "epoch": 0.5675052813520262, "percentage": 56.75, "elapsed_time": "0:34:01", "remaining_time": "0:25:55", "throughput": 13674.59, "total_tokens": 27918080} +{"current_steps": 8870, "total_steps": 15621, "loss": 0.4011, "lr": 9.381373234259004e-07, "epoch": 0.567825363293003, "percentage": 56.78, "elapsed_time": "0:34:02", "remaining_time": "0:25:54", "throughput": 13678.23, "total_tokens": 27933760} +{"current_steps": 8875, "total_steps": 15621, "loss": 0.375, "lr": 9.370221341255382e-07, "epoch": 0.5681454452339799, "percentage": 56.81, "elapsed_time": "0:34:02", "remaining_time": "0:25:52", "throughput": 13681.63, "total_tokens": 27948992} +{"current_steps": 8880, "total_steps": 15621, "loss": 0.3382, "lr": 9.359070234537807e-07, "epoch": 0.5684655271749568, "percentage": 56.85, "elapsed_time": "0:34:03", "remaining_time": "0:25:51", "throughput": 13685.79, "total_tokens": 27966848} +{"current_steps": 8885, "total_steps": 15621, "loss": 0.3803, "lr": 9.34791992802857e-07, "epoch": 0.5687856091159337, "percentage": 56.88, "elapsed_time": "0:34:04", "remaining_time": "0:25:49", "throughput": 13689.04, "total_tokens": 27981696} +{"current_steps": 8890, "total_steps": 15621, "loss": 0.2607, "lr": 9.336770435648963e-07, "epoch": 0.5691056910569106, "percentage": 56.91, "elapsed_time": "0:34:04", "remaining_time": "0:25:48", "throughput": 13692.57, "total_tokens": 27997376} +{"current_steps": 8895, "total_steps": 15621, "loss": 0.4075, "lr": 9.325621771319246e-07, "epoch": 0.5694257729978874, "percentage": 56.94, "elapsed_time": "0:34:05", "remaining_time": "0:25:46", "throughput": 13696.4, "total_tokens": 28014016} +{"current_steps": 8900, "total_steps": 15621, "loss": 0.4178, "lr": 9.314473948958673e-07, "epoch": 0.5697458549388643, "percentage": 56.97, "elapsed_time": "0:34:05", "remaining_time": "0:25:45", "throughput": 13700.11, "total_tokens": 28030400} +{"current_steps": 8905, "total_steps": 15621, "loss": 0.3456, "lr": 9.303326982485422e-07, "epoch": 0.5700659368798412, "percentage": 57.01, "elapsed_time": "0:34:06", "remaining_time": "0:25:43", "throughput": 13703.99, "total_tokens": 28047104} +{"current_steps": 8910, "total_steps": 15621, "loss": 0.3546, "lr": 9.29218088581661e-07, "epoch": 0.5703860188208181, "percentage": 57.04, "elapsed_time": "0:34:07", "remaining_time": "0:25:41", "throughput": 13707.66, "total_tokens": 28063168} +{"current_steps": 8915, "total_steps": 15621, "loss": 0.3462, "lr": 9.281035672868278e-07, "epoch": 0.5707061007617951, "percentage": 57.07, "elapsed_time": "0:34:07", "remaining_time": "0:25:40", "throughput": 13711.25, "total_tokens": 28079104} +{"current_steps": 8920, "total_steps": 15621, "loss": 0.3912, "lr": 9.269891357555348e-07, "epoch": 0.571026182702772, "percentage": 57.1, "elapsed_time": "0:34:08", "remaining_time": "0:25:38", "throughput": 13714.79, "total_tokens": 28094720} +{"current_steps": 8925, "total_steps": 15621, "loss": 0.2754, "lr": 9.25874795379163e-07, "epoch": 0.5713462646437488, "percentage": 57.13, "elapsed_time": "0:34:09", "remaining_time": "0:25:37", "throughput": 13718.45, "total_tokens": 28110848} +{"current_steps": 8930, "total_steps": 15621, "loss": 0.4172, "lr": 9.247605475489793e-07, "epoch": 0.5716663465847257, "percentage": 57.17, "elapsed_time": "0:34:09", "remaining_time": "0:25:35", "throughput": 13722.14, "total_tokens": 28127040} +{"current_steps": 8935, "total_steps": 15621, "loss": 0.3062, "lr": 9.236463936561358e-07, "epoch": 0.5719864285257026, "percentage": 57.2, "elapsed_time": "0:34:10", "remaining_time": "0:25:34", "throughput": 13725.93, "total_tokens": 28143424} +{"current_steps": 8940, "total_steps": 15621, "loss": 0.5365, "lr": 9.225323350916661e-07, "epoch": 0.5723065104666795, "percentage": 57.23, "elapsed_time": "0:34:10", "remaining_time": "0:25:32", "throughput": 13729.3, "total_tokens": 28158528} +{"current_steps": 8945, "total_steps": 15621, "loss": 0.3948, "lr": 9.214183732464855e-07, "epoch": 0.5726265924076563, "percentage": 57.26, "elapsed_time": "0:34:11", "remaining_time": "0:25:31", "throughput": 13732.75, "total_tokens": 28173888} +{"current_steps": 8950, "total_steps": 15621, "loss": 0.3671, "lr": 9.203045095113886e-07, "epoch": 0.5729466743486332, "percentage": 57.29, "elapsed_time": "0:34:12", "remaining_time": "0:25:29", "throughput": 13736.94, "total_tokens": 28191872} +{"current_steps": 8955, "total_steps": 15621, "loss": 0.4305, "lr": 9.191907452770476e-07, "epoch": 0.5732667562896101, "percentage": 57.33, "elapsed_time": "0:34:12", "remaining_time": "0:25:28", "throughput": 13740.31, "total_tokens": 28206912} +{"current_steps": 8960, "total_steps": 15621, "loss": 0.4233, "lr": 9.180770819340095e-07, "epoch": 0.573586838230587, "percentage": 57.36, "elapsed_time": "0:34:13", "remaining_time": "0:25:26", "throughput": 13743.8, "total_tokens": 28222336} +{"current_steps": 8965, "total_steps": 15621, "loss": 0.376, "lr": 9.169635208726967e-07, "epoch": 0.573906920171564, "percentage": 57.39, "elapsed_time": "0:34:14", "remaining_time": "0:25:25", "throughput": 13747.35, "total_tokens": 28238144} +{"current_steps": 8970, "total_steps": 15621, "loss": 0.3787, "lr": 9.15850063483403e-07, "epoch": 0.5742270021125409, "percentage": 57.42, "elapsed_time": "0:34:14", "remaining_time": "0:25:23", "throughput": 13750.76, "total_tokens": 28253376} +{"current_steps": 8975, "total_steps": 15621, "loss": 0.3493, "lr": 9.147367111562928e-07, "epoch": 0.5745470840535177, "percentage": 57.45, "elapsed_time": "0:34:15", "remaining_time": "0:25:21", "throughput": 13754.4, "total_tokens": 28269248} +{"current_steps": 8980, "total_steps": 15621, "loss": 0.4094, "lr": 9.136234652814005e-07, "epoch": 0.5748671659944946, "percentage": 57.49, "elapsed_time": "0:34:15", "remaining_time": "0:25:20", "throughput": 13758.1, "total_tokens": 28285440} +{"current_steps": 8985, "total_steps": 15621, "loss": 0.2965, "lr": 9.125103272486255e-07, "epoch": 0.5751872479354715, "percentage": 57.52, "elapsed_time": "0:34:16", "remaining_time": "0:25:18", "throughput": 13761.5, "total_tokens": 28300736} +{"current_steps": 8990, "total_steps": 15621, "loss": 0.361, "lr": 9.11397298447734e-07, "epoch": 0.5755073298764484, "percentage": 57.55, "elapsed_time": "0:34:17", "remaining_time": "0:25:17", "throughput": 13764.84, "total_tokens": 28315712} +{"current_steps": 8995, "total_steps": 15621, "loss": 0.3287, "lr": 9.10284380268356e-07, "epoch": 0.5758274118174252, "percentage": 57.58, "elapsed_time": "0:34:17", "remaining_time": "0:25:15", "throughput": 13768.52, "total_tokens": 28332032} +{"current_steps": 9000, "total_steps": 15621, "loss": 0.4476, "lr": 9.091715740999828e-07, "epoch": 0.5761474937584021, "percentage": 57.61, "elapsed_time": "0:34:18", "remaining_time": "0:25:14", "throughput": 13772.1, "total_tokens": 28347968} +{"current_steps": 9005, "total_steps": 15621, "loss": 0.3849, "lr": 9.080588813319654e-07, "epoch": 0.576467575699379, "percentage": 57.65, "elapsed_time": "0:34:18", "remaining_time": "0:25:12", "throughput": 13775.43, "total_tokens": 28362944} +{"current_steps": 9010, "total_steps": 15621, "loss": 0.3032, "lr": 9.069463033535143e-07, "epoch": 0.5767876576403559, "percentage": 57.68, "elapsed_time": "0:34:19", "remaining_time": "0:25:11", "throughput": 13778.96, "total_tokens": 28378624} +{"current_steps": 9015, "total_steps": 15621, "loss": 0.3865, "lr": 9.058338415536962e-07, "epoch": 0.5771077395813328, "percentage": 57.71, "elapsed_time": "0:34:20", "remaining_time": "0:25:09", "throughput": 13782.4, "total_tokens": 28394048} +{"current_steps": 9020, "total_steps": 15621, "loss": 0.3808, "lr": 9.04721497321432e-07, "epoch": 0.5774278215223098, "percentage": 57.74, "elapsed_time": "0:34:20", "remaining_time": "0:25:08", "throughput": 13785.88, "total_tokens": 28409664} +{"current_steps": 9025, "total_steps": 15621, "loss": 0.3744, "lr": 9.036092720454977e-07, "epoch": 0.5777479034632866, "percentage": 57.77, "elapsed_time": "0:34:21", "remaining_time": "0:25:06", "throughput": 13789.24, "total_tokens": 28424768} +{"current_steps": 9030, "total_steps": 15621, "loss": 0.3387, "lr": 9.024971671145189e-07, "epoch": 0.5780679854042635, "percentage": 57.81, "elapsed_time": "0:34:21", "remaining_time": "0:25:05", "throughput": 13792.45, "total_tokens": 28439424} +{"current_steps": 9035, "total_steps": 15621, "loss": 0.4406, "lr": 9.013851839169718e-07, "epoch": 0.5783880673452404, "percentage": 57.84, "elapsed_time": "0:34:22", "remaining_time": "0:25:03", "throughput": 13796.29, "total_tokens": 28456064} +{"current_steps": 9040, "total_steps": 15621, "loss": 0.3388, "lr": 9.002733238411801e-07, "epoch": 0.5787081492862173, "percentage": 57.87, "elapsed_time": "0:34:23", "remaining_time": "0:25:02", "throughput": 13800.09, "total_tokens": 28472768} +{"current_steps": 9045, "total_steps": 15621, "loss": 0.3489, "lr": 8.991615882753147e-07, "epoch": 0.5790282312271942, "percentage": 57.9, "elapsed_time": "0:34:23", "remaining_time": "0:25:00", "throughput": 13803.65, "total_tokens": 28488704} +{"current_steps": 9050, "total_steps": 15621, "loss": 0.4431, "lr": 8.980499786073904e-07, "epoch": 0.579348313168171, "percentage": 57.93, "elapsed_time": "0:34:24", "remaining_time": "0:24:58", "throughput": 13807.0, "total_tokens": 28503808} +{"current_steps": 9055, "total_steps": 15621, "loss": 0.4759, "lr": 8.969384962252645e-07, "epoch": 0.5796683951091479, "percentage": 57.97, "elapsed_time": "0:34:25", "remaining_time": "0:24:57", "throughput": 13810.76, "total_tokens": 28520320} +{"current_steps": 9060, "total_steps": 15621, "loss": 0.4431, "lr": 8.958271425166366e-07, "epoch": 0.5799884770501248, "percentage": 58.0, "elapsed_time": "0:34:25", "remaining_time": "0:24:55", "throughput": 13814.18, "total_tokens": 28535680} +{"current_steps": 9065, "total_steps": 15621, "loss": 0.396, "lr": 8.947159188690442e-07, "epoch": 0.5803085589911017, "percentage": 58.03, "elapsed_time": "0:34:26", "remaining_time": "0:24:54", "throughput": 13817.67, "total_tokens": 28551488} +{"current_steps": 9070, "total_steps": 15621, "loss": 0.4786, "lr": 8.93604826669863e-07, "epoch": 0.5806286409320787, "percentage": 58.06, "elapsed_time": "0:34:26", "remaining_time": "0:24:52", "throughput": 13821.14, "total_tokens": 28567040} +{"current_steps": 9075, "total_steps": 15621, "loss": 0.3986, "lr": 8.924938673063052e-07, "epoch": 0.5809487228730555, "percentage": 58.09, "elapsed_time": "0:34:27", "remaining_time": "0:24:51", "throughput": 13824.24, "total_tokens": 28581568} +{"current_steps": 9080, "total_steps": 15621, "loss": 0.3559, "lr": 8.913830421654166e-07, "epoch": 0.5812688048140324, "percentage": 58.13, "elapsed_time": "0:34:28", "remaining_time": "0:24:49", "throughput": 13827.71, "total_tokens": 28596992} +{"current_steps": 9085, "total_steps": 15621, "loss": 0.4757, "lr": 8.902723526340746e-07, "epoch": 0.5815888867550093, "percentage": 58.16, "elapsed_time": "0:34:28", "remaining_time": "0:24:48", "throughput": 13831.59, "total_tokens": 28613952} +{"current_steps": 9090, "total_steps": 15621, "loss": 0.4202, "lr": 8.89161800098989e-07, "epoch": 0.5819089686959862, "percentage": 58.19, "elapsed_time": "0:34:29", "remaining_time": "0:24:46", "throughput": 13834.84, "total_tokens": 28628736} +{"current_steps": 9095, "total_steps": 15621, "loss": 0.3704, "lr": 8.880513859466974e-07, "epoch": 0.5822290506369631, "percentage": 58.22, "elapsed_time": "0:34:29", "remaining_time": "0:24:45", "throughput": 13838.45, "total_tokens": 28644928} +{"current_steps": 9100, "total_steps": 15621, "loss": 0.278, "lr": 8.869411115635645e-07, "epoch": 0.5825491325779399, "percentage": 58.25, "elapsed_time": "0:34:30", "remaining_time": "0:24:43", "throughput": 13842.07, "total_tokens": 28661184} +{"current_steps": 9105, "total_steps": 15621, "loss": 0.2772, "lr": 8.858309783357816e-07, "epoch": 0.5828692145189168, "percentage": 58.29, "elapsed_time": "0:34:31", "remaining_time": "0:24:42", "throughput": 13845.27, "total_tokens": 28675776} +{"current_steps": 9110, "total_steps": 15621, "loss": 0.4318, "lr": 8.847209876493629e-07, "epoch": 0.5831892964598937, "percentage": 58.32, "elapsed_time": "0:34:31", "remaining_time": "0:24:40", "throughput": 13848.92, "total_tokens": 28692160} +{"current_steps": 9115, "total_steps": 15621, "loss": 0.2576, "lr": 8.836111408901441e-07, "epoch": 0.5835093784008706, "percentage": 58.35, "elapsed_time": "0:34:32", "remaining_time": "0:24:39", "throughput": 13852.23, "total_tokens": 28707328} +{"current_steps": 9120, "total_steps": 15621, "loss": 0.4235, "lr": 8.825014394437828e-07, "epoch": 0.5838294603418475, "percentage": 58.38, "elapsed_time": "0:34:32", "remaining_time": "0:24:37", "throughput": 13855.58, "total_tokens": 28722624} +{"current_steps": 9125, "total_steps": 15621, "loss": 0.3748, "lr": 8.813918846957542e-07, "epoch": 0.5841495422828245, "percentage": 58.41, "elapsed_time": "0:34:33", "remaining_time": "0:24:36", "throughput": 13858.9, "total_tokens": 28737856} +{"current_steps": 9130, "total_steps": 15621, "loss": 0.4501, "lr": 8.802824780313499e-07, "epoch": 0.5844696242238013, "percentage": 58.45, "elapsed_time": "0:34:34", "remaining_time": "0:24:34", "throughput": 13862.09, "total_tokens": 28752448} +{"current_steps": 9135, "total_steps": 15621, "loss": 0.3958, "lr": 8.791732208356771e-07, "epoch": 0.5847897061647782, "percentage": 58.48, "elapsed_time": "0:34:34", "remaining_time": "0:24:33", "throughput": 13865.44, "total_tokens": 28767616} +{"current_steps": 9140, "total_steps": 15621, "loss": 0.4649, "lr": 8.780641144936573e-07, "epoch": 0.5851097881057551, "percentage": 58.51, "elapsed_time": "0:34:35", "remaining_time": "0:24:31", "throughput": 13868.64, "total_tokens": 28782400} +{"current_steps": 9145, "total_steps": 15621, "loss": 0.4457, "lr": 8.76955160390022e-07, "epoch": 0.585429870046732, "percentage": 58.54, "elapsed_time": "0:34:35", "remaining_time": "0:24:30", "throughput": 13872.17, "total_tokens": 28798336} +{"current_steps": 9150, "total_steps": 15621, "loss": 0.2868, "lr": 8.758463599093136e-07, "epoch": 0.5857499519877089, "percentage": 58.57, "elapsed_time": "0:34:36", "remaining_time": "0:24:28", "throughput": 13875.73, "total_tokens": 28814336} +{"current_steps": 9155, "total_steps": 15621, "loss": 0.5273, "lr": 8.747377144358825e-07, "epoch": 0.5860700339286857, "percentage": 58.61, "elapsed_time": "0:34:37", "remaining_time": "0:24:27", "throughput": 13879.43, "total_tokens": 28830656} +{"current_steps": 9160, "total_steps": 15621, "loss": 0.418, "lr": 8.736292253538861e-07, "epoch": 0.5863901158696626, "percentage": 58.64, "elapsed_time": "0:34:37", "remaining_time": "0:24:25", "throughput": 13882.94, "total_tokens": 28846656} +{"current_steps": 9165, "total_steps": 15621, "loss": 0.309, "lr": 8.725208940472851e-07, "epoch": 0.5867101978106395, "percentage": 58.67, "elapsed_time": "0:34:38", "remaining_time": "0:24:24", "throughput": 13886.58, "total_tokens": 28862848} +{"current_steps": 9170, "total_steps": 15621, "loss": 0.4083, "lr": 8.714127218998448e-07, "epoch": 0.5870302797516164, "percentage": 58.7, "elapsed_time": "0:34:39", "remaining_time": "0:24:22", "throughput": 13890.0, "total_tokens": 28878400} +{"current_steps": 9175, "total_steps": 15621, "loss": 0.5084, "lr": 8.70304710295131e-07, "epoch": 0.5873503616925934, "percentage": 58.74, "elapsed_time": "0:34:39", "remaining_time": "0:24:21", "throughput": 13893.3, "total_tokens": 28893568} +{"current_steps": 9180, "total_steps": 15621, "loss": 0.367, "lr": 8.691968606165092e-07, "epoch": 0.5876704436335702, "percentage": 58.77, "elapsed_time": "0:34:40", "remaining_time": "0:24:19", "throughput": 13896.91, "total_tokens": 28909824} +{"current_steps": 9185, "total_steps": 15621, "loss": 0.3078, "lr": 8.680891742471429e-07, "epoch": 0.5879905255745471, "percentage": 58.8, "elapsed_time": "0:34:40", "remaining_time": "0:24:18", "throughput": 13900.39, "total_tokens": 28925568} +{"current_steps": 9190, "total_steps": 15621, "loss": 0.3272, "lr": 8.669816525699912e-07, "epoch": 0.588310607515524, "percentage": 58.83, "elapsed_time": "0:34:41", "remaining_time": "0:24:16", "throughput": 13903.77, "total_tokens": 28941056} +{"current_steps": 9195, "total_steps": 15621, "loss": 0.4143, "lr": 8.658742969678079e-07, "epoch": 0.5886306894565009, "percentage": 58.86, "elapsed_time": "0:34:42", "remaining_time": "0:24:15", "throughput": 13906.83, "total_tokens": 28955456} +{"current_steps": 9200, "total_steps": 15621, "loss": 0.2927, "lr": 8.647671088231398e-07, "epoch": 0.5889507713974778, "percentage": 58.9, "elapsed_time": "0:34:42", "remaining_time": "0:24:13", "throughput": 13910.24, "total_tokens": 28971136} +{"current_steps": 9205, "total_steps": 15621, "loss": 0.4087, "lr": 8.636600895183245e-07, "epoch": 0.5892708533384546, "percentage": 58.93, "elapsed_time": "0:34:43", "remaining_time": "0:24:12", "throughput": 13914.14, "total_tokens": 28988480} +{"current_steps": 9210, "total_steps": 15621, "loss": 0.3669, "lr": 8.625532404354877e-07, "epoch": 0.5895909352794315, "percentage": 58.96, "elapsed_time": "0:34:44", "remaining_time": "0:24:10", "throughput": 13917.67, "total_tokens": 29004544} +{"current_steps": 9215, "total_steps": 15621, "loss": 0.3809, "lr": 8.614465629565443e-07, "epoch": 0.5899110172204084, "percentage": 58.99, "elapsed_time": "0:34:44", "remaining_time": "0:24:09", "throughput": 13920.85, "total_tokens": 29019328} +{"current_steps": 9220, "total_steps": 15621, "loss": 0.3336, "lr": 8.603400584631939e-07, "epoch": 0.5902310991613853, "percentage": 59.02, "elapsed_time": "0:34:45", "remaining_time": "0:24:07", "throughput": 13924.21, "total_tokens": 29034752} +{"current_steps": 9225, "total_steps": 15621, "loss": 0.4422, "lr": 8.592337283369198e-07, "epoch": 0.5905511811023622, "percentage": 59.06, "elapsed_time": "0:34:45", "remaining_time": "0:24:06", "throughput": 13927.7, "total_tokens": 29050816} +{"current_steps": 9230, "total_steps": 15621, "loss": 0.2752, "lr": 8.581275739589893e-07, "epoch": 0.5908712630433391, "percentage": 59.09, "elapsed_time": "0:34:46", "remaining_time": "0:24:04", "throughput": 13930.97, "total_tokens": 29065920} +{"current_steps": 9235, "total_steps": 15621, "loss": 0.483, "lr": 8.570215967104481e-07, "epoch": 0.591191344984316, "percentage": 59.12, "elapsed_time": "0:34:47", "remaining_time": "0:24:03", "throughput": 13934.26, "total_tokens": 29080960} +{"current_steps": 9240, "total_steps": 15621, "loss": 0.4786, "lr": 8.559157979721225e-07, "epoch": 0.5915114269252929, "percentage": 59.15, "elapsed_time": "0:34:47", "remaining_time": "0:24:01", "throughput": 13937.68, "total_tokens": 29096768} +{"current_steps": 9245, "total_steps": 15621, "loss": 0.5513, "lr": 8.548101791246145e-07, "epoch": 0.5918315088662698, "percentage": 59.18, "elapsed_time": "0:34:48", "remaining_time": "0:24:00", "throughput": 13941.13, "total_tokens": 29112448} +{"current_steps": 9250, "total_steps": 15621, "loss": 0.3392, "lr": 8.537047415483028e-07, "epoch": 0.5921515908072467, "percentage": 59.22, "elapsed_time": "0:34:48", "remaining_time": "0:23:58", "throughput": 13944.44, "total_tokens": 29127808} +{"current_steps": 9255, "total_steps": 15621, "loss": 0.2774, "lr": 8.525994866233388e-07, "epoch": 0.5924716727482235, "percentage": 59.25, "elapsed_time": "0:34:49", "remaining_time": "0:23:57", "throughput": 13947.72, "total_tokens": 29142912} +{"current_steps": 9260, "total_steps": 15621, "loss": 0.3847, "lr": 8.514944157296464e-07, "epoch": 0.5927917546892004, "percentage": 59.28, "elapsed_time": "0:34:50", "remaining_time": "0:23:55", "throughput": 13951.28, "total_tokens": 29159168} +{"current_steps": 9265, "total_steps": 15621, "loss": 0.3826, "lr": 8.503895302469199e-07, "epoch": 0.5931118366301773, "percentage": 59.31, "elapsed_time": "0:34:50", "remaining_time": "0:23:54", "throughput": 13954.87, "total_tokens": 29175488} +{"current_steps": 9270, "total_steps": 15621, "loss": 0.4143, "lr": 8.492848315546214e-07, "epoch": 0.5934319185711542, "percentage": 59.34, "elapsed_time": "0:34:51", "remaining_time": "0:23:52", "throughput": 13958.26, "total_tokens": 29191104} +{"current_steps": 9275, "total_steps": 15621, "loss": 0.4172, "lr": 8.4818032103198e-07, "epoch": 0.5937520005121311, "percentage": 59.38, "elapsed_time": "0:34:51", "remaining_time": "0:23:51", "throughput": 13961.51, "total_tokens": 29206208} +{"current_steps": 9280, "total_steps": 15621, "loss": 0.4169, "lr": 8.470760000579906e-07, "epoch": 0.5940720824531079, "percentage": 59.41, "elapsed_time": "0:34:52", "remaining_time": "0:23:49", "throughput": 13964.72, "total_tokens": 29221312} +{"current_steps": 9285, "total_steps": 15621, "loss": 0.4932, "lr": 8.459718700114108e-07, "epoch": 0.5943921643940849, "percentage": 59.44, "elapsed_time": "0:34:53", "remaining_time": "0:23:48", "throughput": 13968.08, "total_tokens": 29236800} +{"current_steps": 9290, "total_steps": 15621, "loss": 0.4521, "lr": 8.448679322707595e-07, "epoch": 0.5947122463350618, "percentage": 59.47, "elapsed_time": "0:34:53", "remaining_time": "0:23:46", "throughput": 13971.53, "total_tokens": 29252480} +{"current_steps": 9295, "total_steps": 15621, "loss": 0.5845, "lr": 8.437641882143163e-07, "epoch": 0.5950323282760387, "percentage": 59.5, "elapsed_time": "0:34:54", "remaining_time": "0:23:45", "throughput": 13974.61, "total_tokens": 29266944} +{"current_steps": 9300, "total_steps": 15621, "loss": 0.319, "lr": 8.426606392201185e-07, "epoch": 0.5953524102170156, "percentage": 59.54, "elapsed_time": "0:34:54", "remaining_time": "0:23:43", "throughput": 13978.02, "total_tokens": 29282816} +{"current_steps": 9305, "total_steps": 15621, "loss": 0.3009, "lr": 8.415572866659599e-07, "epoch": 0.5956724921579925, "percentage": 59.57, "elapsed_time": "0:34:55", "remaining_time": "0:23:42", "throughput": 13981.26, "total_tokens": 29297984} +{"current_steps": 9310, "total_steps": 15621, "loss": 0.376, "lr": 8.404541319293896e-07, "epoch": 0.5959925740989693, "percentage": 59.6, "elapsed_time": "0:34:56", "remaining_time": "0:23:40", "throughput": 13984.69, "total_tokens": 29313664} +{"current_steps": 9315, "total_steps": 15621, "loss": 0.5842, "lr": 8.393511763877086e-07, "epoch": 0.5963126560399462, "percentage": 59.63, "elapsed_time": "0:34:56", "remaining_time": "0:23:39", "throughput": 13988.14, "total_tokens": 29329472} +{"current_steps": 9320, "total_steps": 15621, "loss": 0.4463, "lr": 8.3824842141797e-07, "epoch": 0.5966327379809231, "percentage": 59.66, "elapsed_time": "0:34:57", "remaining_time": "0:23:37", "throughput": 13991.85, "total_tokens": 29346048} +{"current_steps": 9325, "total_steps": 15621, "loss": 0.3801, "lr": 8.371458683969765e-07, "epoch": 0.5969528199219, "percentage": 59.7, "elapsed_time": "0:34:57", "remaining_time": "0:23:36", "throughput": 13995.21, "total_tokens": 29361664} +{"current_steps": 9330, "total_steps": 15621, "loss": 0.3887, "lr": 8.360435187012787e-07, "epoch": 0.5972729018628768, "percentage": 59.73, "elapsed_time": "0:34:58", "remaining_time": "0:23:35", "throughput": 13998.46, "total_tokens": 29376896} +{"current_steps": 9335, "total_steps": 15621, "loss": 0.3767, "lr": 8.349413737071725e-07, "epoch": 0.5975929838038538, "percentage": 59.76, "elapsed_time": "0:34:59", "remaining_time": "0:23:33", "throughput": 14001.86, "total_tokens": 29392640} +{"current_steps": 9340, "total_steps": 15621, "loss": 0.4399, "lr": 8.338394347906994e-07, "epoch": 0.5979130657448307, "percentage": 59.79, "elapsed_time": "0:34:59", "remaining_time": "0:23:32", "throughput": 14005.07, "total_tokens": 29407808} +{"current_steps": 9345, "total_steps": 15621, "loss": 0.2995, "lr": 8.327377033276431e-07, "epoch": 0.5982331476858076, "percentage": 59.82, "elapsed_time": "0:35:00", "remaining_time": "0:23:30", "throughput": 14008.18, "total_tokens": 29422528} +{"current_steps": 9350, "total_steps": 15621, "loss": 0.3481, "lr": 8.316361806935279e-07, "epoch": 0.5985532296267845, "percentage": 59.86, "elapsed_time": "0:35:00", "remaining_time": "0:23:29", "throughput": 14011.56, "total_tokens": 29438272} +{"current_steps": 9355, "total_steps": 15621, "loss": 0.4557, "lr": 8.305348682636177e-07, "epoch": 0.5988733115677614, "percentage": 59.89, "elapsed_time": "0:35:01", "remaining_time": "0:23:27", "throughput": 14014.78, "total_tokens": 29453376} +{"current_steps": 9360, "total_steps": 15621, "loss": 0.4204, "lr": 8.294337674129144e-07, "epoch": 0.5991933935087382, "percentage": 59.92, "elapsed_time": "0:35:02", "remaining_time": "0:23:26", "throughput": 14018.26, "total_tokens": 29469248} +{"current_steps": 9365, "total_steps": 15621, "loss": 0.2783, "lr": 8.283328795161554e-07, "epoch": 0.5995134754497151, "percentage": 59.95, "elapsed_time": "0:35:02", "remaining_time": "0:23:24", "throughput": 14021.88, "total_tokens": 29485888} +{"current_steps": 9370, "total_steps": 15621, "loss": 0.3194, "lr": 8.272322059478114e-07, "epoch": 0.599833557390692, "percentage": 59.98, "elapsed_time": "0:35:03", "remaining_time": "0:23:23", "throughput": 14025.03, "total_tokens": 29500864} +{"current_steps": 9375, "total_steps": 15621, "loss": 0.2312, "lr": 8.261317480820871e-07, "epoch": 0.6001536393316689, "percentage": 60.02, "elapsed_time": "0:35:04", "remaining_time": "0:23:21", "throughput": 14028.34, "total_tokens": 29516288} +{"current_steps": 9380, "total_steps": 15621, "loss": 0.4, "lr": 8.250315072929168e-07, "epoch": 0.6004737212726458, "percentage": 60.05, "elapsed_time": "0:35:04", "remaining_time": "0:23:20", "throughput": 14031.37, "total_tokens": 29530880} +{"current_steps": 9384, "total_steps": 15621, "eval_loss": 0.3916759490966797, "epoch": 0.6007297868254273, "percentage": 60.07, "elapsed_time": "0:35:54", "remaining_time": "0:23:51", "throughput": 13714.5, "total_tokens": 29544576} +{"current_steps": 9385, "total_steps": 15621, "loss": 0.3513, "lr": 8.239314849539637e-07, "epoch": 0.6007938032136226, "percentage": 60.08, "elapsed_time": "0:36:22", "remaining_time": "0:24:10", "throughput": 13540.28, "total_tokens": 29547840} +{"current_steps": 9390, "total_steps": 15621, "loss": 0.4204, "lr": 8.228316824386193e-07, "epoch": 0.6011138851545996, "percentage": 60.11, "elapsed_time": "0:36:22", "remaining_time": "0:24:08", "throughput": 13543.81, "total_tokens": 29564096} +{"current_steps": 9395, "total_steps": 15621, "loss": 0.3633, "lr": 8.217321011199995e-07, "epoch": 0.6014339670955765, "percentage": 60.14, "elapsed_time": "0:36:23", "remaining_time": "0:24:06", "throughput": 13547.11, "total_tokens": 29579520} +{"current_steps": 9400, "total_steps": 15621, "loss": 0.4256, "lr": 8.206327423709441e-07, "epoch": 0.6017540490365534, "percentage": 60.18, "elapsed_time": "0:36:24", "remaining_time": "0:24:05", "throughput": 13550.2, "total_tokens": 29594048} +{"current_steps": 9405, "total_steps": 15621, "loss": 0.3871, "lr": 8.195336075640163e-07, "epoch": 0.6020741309775303, "percentage": 60.21, "elapsed_time": "0:36:24", "remaining_time": "0:24:03", "throughput": 13553.75, "total_tokens": 29610368} +{"current_steps": 9410, "total_steps": 15621, "loss": 0.4232, "lr": 8.184346980714984e-07, "epoch": 0.6023942129185071, "percentage": 60.24, "elapsed_time": "0:36:25", "remaining_time": "0:24:02", "throughput": 13557.05, "total_tokens": 29625792} +{"current_steps": 9415, "total_steps": 15621, "loss": 0.3399, "lr": 8.173360152653914e-07, "epoch": 0.602714294859484, "percentage": 60.27, "elapsed_time": "0:36:25", "remaining_time": "0:24:00", "throughput": 13560.6, "total_tokens": 29642240} +{"current_steps": 9420, "total_steps": 15621, "loss": 0.293, "lr": 8.162375605174143e-07, "epoch": 0.6030343768004609, "percentage": 60.3, "elapsed_time": "0:36:26", "remaining_time": "0:23:59", "throughput": 13564.03, "total_tokens": 29658176} +{"current_steps": 9425, "total_steps": 15621, "loss": 0.3118, "lr": 8.151393351990005e-07, "epoch": 0.6033544587414378, "percentage": 60.34, "elapsed_time": "0:36:27", "remaining_time": "0:23:57", "throughput": 13567.86, "total_tokens": 29675392} +{"current_steps": 9430, "total_steps": 15621, "loss": 0.4241, "lr": 8.140413406812971e-07, "epoch": 0.6036745406824147, "percentage": 60.37, "elapsed_time": "0:36:27", "remaining_time": "0:23:56", "throughput": 13570.98, "total_tokens": 29690048} +{"current_steps": 9435, "total_steps": 15621, "loss": 0.3052, "lr": 8.129435783351635e-07, "epoch": 0.6039946226233915, "percentage": 60.4, "elapsed_time": "0:36:28", "remaining_time": "0:23:54", "throughput": 13574.2, "total_tokens": 29705088} +{"current_steps": 9440, "total_steps": 15621, "loss": 0.4482, "lr": 8.118460495311685e-07, "epoch": 0.6043147045643685, "percentage": 60.43, "elapsed_time": "0:36:28", "remaining_time": "0:23:53", "throughput": 13577.52, "total_tokens": 29720576} +{"current_steps": 9445, "total_steps": 15621, "loss": 0.4204, "lr": 8.107487556395901e-07, "epoch": 0.6046347865053454, "percentage": 60.46, "elapsed_time": "0:36:29", "remaining_time": "0:23:51", "throughput": 13581.09, "total_tokens": 29736896} +{"current_steps": 9450, "total_steps": 15621, "loss": 0.3567, "lr": 8.096516980304115e-07, "epoch": 0.6049548684463223, "percentage": 60.5, "elapsed_time": "0:36:30", "remaining_time": "0:23:50", "throughput": 13584.48, "total_tokens": 29752768} +{"current_steps": 9455, "total_steps": 15621, "loss": 0.3355, "lr": 8.085548780733238e-07, "epoch": 0.6052749503872992, "percentage": 60.53, "elapsed_time": "0:36:30", "remaining_time": "0:23:48", "throughput": 13587.9, "total_tokens": 29768640} +{"current_steps": 9460, "total_steps": 15621, "loss": 0.338, "lr": 8.074582971377182e-07, "epoch": 0.605595032328276, "percentage": 60.56, "elapsed_time": "0:36:31", "remaining_time": "0:23:47", "throughput": 13591.73, "total_tokens": 29786240} +{"current_steps": 9465, "total_steps": 15621, "loss": 0.4356, "lr": 8.063619565926892e-07, "epoch": 0.6059151142692529, "percentage": 60.59, "elapsed_time": "0:36:32", "remaining_time": "0:23:45", "throughput": 13595.13, "total_tokens": 29802176} +{"current_steps": 9470, "total_steps": 15621, "loss": 0.3912, "lr": 8.052658578070313e-07, "epoch": 0.6062351962102298, "percentage": 60.62, "elapsed_time": "0:36:32", "remaining_time": "0:23:44", "throughput": 13598.42, "total_tokens": 29817600} +{"current_steps": 9475, "total_steps": 15621, "loss": 0.3313, "lr": 8.041700021492362e-07, "epoch": 0.6065552781512067, "percentage": 60.66, "elapsed_time": "0:36:33", "remaining_time": "0:23:42", "throughput": 13601.7, "total_tokens": 29832960} +{"current_steps": 9480, "total_steps": 15621, "loss": 0.2888, "lr": 8.030743909874924e-07, "epoch": 0.6068753600921836, "percentage": 60.69, "elapsed_time": "0:36:33", "remaining_time": "0:23:41", "throughput": 13604.95, "total_tokens": 29848448} +{"current_steps": 9485, "total_steps": 15621, "loss": 0.3247, "lr": 8.019790256896839e-07, "epoch": 0.6071954420331604, "percentage": 60.72, "elapsed_time": "0:36:34", "remaining_time": "0:23:39", "throughput": 13608.09, "total_tokens": 29863296} +{"current_steps": 9490, "total_steps": 15621, "loss": 0.3806, "lr": 8.008839076233871e-07, "epoch": 0.6075155239741373, "percentage": 60.75, "elapsed_time": "0:36:35", "remaining_time": "0:23:38", "throughput": 13611.75, "total_tokens": 29880128} +{"current_steps": 9495, "total_steps": 15621, "loss": 0.3618, "lr": 7.997890381558691e-07, "epoch": 0.6078356059151143, "percentage": 60.78, "elapsed_time": "0:36:35", "remaining_time": "0:23:36", "throughput": 13614.99, "total_tokens": 29895296} +{"current_steps": 9500, "total_steps": 15621, "loss": 0.4291, "lr": 7.986944186540878e-07, "epoch": 0.6081556878560912, "percentage": 60.82, "elapsed_time": "0:36:36", "remaining_time": "0:23:35", "throughput": 13618.43, "total_tokens": 29911296} +{"current_steps": 9505, "total_steps": 15621, "loss": 0.4594, "lr": 7.976000504846885e-07, "epoch": 0.6084757697970681, "percentage": 60.85, "elapsed_time": "0:36:36", "remaining_time": "0:23:33", "throughput": 13621.8, "total_tokens": 29926912} +{"current_steps": 9510, "total_steps": 15621, "loss": 0.4726, "lr": 7.965059350140024e-07, "epoch": 0.608795851738045, "percentage": 60.88, "elapsed_time": "0:36:37", "remaining_time": "0:23:32", "throughput": 13625.07, "total_tokens": 29942272} +{"current_steps": 9515, "total_steps": 15621, "loss": 0.4037, "lr": 7.954120736080461e-07, "epoch": 0.6091159336790218, "percentage": 60.91, "elapsed_time": "0:36:38", "remaining_time": "0:23:30", "throughput": 13628.37, "total_tokens": 29958016} +{"current_steps": 9520, "total_steps": 15621, "loss": 0.5797, "lr": 7.943184676325178e-07, "epoch": 0.6094360156199987, "percentage": 60.94, "elapsed_time": "0:36:38", "remaining_time": "0:23:29", "throughput": 13631.99, "total_tokens": 29974720} +{"current_steps": 9525, "total_steps": 15621, "loss": 0.4342, "lr": 7.932251184527974e-07, "epoch": 0.6097560975609756, "percentage": 60.98, "elapsed_time": "0:36:39", "remaining_time": "0:23:27", "throughput": 13635.66, "total_tokens": 29991680} +{"current_steps": 9530, "total_steps": 15621, "loss": 0.2753, "lr": 7.921320274339446e-07, "epoch": 0.6100761795019525, "percentage": 61.01, "elapsed_time": "0:36:40", "remaining_time": "0:23:26", "throughput": 13638.96, "total_tokens": 30007168} +{"current_steps": 9535, "total_steps": 15621, "loss": 0.3337, "lr": 7.910391959406966e-07, "epoch": 0.6103962614429294, "percentage": 61.04, "elapsed_time": "0:36:40", "remaining_time": "0:23:24", "throughput": 13642.23, "total_tokens": 30022656} +{"current_steps": 9540, "total_steps": 15621, "loss": 0.3943, "lr": 7.899466253374653e-07, "epoch": 0.6107163433839062, "percentage": 61.07, "elapsed_time": "0:36:41", "remaining_time": "0:23:23", "throughput": 13645.52, "total_tokens": 30038144} +{"current_steps": 9545, "total_steps": 15621, "loss": 0.3347, "lr": 7.88854316988339e-07, "epoch": 0.6110364253248832, "percentage": 61.1, "elapsed_time": "0:36:41", "remaining_time": "0:23:21", "throughput": 13649.33, "total_tokens": 30055488} +{"current_steps": 9550, "total_steps": 15621, "loss": 0.3016, "lr": 7.877622722570771e-07, "epoch": 0.6113565072658601, "percentage": 61.14, "elapsed_time": "0:36:42", "remaining_time": "0:23:20", "throughput": 13652.66, "total_tokens": 30071040} +{"current_steps": 9555, "total_steps": 15621, "loss": 0.4185, "lr": 7.866704925071101e-07, "epoch": 0.611676589206837, "percentage": 61.17, "elapsed_time": "0:36:43", "remaining_time": "0:23:18", "throughput": 13656.34, "total_tokens": 30088000} +{"current_steps": 9560, "total_steps": 15621, "loss": 0.422, "lr": 7.855789791015377e-07, "epoch": 0.6119966711478139, "percentage": 61.2, "elapsed_time": "0:36:43", "remaining_time": "0:23:17", "throughput": 13659.47, "total_tokens": 30103040} +{"current_steps": 9565, "total_steps": 15621, "loss": 0.3946, "lr": 7.844877334031277e-07, "epoch": 0.6123167530887907, "percentage": 61.23, "elapsed_time": "0:36:44", "remaining_time": "0:23:15", "throughput": 13662.56, "total_tokens": 30117760} +{"current_steps": 9570, "total_steps": 15621, "loss": 0.4797, "lr": 7.833967567743131e-07, "epoch": 0.6126368350297676, "percentage": 61.26, "elapsed_time": "0:36:45", "remaining_time": "0:23:14", "throughput": 13666.03, "total_tokens": 30133888} +{"current_steps": 9575, "total_steps": 15621, "loss": 0.3747, "lr": 7.823060505771903e-07, "epoch": 0.6129569169707445, "percentage": 61.3, "elapsed_time": "0:36:45", "remaining_time": "0:23:12", "throughput": 13669.25, "total_tokens": 30149312} +{"current_steps": 9580, "total_steps": 15621, "loss": 0.3944, "lr": 7.812156161735199e-07, "epoch": 0.6132769989117214, "percentage": 61.33, "elapsed_time": "0:36:46", "remaining_time": "0:23:11", "throughput": 13672.3, "total_tokens": 30163840} +{"current_steps": 9585, "total_steps": 15621, "loss": 0.5462, "lr": 7.801254549247215e-07, "epoch": 0.6135970808526983, "percentage": 61.36, "elapsed_time": "0:36:46", "remaining_time": "0:23:09", "throughput": 13675.91, "total_tokens": 30180544} +{"current_steps": 9590, "total_steps": 15621, "loss": 0.3212, "lr": 7.790355681918739e-07, "epoch": 0.6139171627936751, "percentage": 61.39, "elapsed_time": "0:36:47", "remaining_time": "0:23:08", "throughput": 13679.43, "total_tokens": 30197120} +{"current_steps": 9595, "total_steps": 15621, "loss": 0.421, "lr": 7.779459573357144e-07, "epoch": 0.614237244734652, "percentage": 61.42, "elapsed_time": "0:36:48", "remaining_time": "0:23:06", "throughput": 13682.87, "total_tokens": 30213376} +{"current_steps": 9600, "total_steps": 15621, "loss": 0.4225, "lr": 7.768566237166338e-07, "epoch": 0.614557326675629, "percentage": 61.46, "elapsed_time": "0:36:48", "remaining_time": "0:23:05", "throughput": 13686.24, "total_tokens": 30229120} +{"current_steps": 9605, "total_steps": 15621, "loss": 0.5064, "lr": 7.757675686946786e-07, "epoch": 0.6148774086166059, "percentage": 61.49, "elapsed_time": "0:36:49", "remaining_time": "0:23:03", "throughput": 13689.47, "total_tokens": 30244544} +{"current_steps": 9610, "total_steps": 15621, "loss": 0.4207, "lr": 7.746787936295468e-07, "epoch": 0.6151974905575828, "percentage": 61.52, "elapsed_time": "0:36:49", "remaining_time": "0:23:02", "throughput": 13692.94, "total_tokens": 30260864} +{"current_steps": 9615, "total_steps": 15621, "loss": 0.3739, "lr": 7.735902998805868e-07, "epoch": 0.6155175724985597, "percentage": 61.55, "elapsed_time": "0:36:50", "remaining_time": "0:23:00", "throughput": 13695.93, "total_tokens": 30275456} +{"current_steps": 9620, "total_steps": 15621, "loss": 0.4195, "lr": 7.725020888067955e-07, "epoch": 0.6158376544395365, "percentage": 61.58, "elapsed_time": "0:36:51", "remaining_time": "0:22:59", "throughput": 13699.19, "total_tokens": 30291008} +{"current_steps": 9625, "total_steps": 15621, "loss": 0.4814, "lr": 7.714141617668176e-07, "epoch": 0.6161577363805134, "percentage": 61.62, "elapsed_time": "0:36:51", "remaining_time": "0:22:57", "throughput": 13702.53, "total_tokens": 30306816} +{"current_steps": 9630, "total_steps": 15621, "loss": 0.3298, "lr": 7.703265201189426e-07, "epoch": 0.6164778183214903, "percentage": 61.65, "elapsed_time": "0:36:52", "remaining_time": "0:22:56", "throughput": 13705.71, "total_tokens": 30322240} +{"current_steps": 9635, "total_steps": 15621, "loss": 0.3357, "lr": 7.692391652211036e-07, "epoch": 0.6167979002624672, "percentage": 61.68, "elapsed_time": "0:36:53", "remaining_time": "0:22:54", "throughput": 13709.01, "total_tokens": 30338048} +{"current_steps": 9640, "total_steps": 15621, "loss": 0.3313, "lr": 7.681520984308769e-07, "epoch": 0.617117982203444, "percentage": 61.71, "elapsed_time": "0:36:53", "remaining_time": "0:22:53", "throughput": 13712.37, "total_tokens": 30353984} +{"current_steps": 9645, "total_steps": 15621, "loss": 0.4902, "lr": 7.670653211054772e-07, "epoch": 0.6174380641444209, "percentage": 61.74, "elapsed_time": "0:36:54", "remaining_time": "0:22:51", "throughput": 13715.71, "total_tokens": 30370048} +{"current_steps": 9650, "total_steps": 15621, "loss": 0.413, "lr": 7.659788346017591e-07, "epoch": 0.6177581460853978, "percentage": 61.78, "elapsed_time": "0:36:54", "remaining_time": "0:22:50", "throughput": 13718.88, "total_tokens": 30385344} +{"current_steps": 9655, "total_steps": 15621, "loss": 0.3813, "lr": 7.648926402762133e-07, "epoch": 0.6180782280263748, "percentage": 61.81, "elapsed_time": "0:36:55", "remaining_time": "0:22:48", "throughput": 13722.01, "total_tokens": 30400576} +{"current_steps": 9660, "total_steps": 15621, "loss": 0.3867, "lr": 7.638067394849671e-07, "epoch": 0.6183983099673517, "percentage": 61.84, "elapsed_time": "0:36:56", "remaining_time": "0:22:47", "throughput": 13725.1, "total_tokens": 30415424} +{"current_steps": 9665, "total_steps": 15621, "loss": 0.4056, "lr": 7.627211335837797e-07, "epoch": 0.6187183919083286, "percentage": 61.87, "elapsed_time": "0:36:56", "remaining_time": "0:22:45", "throughput": 13728.24, "total_tokens": 30430592} +{"current_steps": 9670, "total_steps": 15621, "loss": 0.4352, "lr": 7.616358239280427e-07, "epoch": 0.6190384738493054, "percentage": 61.9, "elapsed_time": "0:36:57", "remaining_time": "0:22:44", "throughput": 13731.41, "total_tokens": 30445952} +{"current_steps": 9675, "total_steps": 15621, "loss": 0.3274, "lr": 7.605508118727787e-07, "epoch": 0.6193585557902823, "percentage": 61.94, "elapsed_time": "0:36:57", "remaining_time": "0:22:43", "throughput": 13734.67, "total_tokens": 30461568} +{"current_steps": 9680, "total_steps": 15621, "loss": 0.3611, "lr": 7.594660987726373e-07, "epoch": 0.6196786377312592, "percentage": 61.97, "elapsed_time": "0:36:58", "remaining_time": "0:22:41", "throughput": 13737.77, "total_tokens": 30476672} +{"current_steps": 9685, "total_steps": 15621, "loss": 0.4013, "lr": 7.583816859818956e-07, "epoch": 0.6199987196722361, "percentage": 62.0, "elapsed_time": "0:36:59", "remaining_time": "0:22:40", "throughput": 13741.07, "total_tokens": 30492672} +{"current_steps": 9690, "total_steps": 15621, "loss": 0.3785, "lr": 7.57297574854456e-07, "epoch": 0.620318801613213, "percentage": 62.03, "elapsed_time": "0:36:59", "remaining_time": "0:22:38", "throughput": 13744.15, "total_tokens": 30507712} +{"current_steps": 9695, "total_steps": 15621, "loss": 0.4395, "lr": 7.56213766743844e-07, "epoch": 0.6206388835541898, "percentage": 62.06, "elapsed_time": "0:37:00", "remaining_time": "0:22:37", "throughput": 13747.61, "total_tokens": 30524032} +{"current_steps": 9700, "total_steps": 15621, "loss": 0.333, "lr": 7.551302630032064e-07, "epoch": 0.6209589654951667, "percentage": 62.1, "elapsed_time": "0:37:00", "remaining_time": "0:22:35", "throughput": 13750.87, "total_tokens": 30539776} +{"current_steps": 9705, "total_steps": 15621, "loss": 0.3693, "lr": 7.540470649853106e-07, "epoch": 0.6212790474361437, "percentage": 62.13, "elapsed_time": "0:37:01", "remaining_time": "0:22:34", "throughput": 13753.97, "total_tokens": 30554752} +{"current_steps": 9710, "total_steps": 15621, "loss": 0.4034, "lr": 7.529641740425419e-07, "epoch": 0.6215991293771206, "percentage": 62.16, "elapsed_time": "0:37:02", "remaining_time": "0:22:32", "throughput": 13757.64, "total_tokens": 30571968} +{"current_steps": 9715, "total_steps": 15621, "loss": 0.4351, "lr": 7.518815915269023e-07, "epoch": 0.6219192113180975, "percentage": 62.19, "elapsed_time": "0:37:02", "remaining_time": "0:22:31", "throughput": 13760.8, "total_tokens": 30587264} +{"current_steps": 9720, "total_steps": 15621, "loss": 0.3948, "lr": 7.507993187900092e-07, "epoch": 0.6222392932590743, "percentage": 62.22, "elapsed_time": "0:37:03", "remaining_time": "0:22:29", "throughput": 13764.11, "total_tokens": 30603200} +{"current_steps": 9725, "total_steps": 15621, "loss": 0.4253, "lr": 7.497173571830926e-07, "epoch": 0.6225593752000512, "percentage": 62.26, "elapsed_time": "0:37:03", "remaining_time": "0:22:28", "throughput": 13767.09, "total_tokens": 30617856} +{"current_steps": 9730, "total_steps": 15621, "loss": 0.4732, "lr": 7.486357080569938e-07, "epoch": 0.6228794571410281, "percentage": 62.29, "elapsed_time": "0:37:04", "remaining_time": "0:22:26", "throughput": 13770.04, "total_tokens": 30632448} +{"current_steps": 9735, "total_steps": 15621, "loss": 0.3747, "lr": 7.47554372762165e-07, "epoch": 0.623199539082005, "percentage": 62.32, "elapsed_time": "0:37:05", "remaining_time": "0:22:25", "throughput": 13773.18, "total_tokens": 30647680} +{"current_steps": 9740, "total_steps": 15621, "loss": 0.4905, "lr": 7.464733526486662e-07, "epoch": 0.6235196210229819, "percentage": 62.35, "elapsed_time": "0:37:05", "remaining_time": "0:22:23", "throughput": 13776.49, "total_tokens": 30663616} +{"current_steps": 9745, "total_steps": 15621, "loss": 0.3424, "lr": 7.453926490661628e-07, "epoch": 0.6238397029639587, "percentage": 62.38, "elapsed_time": "0:37:06", "remaining_time": "0:22:22", "throughput": 13780.62, "total_tokens": 30682496} +{"current_steps": 9750, "total_steps": 15621, "loss": 0.3639, "lr": 7.443122633639267e-07, "epoch": 0.6241597849049356, "percentage": 62.42, "elapsed_time": "0:37:07", "remaining_time": "0:22:21", "throughput": 13783.73, "total_tokens": 30697664} +{"current_steps": 9755, "total_steps": 15621, "loss": 0.3835, "lr": 7.432321968908319e-07, "epoch": 0.6244798668459125, "percentage": 62.45, "elapsed_time": "0:37:07", "remaining_time": "0:22:19", "throughput": 13786.99, "total_tokens": 30713408} +{"current_steps": 9760, "total_steps": 15621, "loss": 0.3173, "lr": 7.421524509953543e-07, "epoch": 0.6247999487868895, "percentage": 62.48, "elapsed_time": "0:37:08", "remaining_time": "0:22:18", "throughput": 13790.64, "total_tokens": 30730496} +{"current_steps": 9765, "total_steps": 15621, "loss": 0.4158, "lr": 7.410730270255687e-07, "epoch": 0.6251200307278664, "percentage": 62.51, "elapsed_time": "0:37:08", "remaining_time": "0:22:16", "throughput": 13793.76, "total_tokens": 30745664} +{"current_steps": 9770, "total_steps": 15621, "loss": 0.3655, "lr": 7.399939263291493e-07, "epoch": 0.6254401126688433, "percentage": 62.54, "elapsed_time": "0:37:09", "remaining_time": "0:22:15", "throughput": 13796.92, "total_tokens": 30760960} +{"current_steps": 9775, "total_steps": 15621, "loss": 0.4854, "lr": 7.389151502533657e-07, "epoch": 0.6257601946098201, "percentage": 62.58, "elapsed_time": "0:37:10", "remaining_time": "0:22:13", "throughput": 13799.91, "total_tokens": 30775872} +{"current_steps": 9780, "total_steps": 15621, "loss": 0.3683, "lr": 7.378367001450819e-07, "epoch": 0.626080276550797, "percentage": 62.61, "elapsed_time": "0:37:10", "remaining_time": "0:22:12", "throughput": 13803.15, "total_tokens": 30791424} +{"current_steps": 9785, "total_steps": 15621, "loss": 0.4317, "lr": 7.367585773507567e-07, "epoch": 0.6264003584917739, "percentage": 62.64, "elapsed_time": "0:37:11", "remaining_time": "0:22:10", "throughput": 13806.54, "total_tokens": 30807680} +{"current_steps": 9790, "total_steps": 15621, "loss": 0.4428, "lr": 7.356807832164385e-07, "epoch": 0.6267204404327508, "percentage": 62.67, "elapsed_time": "0:37:12", "remaining_time": "0:22:09", "throughput": 13809.88, "total_tokens": 30823680} +{"current_steps": 9795, "total_steps": 15621, "loss": 0.4404, "lr": 7.346033190877654e-07, "epoch": 0.6270405223737276, "percentage": 62.7, "elapsed_time": "0:37:12", "remaining_time": "0:22:07", "throughput": 13813.11, "total_tokens": 30839360} +{"current_steps": 9800, "total_steps": 15621, "loss": 0.3596, "lr": 7.335261863099651e-07, "epoch": 0.6273606043147045, "percentage": 62.74, "elapsed_time": "0:37:13", "remaining_time": "0:22:06", "throughput": 13816.29, "total_tokens": 30854784} +{"current_steps": 9805, "total_steps": 15621, "loss": 0.3969, "lr": 7.324493862278498e-07, "epoch": 0.6276806862556814, "percentage": 62.77, "elapsed_time": "0:37:13", "remaining_time": "0:22:05", "throughput": 13819.58, "total_tokens": 30870592} +{"current_steps": 9810, "total_steps": 15621, "loss": 0.4546, "lr": 7.313729201858167e-07, "epoch": 0.6280007681966584, "percentage": 62.8, "elapsed_time": "0:37:14", "remaining_time": "0:22:03", "throughput": 13822.75, "total_tokens": 30885952} +{"current_steps": 9815, "total_steps": 15621, "loss": 0.3285, "lr": 7.302967895278473e-07, "epoch": 0.6283208501376353, "percentage": 62.83, "elapsed_time": "0:37:15", "remaining_time": "0:22:02", "throughput": 13826.04, "total_tokens": 30902080} +{"current_steps": 9820, "total_steps": 15621, "loss": 0.4045, "lr": 7.292209955975028e-07, "epoch": 0.6286409320786122, "percentage": 62.86, "elapsed_time": "0:37:15", "remaining_time": "0:22:00", "throughput": 13829.69, "total_tokens": 30919232} +{"current_steps": 9825, "total_steps": 15621, "loss": 0.4068, "lr": 7.281455397379244e-07, "epoch": 0.628961014019589, "percentage": 62.9, "elapsed_time": "0:37:16", "remaining_time": "0:21:59", "throughput": 13833.37, "total_tokens": 30936448} +{"current_steps": 9830, "total_steps": 15621, "loss": 0.3249, "lr": 7.270704232918316e-07, "epoch": 0.6292810959605659, "percentage": 62.93, "elapsed_time": "0:37:16", "remaining_time": "0:21:57", "throughput": 13836.65, "total_tokens": 30952256} +{"current_steps": 9835, "total_steps": 15621, "loss": 0.401, "lr": 7.2599564760152e-07, "epoch": 0.6296011779015428, "percentage": 62.96, "elapsed_time": "0:37:17", "remaining_time": "0:21:56", "throughput": 13839.71, "total_tokens": 30967360} +{"current_steps": 9840, "total_steps": 15621, "loss": 0.3851, "lr": 7.249212140088592e-07, "epoch": 0.6299212598425197, "percentage": 62.99, "elapsed_time": "0:37:18", "remaining_time": "0:21:54", "throughput": 13842.64, "total_tokens": 30982016} +{"current_steps": 9845, "total_steps": 15621, "loss": 0.3347, "lr": 7.23847123855293e-07, "epoch": 0.6302413417834966, "percentage": 63.02, "elapsed_time": "0:37:18", "remaining_time": "0:21:53", "throughput": 13845.97, "total_tokens": 30998080} +{"current_steps": 9850, "total_steps": 15621, "loss": 0.274, "lr": 7.227733784818349e-07, "epoch": 0.6305614237244734, "percentage": 63.06, "elapsed_time": "0:37:19", "remaining_time": "0:21:52", "throughput": 13849.04, "total_tokens": 31013184} +{"current_steps": 9855, "total_steps": 15621, "loss": 0.3758, "lr": 7.216999792290683e-07, "epoch": 0.6308815056654503, "percentage": 63.09, "elapsed_time": "0:37:19", "remaining_time": "0:21:50", "throughput": 13852.24, "total_tokens": 31028800} +{"current_steps": 9860, "total_steps": 15621, "loss": 0.4837, "lr": 7.206269274371457e-07, "epoch": 0.6312015876064272, "percentage": 63.12, "elapsed_time": "0:37:20", "remaining_time": "0:21:49", "throughput": 13855.53, "total_tokens": 31044736} +{"current_steps": 9865, "total_steps": 15621, "loss": 0.3489, "lr": 7.195542244457845e-07, "epoch": 0.6315216695474042, "percentage": 63.15, "elapsed_time": "0:37:21", "remaining_time": "0:21:47", "throughput": 13858.64, "total_tokens": 31059968} +{"current_steps": 9870, "total_steps": 15621, "loss": 0.3215, "lr": 7.184818715942666e-07, "epoch": 0.6318417514883811, "percentage": 63.18, "elapsed_time": "0:37:21", "remaining_time": "0:21:46", "throughput": 13861.62, "total_tokens": 31074880} +{"current_steps": 9875, "total_steps": 15621, "loss": 0.3499, "lr": 7.174098702214374e-07, "epoch": 0.6321618334293579, "percentage": 63.22, "elapsed_time": "0:37:22", "remaining_time": "0:21:44", "throughput": 13864.8, "total_tokens": 31090432} +{"current_steps": 9880, "total_steps": 15621, "loss": 0.372, "lr": 7.163382216657033e-07, "epoch": 0.6324819153703348, "percentage": 63.25, "elapsed_time": "0:37:23", "remaining_time": "0:21:43", "throughput": 13868.26, "total_tokens": 31107264} +{"current_steps": 9885, "total_steps": 15621, "loss": 0.3531, "lr": 7.152669272650302e-07, "epoch": 0.6328019973113117, "percentage": 63.28, "elapsed_time": "0:37:23", "remaining_time": "0:21:41", "throughput": 13871.75, "total_tokens": 31124096} +{"current_steps": 9890, "total_steps": 15621, "loss": 0.3881, "lr": 7.141959883569411e-07, "epoch": 0.6331220792522886, "percentage": 63.31, "elapsed_time": "0:37:24", "remaining_time": "0:21:40", "throughput": 13874.7, "total_tokens": 31138752} +{"current_steps": 9895, "total_steps": 15621, "loss": 0.4624, "lr": 7.131254062785165e-07, "epoch": 0.6334421611932655, "percentage": 63.34, "elapsed_time": "0:37:24", "remaining_time": "0:21:39", "throughput": 13877.78, "total_tokens": 31154048} +{"current_steps": 9900, "total_steps": 15621, "loss": 0.5159, "lr": 7.120551823663907e-07, "epoch": 0.6337622431342423, "percentage": 63.38, "elapsed_time": "0:37:25", "remaining_time": "0:21:37", "throughput": 13881.18, "total_tokens": 31170304} +{"current_steps": 9905, "total_steps": 15621, "loss": 0.2778, "lr": 7.109853179567499e-07, "epoch": 0.6340823250752192, "percentage": 63.41, "elapsed_time": "0:37:26", "remaining_time": "0:21:36", "throughput": 13884.46, "total_tokens": 31186368} +{"current_steps": 9910, "total_steps": 15621, "loss": 0.4266, "lr": 7.099158143853337e-07, "epoch": 0.6344024070161961, "percentage": 63.44, "elapsed_time": "0:37:26", "remaining_time": "0:21:34", "throughput": 13887.59, "total_tokens": 31201664} +{"current_steps": 9915, "total_steps": 15621, "loss": 0.396, "lr": 7.088466729874289e-07, "epoch": 0.634722488957173, "percentage": 63.47, "elapsed_time": "0:37:27", "remaining_time": "0:21:33", "throughput": 13890.74, "total_tokens": 31217216} +{"current_steps": 9920, "total_steps": 15621, "loss": 0.3762, "lr": 7.077778950978713e-07, "epoch": 0.63504257089815, "percentage": 63.5, "elapsed_time": "0:37:27", "remaining_time": "0:21:31", "throughput": 13894.19, "total_tokens": 31233728} +{"current_steps": 9925, "total_steps": 15621, "loss": 0.4657, "lr": 7.06709482051043e-07, "epoch": 0.6353626528391269, "percentage": 63.54, "elapsed_time": "0:37:28", "remaining_time": "0:21:30", "throughput": 13897.46, "total_tokens": 31249664} +{"current_steps": 9930, "total_steps": 15621, "loss": 0.2958, "lr": 7.056414351808698e-07, "epoch": 0.6356827347801037, "percentage": 63.57, "elapsed_time": "0:37:29", "remaining_time": "0:21:29", "throughput": 13900.67, "total_tokens": 31265408} +{"current_steps": 9935, "total_steps": 15621, "loss": 0.3557, "lr": 7.045737558208206e-07, "epoch": 0.6360028167210806, "percentage": 63.6, "elapsed_time": "0:37:29", "remaining_time": "0:21:27", "throughput": 13903.85, "total_tokens": 31281088} +{"current_steps": 9940, "total_steps": 15621, "loss": 0.4025, "lr": 7.035064453039064e-07, "epoch": 0.6363228986620575, "percentage": 63.63, "elapsed_time": "0:37:30", "remaining_time": "0:21:26", "throughput": 13906.94, "total_tokens": 31296512} +{"current_steps": 9945, "total_steps": 15621, "loss": 0.3796, "lr": 7.024395049626766e-07, "epoch": 0.6366429806030344, "percentage": 63.66, "elapsed_time": "0:37:31", "remaining_time": "0:21:24", "throughput": 13910.11, "total_tokens": 31312000} +{"current_steps": 9950, "total_steps": 15621, "loss": 0.3378, "lr": 7.013729361292182e-07, "epoch": 0.6369630625440112, "percentage": 63.7, "elapsed_time": "0:37:31", "remaining_time": "0:21:23", "throughput": 13913.2, "total_tokens": 31327488} +{"current_steps": 9955, "total_steps": 15621, "loss": 0.2992, "lr": 7.003067401351554e-07, "epoch": 0.6372831444849881, "percentage": 63.73, "elapsed_time": "0:37:32", "remaining_time": "0:21:21", "throughput": 13916.59, "total_tokens": 31343936} +{"current_steps": 9960, "total_steps": 15621, "loss": 0.3971, "lr": 6.992409183116465e-07, "epoch": 0.637603226425965, "percentage": 63.76, "elapsed_time": "0:37:32", "remaining_time": "0:21:20", "throughput": 13919.66, "total_tokens": 31359232} +{"current_steps": 9965, "total_steps": 15621, "loss": 0.3715, "lr": 6.981754719893826e-07, "epoch": 0.6379233083669419, "percentage": 63.79, "elapsed_time": "0:37:33", "remaining_time": "0:21:19", "throughput": 13923.03, "total_tokens": 31375616} +{"current_steps": 9970, "total_steps": 15621, "loss": 0.4687, "lr": 6.971104024985852e-07, "epoch": 0.6382433903079189, "percentage": 63.82, "elapsed_time": "0:37:34", "remaining_time": "0:21:17", "throughput": 13926.32, "total_tokens": 31391680} +{"current_steps": 9975, "total_steps": 15621, "loss": 0.3829, "lr": 6.960457111690068e-07, "epoch": 0.6385634722488958, "percentage": 63.86, "elapsed_time": "0:37:34", "remaining_time": "0:21:16", "throughput": 13929.5, "total_tokens": 31407424} +{"current_steps": 9980, "total_steps": 15621, "loss": 0.3854, "lr": 6.94981399329927e-07, "epoch": 0.6388835541898726, "percentage": 63.89, "elapsed_time": "0:37:35", "remaining_time": "0:21:14", "throughput": 13932.62, "total_tokens": 31422912} +{"current_steps": 9985, "total_steps": 15621, "loss": 0.3806, "lr": 6.939174683101509e-07, "epoch": 0.6392036361308495, "percentage": 63.92, "elapsed_time": "0:37:35", "remaining_time": "0:21:13", "throughput": 13935.9, "total_tokens": 31438912} +{"current_steps": 9990, "total_steps": 15621, "loss": 0.2888, "lr": 6.9285391943801e-07, "epoch": 0.6395237180718264, "percentage": 63.95, "elapsed_time": "0:37:36", "remaining_time": "0:21:11", "throughput": 13939.24, "total_tokens": 31455168} +{"current_steps": 9995, "total_steps": 15621, "loss": 0.32, "lr": 6.917907540413569e-07, "epoch": 0.6398438000128033, "percentage": 63.98, "elapsed_time": "0:37:37", "remaining_time": "0:21:10", "throughput": 13942.36, "total_tokens": 31470592} +{"current_steps": 10000, "total_steps": 15621, "loss": 0.3466, "lr": 6.907279734475659e-07, "epoch": 0.6401638819537802, "percentage": 64.02, "elapsed_time": "0:37:37", "remaining_time": "0:21:09", "throughput": 13945.31, "total_tokens": 31485632} +{"current_steps": 10005, "total_steps": 15621, "loss": 0.353, "lr": 6.896655789835317e-07, "epoch": 0.640483963894757, "percentage": 64.05, "elapsed_time": "0:37:38", "remaining_time": "0:21:07", "throughput": 13948.25, "total_tokens": 31500352} +{"current_steps": 10010, "total_steps": 15621, "loss": 0.365, "lr": 6.886035719756656e-07, "epoch": 0.6408040458357339, "percentage": 64.08, "elapsed_time": "0:37:39", "remaining_time": "0:21:06", "throughput": 13951.65, "total_tokens": 31516928} +{"current_steps": 10015, "total_steps": 15621, "loss": 0.272, "lr": 6.875419537498959e-07, "epoch": 0.6411241277767108, "percentage": 64.11, "elapsed_time": "0:37:39", "remaining_time": "0:21:04", "throughput": 13954.78, "total_tokens": 31532608} +{"current_steps": 10020, "total_steps": 15621, "loss": 0.5903, "lr": 6.864807256316658e-07, "epoch": 0.6414442097176877, "percentage": 64.14, "elapsed_time": "0:37:40", "remaining_time": "0:21:03", "throughput": 13958.08, "total_tokens": 31548608} +{"current_steps": 10025, "total_steps": 15621, "loss": 0.4124, "lr": 6.854198889459311e-07, "epoch": 0.6417642916586647, "percentage": 64.18, "elapsed_time": "0:37:40", "remaining_time": "0:21:02", "throughput": 13961.22, "total_tokens": 31564224} +{"current_steps": 10030, "total_steps": 15621, "loss": 0.2575, "lr": 6.84359445017158e-07, "epoch": 0.6420843735996415, "percentage": 64.21, "elapsed_time": "0:37:41", "remaining_time": "0:21:00", "throughput": 13964.19, "total_tokens": 31579200} +{"current_steps": 10035, "total_steps": 15621, "loss": 0.4146, "lr": 6.832993951693244e-07, "epoch": 0.6424044555406184, "percentage": 64.24, "elapsed_time": "0:37:42", "remaining_time": "0:20:59", "throughput": 13967.32, "total_tokens": 31594816} +{"current_steps": 10040, "total_steps": 15621, "loss": 0.3439, "lr": 6.822397407259144e-07, "epoch": 0.6427245374815953, "percentage": 64.27, "elapsed_time": "0:37:42", "remaining_time": "0:20:57", "throughput": 13970.47, "total_tokens": 31610432} +{"current_steps": 10045, "total_steps": 15621, "loss": 0.3688, "lr": 6.811804830099186e-07, "epoch": 0.6430446194225722, "percentage": 64.3, "elapsed_time": "0:37:43", "remaining_time": "0:20:56", "throughput": 13974.03, "total_tokens": 31627520} +{"current_steps": 10050, "total_steps": 15621, "loss": 0.3446, "lr": 6.801216233438336e-07, "epoch": 0.6433647013635491, "percentage": 64.34, "elapsed_time": "0:37:43", "remaining_time": "0:20:54", "throughput": 13977.51, "total_tokens": 31644352} +{"current_steps": 10055, "total_steps": 15621, "loss": 0.3831, "lr": 6.790631630496575e-07, "epoch": 0.6436847833045259, "percentage": 64.37, "elapsed_time": "0:37:44", "remaining_time": "0:20:53", "throughput": 13980.66, "total_tokens": 31660160} +{"current_steps": 10060, "total_steps": 15621, "loss": 0.4395, "lr": 6.780051034488903e-07, "epoch": 0.6440048652455028, "percentage": 64.4, "elapsed_time": "0:37:45", "remaining_time": "0:20:52", "throughput": 13983.96, "total_tokens": 31676352} +{"current_steps": 10065, "total_steps": 15621, "loss": 0.3439, "lr": 6.769474458625323e-07, "epoch": 0.6443249471864797, "percentage": 64.43, "elapsed_time": "0:37:45", "remaining_time": "0:20:50", "throughput": 13987.17, "total_tokens": 31692160} +{"current_steps": 10070, "total_steps": 15621, "loss": 0.3099, "lr": 6.758901916110813e-07, "epoch": 0.6446450291274566, "percentage": 64.46, "elapsed_time": "0:37:46", "remaining_time": "0:20:49", "throughput": 13990.27, "total_tokens": 31707712} +{"current_steps": 10075, "total_steps": 15621, "loss": 0.3246, "lr": 6.748333420145315e-07, "epoch": 0.6449651110684336, "percentage": 64.5, "elapsed_time": "0:37:47", "remaining_time": "0:20:47", "throughput": 13993.53, "total_tokens": 31723776} +{"current_steps": 10080, "total_steps": 15621, "loss": 0.3972, "lr": 6.737768983923718e-07, "epoch": 0.6452851930094105, "percentage": 64.53, "elapsed_time": "0:37:47", "remaining_time": "0:20:46", "throughput": 13997.01, "total_tokens": 31740672} +{"current_steps": 10085, "total_steps": 15621, "loss": 0.2989, "lr": 6.727208620635849e-07, "epoch": 0.6456052749503873, "percentage": 64.56, "elapsed_time": "0:37:48", "remaining_time": "0:20:45", "throughput": 13999.99, "total_tokens": 31755648} +{"current_steps": 10090, "total_steps": 15621, "loss": 0.4543, "lr": 6.716652343466446e-07, "epoch": 0.6459253568913642, "percentage": 64.59, "elapsed_time": "0:37:48", "remaining_time": "0:20:43", "throughput": 14002.97, "total_tokens": 31770624} +{"current_steps": 10095, "total_steps": 15621, "loss": 0.3094, "lr": 6.706100165595139e-07, "epoch": 0.6462454388323411, "percentage": 64.62, "elapsed_time": "0:37:49", "remaining_time": "0:20:42", "throughput": 14006.25, "total_tokens": 31786816} +{"current_steps": 10100, "total_steps": 15621, "loss": 0.396, "lr": 6.695552100196452e-07, "epoch": 0.646565520773318, "percentage": 64.66, "elapsed_time": "0:37:50", "remaining_time": "0:20:40", "throughput": 14009.22, "total_tokens": 31801792} +{"current_steps": 10105, "total_steps": 15621, "loss": 0.5142, "lr": 6.685008160439769e-07, "epoch": 0.6468856027142948, "percentage": 64.69, "elapsed_time": "0:37:50", "remaining_time": "0:20:39", "throughput": 14012.74, "total_tokens": 31818944} +{"current_steps": 10110, "total_steps": 15621, "loss": 0.4128, "lr": 6.674468359489313e-07, "epoch": 0.6472056846552717, "percentage": 64.72, "elapsed_time": "0:37:51", "remaining_time": "0:20:38", "throughput": 14015.74, "total_tokens": 31834176} +{"current_steps": 10115, "total_steps": 15621, "loss": 0.3496, "lr": 6.663932710504163e-07, "epoch": 0.6475257665962486, "percentage": 64.75, "elapsed_time": "0:37:51", "remaining_time": "0:20:36", "throughput": 14018.95, "total_tokens": 31850176} +{"current_steps": 10120, "total_steps": 15621, "loss": 0.3894, "lr": 6.653401226638192e-07, "epoch": 0.6478458485372255, "percentage": 64.78, "elapsed_time": "0:37:52", "remaining_time": "0:20:35", "throughput": 14022.07, "total_tokens": 31865600} +{"current_steps": 10125, "total_steps": 15621, "loss": 0.3921, "lr": 6.64287392104008e-07, "epoch": 0.6481659304782024, "percentage": 64.82, "elapsed_time": "0:37:53", "remaining_time": "0:20:33", "throughput": 14025.01, "total_tokens": 31880512} +{"current_steps": 10130, "total_steps": 15621, "loss": 0.4388, "lr": 6.632350806853299e-07, "epoch": 0.6484860124191794, "percentage": 64.85, "elapsed_time": "0:37:53", "remaining_time": "0:20:32", "throughput": 14028.21, "total_tokens": 31896512} +{"current_steps": 10135, "total_steps": 15621, "loss": 0.4029, "lr": 6.621831897216074e-07, "epoch": 0.6488060943601562, "percentage": 64.88, "elapsed_time": "0:37:54", "remaining_time": "0:20:31", "throughput": 14031.47, "total_tokens": 31912768} +{"current_steps": 10140, "total_steps": 15621, "loss": 0.4345, "lr": 6.611317205261387e-07, "epoch": 0.6491261763011331, "percentage": 64.91, "elapsed_time": "0:37:54", "remaining_time": "0:20:29", "throughput": 14034.34, "total_tokens": 31927488} +{"current_steps": 10145, "total_steps": 15621, "loss": 0.3416, "lr": 6.60080674411696e-07, "epoch": 0.64944625824211, "percentage": 64.94, "elapsed_time": "0:37:55", "remaining_time": "0:20:28", "throughput": 14037.38, "total_tokens": 31942784} +{"current_steps": 10150, "total_steps": 15621, "loss": 0.3172, "lr": 6.590300526905225e-07, "epoch": 0.6497663401830869, "percentage": 64.98, "elapsed_time": "0:37:56", "remaining_time": "0:20:26", "throughput": 14040.49, "total_tokens": 31958528} +{"current_steps": 10155, "total_steps": 15621, "loss": 0.4676, "lr": 6.579798566743313e-07, "epoch": 0.6500864221240638, "percentage": 65.01, "elapsed_time": "0:37:56", "remaining_time": "0:20:25", "throughput": 14043.54, "total_tokens": 31974016} +{"current_steps": 10160, "total_steps": 15621, "loss": 0.3143, "lr": 6.569300876743049e-07, "epoch": 0.6504065040650406, "percentage": 65.04, "elapsed_time": "0:37:57", "remaining_time": "0:20:24", "throughput": 14046.93, "total_tokens": 31990720} +{"current_steps": 10165, "total_steps": 15621, "loss": 0.3188, "lr": 6.558807470010923e-07, "epoch": 0.6507265860060175, "percentage": 65.07, "elapsed_time": "0:37:58", "remaining_time": "0:20:22", "throughput": 14050.28, "total_tokens": 32007168} +{"current_steps": 10166, "total_steps": 15621, "eval_loss": 0.37842774391174316, "epoch": 0.6507906023942129, "percentage": 65.08, "elapsed_time": "0:38:47", "remaining_time": "0:20:48", "throughput": 13754.12, "total_tokens": 32010176} +{"current_steps": 10170, "total_steps": 15621, "loss": 0.3642, "lr": 6.548318359648071e-07, "epoch": 0.6510466679469944, "percentage": 65.1, "elapsed_time": "0:39:12", "remaining_time": "0:21:01", "throughput": 13609.15, "total_tokens": 32022208} +{"current_steps": 10175, "total_steps": 15621, "loss": 0.3967, "lr": 6.537833558750279e-07, "epoch": 0.6513667498879713, "percentage": 65.14, "elapsed_time": "0:39:13", "remaining_time": "0:20:59", "throughput": 13612.3, "total_tokens": 32037760} +{"current_steps": 10180, "total_steps": 15621, "loss": 0.3055, "lr": 6.527353080407938e-07, "epoch": 0.6516868318289483, "percentage": 65.17, "elapsed_time": "0:39:14", "remaining_time": "0:20:58", "throughput": 13615.29, "total_tokens": 32052800} +{"current_steps": 10185, "total_steps": 15621, "loss": 0.3366, "lr": 6.516876937706048e-07, "epoch": 0.6520069137699251, "percentage": 65.2, "elapsed_time": "0:39:14", "remaining_time": "0:20:56", "throughput": 13618.36, "total_tokens": 32068288} +{"current_steps": 10190, "total_steps": 15621, "loss": 0.3758, "lr": 6.506405143724196e-07, "epoch": 0.652326995710902, "percentage": 65.23, "elapsed_time": "0:39:15", "remaining_time": "0:20:55", "throughput": 13621.26, "total_tokens": 32083200} +{"current_steps": 10195, "total_steps": 15621, "loss": 0.4635, "lr": 6.495937711536546e-07, "epoch": 0.6526470776518789, "percentage": 65.26, "elapsed_time": "0:39:15", "remaining_time": "0:20:53", "throughput": 13624.3, "total_tokens": 32098432} +{"current_steps": 10200, "total_steps": 15621, "loss": 0.4226, "lr": 6.485474654211803e-07, "epoch": 0.6529671595928558, "percentage": 65.3, "elapsed_time": "0:39:16", "remaining_time": "0:20:52", "throughput": 13627.67, "total_tokens": 32114944} +{"current_steps": 10205, "total_steps": 15621, "loss": 0.3044, "lr": 6.475015984813217e-07, "epoch": 0.6532872415338327, "percentage": 65.33, "elapsed_time": "0:39:17", "remaining_time": "0:20:51", "throughput": 13631.04, "total_tokens": 32131520} +{"current_steps": 10210, "total_steps": 15621, "loss": 0.3158, "lr": 6.464561716398564e-07, "epoch": 0.6536073234748095, "percentage": 65.36, "elapsed_time": "0:39:17", "remaining_time": "0:20:49", "throughput": 13634.14, "total_tokens": 32147008} +{"current_steps": 10215, "total_steps": 15621, "loss": 0.3734, "lr": 6.454111862020122e-07, "epoch": 0.6539274054157864, "percentage": 65.39, "elapsed_time": "0:39:18", "remaining_time": "0:20:48", "throughput": 13637.21, "total_tokens": 32162560} +{"current_steps": 10220, "total_steps": 15621, "loss": 0.3636, "lr": 6.443666434724649e-07, "epoch": 0.6542474873567633, "percentage": 65.42, "elapsed_time": "0:39:19", "remaining_time": "0:20:46", "throughput": 13640.0, "total_tokens": 32177024} +{"current_steps": 10225, "total_steps": 15621, "loss": 0.5155, "lr": 6.43322544755339e-07, "epoch": 0.6545675692977402, "percentage": 65.46, "elapsed_time": "0:39:19", "remaining_time": "0:20:45", "throughput": 13643.17, "total_tokens": 32193024} +{"current_steps": 10230, "total_steps": 15621, "loss": 0.3365, "lr": 6.422788913542038e-07, "epoch": 0.6548876512387171, "percentage": 65.49, "elapsed_time": "0:39:20", "remaining_time": "0:20:43", "throughput": 13646.31, "total_tokens": 32208896} +{"current_steps": 10235, "total_steps": 15621, "loss": 0.3296, "lr": 6.412356845720726e-07, "epoch": 0.655207733179694, "percentage": 65.52, "elapsed_time": "0:39:20", "remaining_time": "0:20:42", "throughput": 13649.59, "total_tokens": 32225280} +{"current_steps": 10240, "total_steps": 15621, "loss": 0.3605, "lr": 6.40192925711402e-07, "epoch": 0.6555278151206709, "percentage": 65.55, "elapsed_time": "0:39:21", "remaining_time": "0:20:40", "throughput": 13652.63, "total_tokens": 32240768} +{"current_steps": 10245, "total_steps": 15621, "loss": 0.3264, "lr": 6.39150616074088e-07, "epoch": 0.6558478970616478, "percentage": 65.58, "elapsed_time": "0:39:22", "remaining_time": "0:20:39", "throughput": 13655.61, "total_tokens": 32255872} +{"current_steps": 10250, "total_steps": 15621, "loss": 0.4193, "lr": 6.381087569614668e-07, "epoch": 0.6561679790026247, "percentage": 65.62, "elapsed_time": "0:39:22", "remaining_time": "0:20:38", "throughput": 13658.96, "total_tokens": 32272512} +{"current_steps": 10255, "total_steps": 15621, "loss": 0.3828, "lr": 6.370673496743116e-07, "epoch": 0.6564880609436016, "percentage": 65.65, "elapsed_time": "0:39:23", "remaining_time": "0:20:36", "throughput": 13661.58, "total_tokens": 32286272} +{"current_steps": 10260, "total_steps": 15621, "loss": 0.4331, "lr": 6.360263955128315e-07, "epoch": 0.6568081428845784, "percentage": 65.68, "elapsed_time": "0:39:23", "remaining_time": "0:20:35", "throughput": 13664.7, "total_tokens": 32301952} +{"current_steps": 10265, "total_steps": 15621, "loss": 0.3602, "lr": 6.349858957766701e-07, "epoch": 0.6571282248255553, "percentage": 65.71, "elapsed_time": "0:39:24", "remaining_time": "0:20:33", "throughput": 13667.91, "total_tokens": 32318208} +{"current_steps": 10270, "total_steps": 15621, "loss": 0.336, "lr": 6.339458517649036e-07, "epoch": 0.6574483067665322, "percentage": 65.74, "elapsed_time": "0:39:25", "remaining_time": "0:20:32", "throughput": 13670.91, "total_tokens": 32333504} +{"current_steps": 10275, "total_steps": 15621, "loss": 0.3626, "lr": 6.329062647760395e-07, "epoch": 0.6577683887075091, "percentage": 65.78, "elapsed_time": "0:39:25", "remaining_time": "0:20:30", "throughput": 13674.3, "total_tokens": 32350208} +{"current_steps": 10280, "total_steps": 15621, "loss": 0.3351, "lr": 6.318671361080137e-07, "epoch": 0.658088470648486, "percentage": 65.81, "elapsed_time": "0:39:26", "remaining_time": "0:20:29", "throughput": 13677.29, "total_tokens": 32365376} +{"current_steps": 10285, "total_steps": 15621, "loss": 0.3306, "lr": 6.308284670581906e-07, "epoch": 0.6584085525894628, "percentage": 65.84, "elapsed_time": "0:39:26", "remaining_time": "0:20:28", "throughput": 13680.44, "total_tokens": 32381248} +{"current_steps": 10290, "total_steps": 15621, "loss": 0.4558, "lr": 6.297902589233612e-07, "epoch": 0.6587286345304398, "percentage": 65.87, "elapsed_time": "0:39:27", "remaining_time": "0:20:26", "throughput": 13683.28, "total_tokens": 32395968} +{"current_steps": 10295, "total_steps": 15621, "loss": 0.3737, "lr": 6.287525129997404e-07, "epoch": 0.6590487164714167, "percentage": 65.9, "elapsed_time": "0:39:28", "remaining_time": "0:20:25", "throughput": 13686.35, "total_tokens": 32411456} +{"current_steps": 10300, "total_steps": 15621, "loss": 0.3865, "lr": 6.277152305829656e-07, "epoch": 0.6593687984123936, "percentage": 65.94, "elapsed_time": "0:39:28", "remaining_time": "0:20:23", "throughput": 13689.4, "total_tokens": 32426880} +{"current_steps": 10305, "total_steps": 15621, "loss": 0.3281, "lr": 6.266784129680968e-07, "epoch": 0.6596888803533705, "percentage": 65.97, "elapsed_time": "0:39:29", "remaining_time": "0:20:22", "throughput": 13692.47, "total_tokens": 32442368} +{"current_steps": 10310, "total_steps": 15621, "loss": 0.3781, "lr": 6.256420614496129e-07, "epoch": 0.6600089622943474, "percentage": 66.0, "elapsed_time": "0:39:29", "remaining_time": "0:20:20", "throughput": 13695.51, "total_tokens": 32457920} +{"current_steps": 10315, "total_steps": 15621, "loss": 0.4085, "lr": 6.246061773214102e-07, "epoch": 0.6603290442353242, "percentage": 66.03, "elapsed_time": "0:39:30", "remaining_time": "0:20:19", "throughput": 13698.61, "total_tokens": 32473536} +{"current_steps": 10320, "total_steps": 15621, "loss": 0.3956, "lr": 6.235707618768032e-07, "epoch": 0.6606491261763011, "percentage": 66.06, "elapsed_time": "0:39:31", "remaining_time": "0:20:18", "throughput": 13701.93, "total_tokens": 32490240} +{"current_steps": 10325, "total_steps": 15621, "loss": 0.3506, "lr": 6.225358164085196e-07, "epoch": 0.660969208117278, "percentage": 66.1, "elapsed_time": "0:39:31", "remaining_time": "0:20:16", "throughput": 13704.96, "total_tokens": 32505728} +{"current_steps": 10330, "total_steps": 15621, "loss": 0.3521, "lr": 6.21501342208701e-07, "epoch": 0.6612892900582549, "percentage": 66.13, "elapsed_time": "0:39:32", "remaining_time": "0:20:15", "throughput": 13707.88, "total_tokens": 32520960} +{"current_steps": 10335, "total_steps": 15621, "loss": 0.4036, "lr": 6.204673405689007e-07, "epoch": 0.6616093719992318, "percentage": 66.16, "elapsed_time": "0:39:33", "remaining_time": "0:20:13", "throughput": 13710.8, "total_tokens": 32535872} +{"current_steps": 10340, "total_steps": 15621, "loss": 0.3158, "lr": 6.194338127800823e-07, "epoch": 0.6619294539402087, "percentage": 66.19, "elapsed_time": "0:39:33", "remaining_time": "0:20:12", "throughput": 13714.1, "total_tokens": 32552448} +{"current_steps": 10345, "total_steps": 15621, "loss": 0.3866, "lr": 6.184007601326165e-07, "epoch": 0.6622495358811856, "percentage": 66.22, "elapsed_time": "0:39:34", "remaining_time": "0:20:10", "throughput": 13716.97, "total_tokens": 32567232} +{"current_steps": 10350, "total_steps": 15621, "loss": 0.3515, "lr": 6.173681839162824e-07, "epoch": 0.6625696178221625, "percentage": 66.26, "elapsed_time": "0:39:34", "remaining_time": "0:20:09", "throughput": 13720.2, "total_tokens": 32583360} +{"current_steps": 10355, "total_steps": 15621, "loss": 0.3336, "lr": 6.163360854202635e-07, "epoch": 0.6628896997631394, "percentage": 66.29, "elapsed_time": "0:39:35", "remaining_time": "0:20:08", "throughput": 13723.21, "total_tokens": 32598656} +{"current_steps": 10360, "total_steps": 15621, "loss": 0.306, "lr": 6.153044659331461e-07, "epoch": 0.6632097817041163, "percentage": 66.32, "elapsed_time": "0:39:36", "remaining_time": "0:20:06", "throughput": 13726.21, "total_tokens": 32614144} +{"current_steps": 10365, "total_steps": 15621, "loss": 0.3687, "lr": 6.142733267429203e-07, "epoch": 0.6635298636450931, "percentage": 66.35, "elapsed_time": "0:39:36", "remaining_time": "0:20:05", "throughput": 13729.1, "total_tokens": 32629120} +{"current_steps": 10370, "total_steps": 15621, "loss": 0.4287, "lr": 6.132426691369748e-07, "epoch": 0.66384994558607, "percentage": 66.38, "elapsed_time": "0:39:37", "remaining_time": "0:20:03", "throughput": 13732.45, "total_tokens": 32645952} +{"current_steps": 10375, "total_steps": 15621, "loss": 0.3988, "lr": 6.122124944020977e-07, "epoch": 0.6641700275270469, "percentage": 66.42, "elapsed_time": "0:39:37", "remaining_time": "0:20:02", "throughput": 13735.54, "total_tokens": 32661696} +{"current_steps": 10380, "total_steps": 15621, "loss": 0.3753, "lr": 6.111828038244749e-07, "epoch": 0.6644901094680238, "percentage": 66.45, "elapsed_time": "0:39:38", "remaining_time": "0:20:00", "throughput": 13738.72, "total_tokens": 32677760} +{"current_steps": 10385, "total_steps": 15621, "loss": 0.2948, "lr": 6.101535986896866e-07, "epoch": 0.6648101914090007, "percentage": 66.48, "elapsed_time": "0:39:39", "remaining_time": "0:19:59", "throughput": 13741.81, "total_tokens": 32693568} +{"current_steps": 10390, "total_steps": 15621, "loss": 0.2899, "lr": 6.091248802827076e-07, "epoch": 0.6651302733499775, "percentage": 66.51, "elapsed_time": "0:39:39", "remaining_time": "0:19:58", "throughput": 13744.77, "total_tokens": 32708736} +{"current_steps": 10395, "total_steps": 15621, "loss": 0.3218, "lr": 6.080966498879048e-07, "epoch": 0.6654503552909545, "percentage": 66.55, "elapsed_time": "0:39:40", "remaining_time": "0:19:56", "throughput": 13748.09, "total_tokens": 32725440} +{"current_steps": 10400, "total_steps": 15621, "loss": 0.2962, "lr": 6.070689087890363e-07, "epoch": 0.6657704372319314, "percentage": 66.58, "elapsed_time": "0:39:40", "remaining_time": "0:19:55", "throughput": 13751.05, "total_tokens": 32740608} +{"current_steps": 10405, "total_steps": 15621, "loss": 0.3974, "lr": 6.060416582692487e-07, "epoch": 0.6660905191729083, "percentage": 66.61, "elapsed_time": "0:39:41", "remaining_time": "0:19:53", "throughput": 13754.11, "total_tokens": 32756416} +{"current_steps": 10410, "total_steps": 15621, "loss": 0.3358, "lr": 6.05014899611076e-07, "epoch": 0.6664106011138852, "percentage": 66.64, "elapsed_time": "0:39:42", "remaining_time": "0:19:52", "throughput": 13757.16, "total_tokens": 32771904} +{"current_steps": 10415, "total_steps": 15621, "loss": 0.3724, "lr": 6.039886340964391e-07, "epoch": 0.666730683054862, "percentage": 66.67, "elapsed_time": "0:39:42", "remaining_time": "0:19:51", "throughput": 13760.18, "total_tokens": 32787392} +{"current_steps": 10420, "total_steps": 15621, "loss": 0.334, "lr": 6.029628630066423e-07, "epoch": 0.6670507649958389, "percentage": 66.71, "elapsed_time": "0:39:43", "remaining_time": "0:19:49", "throughput": 13763.28, "total_tokens": 32803136} +{"current_steps": 10425, "total_steps": 15621, "loss": 0.4173, "lr": 6.019375876223724e-07, "epoch": 0.6673708469368158, "percentage": 66.74, "elapsed_time": "0:39:43", "remaining_time": "0:19:48", "throughput": 13766.25, "total_tokens": 32818624} +{"current_steps": 10430, "total_steps": 15621, "loss": 0.4672, "lr": 6.009128092236982e-07, "epoch": 0.6676909288777927, "percentage": 66.77, "elapsed_time": "0:39:44", "remaining_time": "0:19:46", "throughput": 13769.24, "total_tokens": 32833920} +{"current_steps": 10435, "total_steps": 15621, "loss": 0.3859, "lr": 5.998885290900679e-07, "epoch": 0.6680110108187696, "percentage": 66.8, "elapsed_time": "0:39:45", "remaining_time": "0:19:45", "throughput": 13772.03, "total_tokens": 32848512} +{"current_steps": 10440, "total_steps": 15621, "loss": 0.3391, "lr": 5.988647485003061e-07, "epoch": 0.6683310927597464, "percentage": 66.83, "elapsed_time": "0:39:45", "remaining_time": "0:19:43", "throughput": 13775.3, "total_tokens": 32865088} +{"current_steps": 10445, "total_steps": 15621, "loss": 0.4559, "lr": 5.978414687326164e-07, "epoch": 0.6686511747007234, "percentage": 66.87, "elapsed_time": "0:39:46", "remaining_time": "0:19:42", "throughput": 13778.67, "total_tokens": 32882048} +{"current_steps": 10450, "total_steps": 15621, "loss": 0.365, "lr": 5.968186910645745e-07, "epoch": 0.6689712566417003, "percentage": 66.9, "elapsed_time": "0:39:47", "remaining_time": "0:19:41", "throughput": 13781.88, "total_tokens": 32898624} +{"current_steps": 10455, "total_steps": 15621, "loss": 0.505, "lr": 5.957964167731305e-07, "epoch": 0.6692913385826772, "percentage": 66.93, "elapsed_time": "0:39:47", "remaining_time": "0:19:39", "throughput": 13784.84, "total_tokens": 32914176} +{"current_steps": 10460, "total_steps": 15621, "loss": 0.4068, "lr": 5.947746471346065e-07, "epoch": 0.6696114205236541, "percentage": 66.96, "elapsed_time": "0:39:48", "remaining_time": "0:19:38", "throughput": 13788.18, "total_tokens": 32931136} +{"current_steps": 10465, "total_steps": 15621, "loss": 0.3349, "lr": 5.937533834246932e-07, "epoch": 0.669931502464631, "percentage": 66.99, "elapsed_time": "0:39:49", "remaining_time": "0:19:37", "throughput": 13791.39, "total_tokens": 32947648} +{"current_steps": 10470, "total_steps": 15621, "loss": 0.3745, "lr": 5.927326269184504e-07, "epoch": 0.6702515844056078, "percentage": 67.03, "elapsed_time": "0:39:49", "remaining_time": "0:19:35", "throughput": 13794.64, "total_tokens": 32964224} +{"current_steps": 10475, "total_steps": 15621, "loss": 0.4498, "lr": 5.917123788903049e-07, "epoch": 0.6705716663465847, "percentage": 67.06, "elapsed_time": "0:39:50", "remaining_time": "0:19:34", "throughput": 13798.23, "total_tokens": 32982080} +{"current_steps": 10480, "total_steps": 15621, "loss": 0.4642, "lr": 5.906926406140484e-07, "epoch": 0.6708917482875616, "percentage": 67.09, "elapsed_time": "0:39:50", "remaining_time": "0:19:32", "throughput": 13801.16, "total_tokens": 32997440} +{"current_steps": 10485, "total_steps": 15621, "loss": 0.4298, "lr": 5.896734133628354e-07, "epoch": 0.6712118302285385, "percentage": 67.12, "elapsed_time": "0:39:51", "remaining_time": "0:19:31", "throughput": 13804.18, "total_tokens": 33013056} +{"current_steps": 10490, "total_steps": 15621, "loss": 0.3736, "lr": 5.886546984091838e-07, "epoch": 0.6715319121695154, "percentage": 67.15, "elapsed_time": "0:39:52", "remaining_time": "0:19:30", "throughput": 13807.1, "total_tokens": 33028416} +{"current_steps": 10495, "total_steps": 15621, "loss": 0.3415, "lr": 5.876364970249711e-07, "epoch": 0.6718519941104922, "percentage": 67.19, "elapsed_time": "0:39:52", "remaining_time": "0:19:28", "throughput": 13809.83, "total_tokens": 33042880} +{"current_steps": 10500, "total_steps": 15621, "loss": 0.2735, "lr": 5.866188104814336e-07, "epoch": 0.6721720760514692, "percentage": 67.22, "elapsed_time": "0:39:53", "remaining_time": "0:19:27", "throughput": 13812.77, "total_tokens": 33058240} +{"current_steps": 10505, "total_steps": 15621, "loss": 0.3792, "lr": 5.856016400491646e-07, "epoch": 0.6724921579924461, "percentage": 67.25, "elapsed_time": "0:39:53", "remaining_time": "0:19:25", "throughput": 13815.79, "total_tokens": 33073920} +{"current_steps": 10510, "total_steps": 15621, "loss": 0.3192, "lr": 5.845849869981136e-07, "epoch": 0.672812239933423, "percentage": 67.28, "elapsed_time": "0:39:54", "remaining_time": "0:19:24", "throughput": 13818.74, "total_tokens": 33089344} +{"current_steps": 10515, "total_steps": 15621, "loss": 0.3458, "lr": 5.835688525975842e-07, "epoch": 0.6731323218743999, "percentage": 67.31, "elapsed_time": "0:39:55", "remaining_time": "0:19:23", "throughput": 13821.58, "total_tokens": 33104384} +{"current_steps": 10520, "total_steps": 15621, "loss": 0.3931, "lr": 5.825532381162311e-07, "epoch": 0.6734524038153767, "percentage": 67.35, "elapsed_time": "0:39:55", "remaining_time": "0:19:21", "throughput": 13824.58, "total_tokens": 33120064} +{"current_steps": 10525, "total_steps": 15621, "loss": 0.3866, "lr": 5.815381448220619e-07, "epoch": 0.6737724857563536, "percentage": 67.38, "elapsed_time": "0:39:56", "remaining_time": "0:19:20", "throughput": 13827.65, "total_tokens": 33136128} +{"current_steps": 10530, "total_steps": 15621, "loss": 0.3452, "lr": 5.805235739824327e-07, "epoch": 0.6740925676973305, "percentage": 67.41, "elapsed_time": "0:39:57", "remaining_time": "0:19:18", "throughput": 13831.39, "total_tokens": 33154816} +{"current_steps": 10535, "total_steps": 15621, "loss": 0.5023, "lr": 5.795095268640458e-07, "epoch": 0.6744126496383074, "percentage": 67.44, "elapsed_time": "0:39:57", "remaining_time": "0:19:17", "throughput": 13834.27, "total_tokens": 33169920} +{"current_steps": 10540, "total_steps": 15621, "loss": 0.541, "lr": 5.784960047329519e-07, "epoch": 0.6747327315792843, "percentage": 67.47, "elapsed_time": "0:39:58", "remaining_time": "0:19:16", "throughput": 13837.82, "total_tokens": 33187712} +{"current_steps": 10545, "total_steps": 15621, "loss": 0.3866, "lr": 5.774830088545452e-07, "epoch": 0.6750528135202611, "percentage": 67.51, "elapsed_time": "0:39:58", "remaining_time": "0:19:14", "throughput": 13840.68, "total_tokens": 33202880} +{"current_steps": 10550, "total_steps": 15621, "loss": 0.2997, "lr": 5.76470540493563e-07, "epoch": 0.6753728954612381, "percentage": 67.54, "elapsed_time": "0:39:59", "remaining_time": "0:19:13", "throughput": 13843.79, "total_tokens": 33218944} +{"current_steps": 10555, "total_steps": 15621, "loss": 0.4652, "lr": 5.754586009140836e-07, "epoch": 0.675692977402215, "percentage": 67.57, "elapsed_time": "0:40:00", "remaining_time": "0:19:11", "throughput": 13846.77, "total_tokens": 33234688} +{"current_steps": 10560, "total_steps": 15621, "loss": 0.3679, "lr": 5.744471913795256e-07, "epoch": 0.6760130593431919, "percentage": 67.6, "elapsed_time": "0:40:00", "remaining_time": "0:19:10", "throughput": 13849.67, "total_tokens": 33249920} +{"current_steps": 10565, "total_steps": 15621, "loss": 0.3365, "lr": 5.734363131526459e-07, "epoch": 0.6763331412841688, "percentage": 67.63, "elapsed_time": "0:40:01", "remaining_time": "0:19:09", "throughput": 13852.72, "total_tokens": 33265792} +{"current_steps": 10570, "total_steps": 15621, "loss": 0.3742, "lr": 5.724259674955377e-07, "epoch": 0.6766532232251457, "percentage": 67.67, "elapsed_time": "0:40:01", "remaining_time": "0:19:07", "throughput": 13855.6, "total_tokens": 33280832} +{"current_steps": 10575, "total_steps": 15621, "loss": 0.3888, "lr": 5.714161556696291e-07, "epoch": 0.6769733051661225, "percentage": 67.7, "elapsed_time": "0:40:02", "remaining_time": "0:19:06", "throughput": 13858.64, "total_tokens": 33296576} +{"current_steps": 10580, "total_steps": 15621, "loss": 0.3388, "lr": 5.704068789356824e-07, "epoch": 0.6772933871070994, "percentage": 67.73, "elapsed_time": "0:40:03", "remaining_time": "0:19:05", "throughput": 13862.68, "total_tokens": 33316672} +{"current_steps": 10585, "total_steps": 15621, "loss": 0.3496, "lr": 5.693981385537912e-07, "epoch": 0.6776134690480763, "percentage": 67.76, "elapsed_time": "0:40:03", "remaining_time": "0:19:03", "throughput": 13865.44, "total_tokens": 33331456} +{"current_steps": 10590, "total_steps": 15621, "loss": 0.3447, "lr": 5.683899357833801e-07, "epoch": 0.6779335509890532, "percentage": 67.79, "elapsed_time": "0:40:04", "remaining_time": "0:19:02", "throughput": 13868.32, "total_tokens": 33346752} +{"current_steps": 10595, "total_steps": 15621, "loss": 0.455, "lr": 5.673822718832015e-07, "epoch": 0.67825363293003, "percentage": 67.83, "elapsed_time": "0:40:05", "remaining_time": "0:19:00", "throughput": 13871.34, "total_tokens": 33362688} +{"current_steps": 10600, "total_steps": 15621, "loss": 0.3697, "lr": 5.663751481113362e-07, "epoch": 0.6785737148710069, "percentage": 67.86, "elapsed_time": "0:40:05", "remaining_time": "0:18:59", "throughput": 13874.12, "total_tokens": 33377600} +{"current_steps": 10605, "total_steps": 15621, "loss": 0.4282, "lr": 5.653685657251896e-07, "epoch": 0.6788937968119839, "percentage": 67.89, "elapsed_time": "0:40:06", "remaining_time": "0:18:58", "throughput": 13877.07, "total_tokens": 33393280} +{"current_steps": 10610, "total_steps": 15621, "loss": 0.3746, "lr": 5.643625259814922e-07, "epoch": 0.6792138787529608, "percentage": 67.92, "elapsed_time": "0:40:07", "remaining_time": "0:18:56", "throughput": 13880.37, "total_tokens": 33410112} +{"current_steps": 10615, "total_steps": 15621, "loss": 0.3664, "lr": 5.633570301362953e-07, "epoch": 0.6795339606939377, "percentage": 67.95, "elapsed_time": "0:40:07", "remaining_time": "0:18:55", "throughput": 13883.58, "total_tokens": 33426624} +{"current_steps": 10620, "total_steps": 15621, "loss": 0.36, "lr": 5.623520794449739e-07, "epoch": 0.6798540426349146, "percentage": 67.99, "elapsed_time": "0:40:08", "remaining_time": "0:18:54", "throughput": 13886.57, "total_tokens": 33442240} +{"current_steps": 10625, "total_steps": 15621, "loss": 0.4713, "lr": 5.613476751622195e-07, "epoch": 0.6801741245758914, "percentage": 68.02, "elapsed_time": "0:40:08", "remaining_time": "0:18:52", "throughput": 13889.67, "total_tokens": 33458432} +{"current_steps": 10630, "total_steps": 15621, "loss": 0.4368, "lr": 5.603438185420426e-07, "epoch": 0.6804942065168683, "percentage": 68.05, "elapsed_time": "0:40:09", "remaining_time": "0:18:51", "throughput": 13892.61, "total_tokens": 33473856} +{"current_steps": 10635, "total_steps": 15621, "loss": 0.4714, "lr": 5.593405108377714e-07, "epoch": 0.6808142884578452, "percentage": 68.08, "elapsed_time": "0:40:10", "remaining_time": "0:18:49", "throughput": 13895.52, "total_tokens": 33489216} +{"current_steps": 10640, "total_steps": 15621, "loss": 0.4586, "lr": 5.583377533020457e-07, "epoch": 0.6811343703988221, "percentage": 68.11, "elapsed_time": "0:40:10", "remaining_time": "0:18:48", "throughput": 13898.6, "total_tokens": 33505280} +{"current_steps": 10645, "total_steps": 15621, "loss": 0.2834, "lr": 5.573355471868201e-07, "epoch": 0.681454452339799, "percentage": 68.15, "elapsed_time": "0:40:11", "remaining_time": "0:18:47", "throughput": 13901.47, "total_tokens": 33520512} +{"current_steps": 10650, "total_steps": 15621, "loss": 0.3532, "lr": 5.563338937433621e-07, "epoch": 0.6817745342807758, "percentage": 68.18, "elapsed_time": "0:40:11", "remaining_time": "0:18:45", "throughput": 13904.72, "total_tokens": 33537344} +{"current_steps": 10655, "total_steps": 15621, "loss": 0.2438, "lr": 5.553327942222472e-07, "epoch": 0.6820946162217527, "percentage": 68.21, "elapsed_time": "0:40:12", "remaining_time": "0:18:44", "throughput": 13907.48, "total_tokens": 33552128} +{"current_steps": 10660, "total_steps": 15621, "loss": 0.3547, "lr": 5.54332249873359e-07, "epoch": 0.6824146981627297, "percentage": 68.24, "elapsed_time": "0:40:13", "remaining_time": "0:18:43", "throughput": 13910.24, "total_tokens": 33566784} +{"current_steps": 10665, "total_steps": 15621, "loss": 0.3052, "lr": 5.533322619458896e-07, "epoch": 0.6827347801037066, "percentage": 68.27, "elapsed_time": "0:40:13", "remaining_time": "0:18:41", "throughput": 13913.14, "total_tokens": 33582080} +{"current_steps": 10670, "total_steps": 15621, "loss": 0.4079, "lr": 5.52332831688336e-07, "epoch": 0.6830548620446835, "percentage": 68.31, "elapsed_time": "0:40:14", "remaining_time": "0:18:40", "throughput": 13915.9, "total_tokens": 33596864} +{"current_steps": 10675, "total_steps": 15621, "loss": 0.3454, "lr": 5.513339603484981e-07, "epoch": 0.6833749439856603, "percentage": 68.34, "elapsed_time": "0:40:14", "remaining_time": "0:18:38", "throughput": 13918.97, "total_tokens": 33613056} +{"current_steps": 10680, "total_steps": 15621, "loss": 0.5049, "lr": 5.503356491734785e-07, "epoch": 0.6836950259266372, "percentage": 68.37, "elapsed_time": "0:40:15", "remaining_time": "0:18:37", "throughput": 13921.8, "total_tokens": 33628160} +{"current_steps": 10685, "total_steps": 15621, "loss": 0.4346, "lr": 5.493378994096806e-07, "epoch": 0.6840151078676141, "percentage": 68.4, "elapsed_time": "0:40:16", "remaining_time": "0:18:36", "throughput": 13925.12, "total_tokens": 33645184} +{"current_steps": 10690, "total_steps": 15621, "loss": 0.3909, "lr": 5.483407123028067e-07, "epoch": 0.684335189808591, "percentage": 68.43, "elapsed_time": "0:40:16", "remaining_time": "0:18:34", "throughput": 13928.05, "total_tokens": 33660800} +{"current_steps": 10695, "total_steps": 15621, "loss": 0.4766, "lr": 5.473440890978566e-07, "epoch": 0.6846552717495679, "percentage": 68.47, "elapsed_time": "0:40:17", "remaining_time": "0:18:33", "throughput": 13931.1, "total_tokens": 33676736} +{"current_steps": 10700, "total_steps": 15621, "loss": 0.4079, "lr": 5.463480310391261e-07, "epoch": 0.6849753536905447, "percentage": 68.5, "elapsed_time": "0:40:18", "remaining_time": "0:18:32", "throughput": 13934.16, "total_tokens": 33692928} +{"current_steps": 10705, "total_steps": 15621, "loss": 0.3839, "lr": 5.453525393702052e-07, "epoch": 0.6852954356315216, "percentage": 68.53, "elapsed_time": "0:40:18", "remaining_time": "0:18:30", "throughput": 13937.07, "total_tokens": 33708352} +{"current_steps": 10710, "total_steps": 15621, "loss": 0.3644, "lr": 5.443576153339771e-07, "epoch": 0.6856155175724986, "percentage": 68.56, "elapsed_time": "0:40:19", "remaining_time": "0:18:29", "throughput": 13940.03, "total_tokens": 33723968} +{"current_steps": 10715, "total_steps": 15621, "loss": 0.3272, "lr": 5.433632601726159e-07, "epoch": 0.6859355995134755, "percentage": 68.59, "elapsed_time": "0:40:19", "remaining_time": "0:18:27", "throughput": 13942.88, "total_tokens": 33739200} +{"current_steps": 10720, "total_steps": 15621, "loss": 0.3404, "lr": 5.42369475127586e-07, "epoch": 0.6862556814544524, "percentage": 68.63, "elapsed_time": "0:40:20", "remaining_time": "0:18:26", "throughput": 13945.85, "total_tokens": 33754944} +{"current_steps": 10725, "total_steps": 15621, "loss": 0.4709, "lr": 5.413762614396396e-07, "epoch": 0.6865757633954293, "percentage": 68.66, "elapsed_time": "0:40:21", "remaining_time": "0:18:25", "throughput": 13948.53, "total_tokens": 33769472} +{"current_steps": 10730, "total_steps": 15621, "loss": 0.4262, "lr": 5.403836203488157e-07, "epoch": 0.6868958453364061, "percentage": 68.69, "elapsed_time": "0:40:21", "remaining_time": "0:18:23", "throughput": 13951.41, "total_tokens": 33784896} +{"current_steps": 10735, "total_steps": 15621, "loss": 0.3638, "lr": 5.393915530944382e-07, "epoch": 0.687215927277383, "percentage": 68.72, "elapsed_time": "0:40:22", "remaining_time": "0:18:22", "throughput": 13954.27, "total_tokens": 33800320} +{"current_steps": 10740, "total_steps": 15621, "loss": 0.3765, "lr": 5.384000609151145e-07, "epoch": 0.6875360092183599, "percentage": 68.75, "elapsed_time": "0:40:22", "remaining_time": "0:18:21", "throughput": 13957.46, "total_tokens": 33816896} +{"current_steps": 10745, "total_steps": 15621, "loss": 0.3763, "lr": 5.374091450487353e-07, "epoch": 0.6878560911593368, "percentage": 68.79, "elapsed_time": "0:40:23", "remaining_time": "0:18:19", "throughput": 13960.59, "total_tokens": 33833344} +{"current_steps": 10750, "total_steps": 15621, "loss": 0.3352, "lr": 5.364188067324693e-07, "epoch": 0.6881761731003136, "percentage": 68.82, "elapsed_time": "0:40:24", "remaining_time": "0:18:18", "throughput": 13963.75, "total_tokens": 33849856} +{"current_steps": 10755, "total_steps": 15621, "loss": 0.3441, "lr": 5.354290472027659e-07, "epoch": 0.6884962550412905, "percentage": 68.85, "elapsed_time": "0:40:24", "remaining_time": "0:18:17", "throughput": 13966.66, "total_tokens": 33865344} +{"current_steps": 10760, "total_steps": 15621, "loss": 0.4955, "lr": 5.344398676953525e-07, "epoch": 0.6888163369822674, "percentage": 68.88, "elapsed_time": "0:40:25", "remaining_time": "0:18:15", "throughput": 13969.85, "total_tokens": 33881792} +{"current_steps": 10765, "total_steps": 15621, "loss": 0.4902, "lr": 5.334512694452303e-07, "epoch": 0.6891364189232444, "percentage": 68.91, "elapsed_time": "0:40:25", "remaining_time": "0:18:14", "throughput": 13973.03, "total_tokens": 33898368} +{"current_steps": 10770, "total_steps": 15621, "loss": 0.3489, "lr": 5.324632536866755e-07, "epoch": 0.6894565008642213, "percentage": 68.95, "elapsed_time": "0:40:26", "remaining_time": "0:18:12", "throughput": 13976.05, "total_tokens": 33914368} +{"current_steps": 10775, "total_steps": 15621, "loss": 0.3526, "lr": 5.314758216532386e-07, "epoch": 0.6897765828051982, "percentage": 68.98, "elapsed_time": "0:40:27", "remaining_time": "0:18:11", "throughput": 13978.92, "total_tokens": 33929728} +{"current_steps": 10780, "total_steps": 15621, "loss": 0.3743, "lr": 5.304889745777396e-07, "epoch": 0.690096664746175, "percentage": 69.01, "elapsed_time": "0:40:27", "remaining_time": "0:18:10", "throughput": 13981.67, "total_tokens": 33944704} +{"current_steps": 10785, "total_steps": 15621, "loss": 0.6418, "lr": 5.295027136922678e-07, "epoch": 0.6904167466871519, "percentage": 69.04, "elapsed_time": "0:40:28", "remaining_time": "0:18:08", "throughput": 13984.57, "total_tokens": 33960128} +{"current_steps": 10790, "total_steps": 15621, "loss": 0.4207, "lr": 5.285170402281827e-07, "epoch": 0.6907368286281288, "percentage": 69.07, "elapsed_time": "0:40:28", "remaining_time": "0:18:07", "throughput": 13987.3, "total_tokens": 33975104} +{"current_steps": 10795, "total_steps": 15621, "loss": 0.4588, "lr": 5.275319554161087e-07, "epoch": 0.6910569105691057, "percentage": 69.11, "elapsed_time": "0:40:29", "remaining_time": "0:18:06", "throughput": 13990.23, "total_tokens": 33990720} +{"current_steps": 10800, "total_steps": 15621, "loss": 0.4123, "lr": 5.265474604859356e-07, "epoch": 0.6913769925100826, "percentage": 69.14, "elapsed_time": "0:40:30", "remaining_time": "0:18:04", "throughput": 13993.12, "total_tokens": 34006272} +{"current_steps": 10805, "total_steps": 15621, "loss": 0.3902, "lr": 5.255635566668171e-07, "epoch": 0.6916970744510594, "percentage": 69.17, "elapsed_time": "0:40:30", "remaining_time": "0:18:03", "throughput": 13996.17, "total_tokens": 34022400} +{"current_steps": 10810, "total_steps": 15621, "loss": 0.3704, "lr": 5.245802451871686e-07, "epoch": 0.6920171563920363, "percentage": 69.2, "elapsed_time": "0:40:31", "remaining_time": "0:18:02", "throughput": 13999.25, "total_tokens": 34038720} +{"current_steps": 10815, "total_steps": 15621, "loss": 0.4316, "lr": 5.235975272746663e-07, "epoch": 0.6923372383330133, "percentage": 69.23, "elapsed_time": "0:40:32", "remaining_time": "0:18:00", "throughput": 14002.05, "total_tokens": 34053760} +{"current_steps": 10820, "total_steps": 15621, "loss": 0.3024, "lr": 5.226154041562442e-07, "epoch": 0.6926573202739902, "percentage": 69.27, "elapsed_time": "0:40:32", "remaining_time": "0:17:59", "throughput": 14005.02, "total_tokens": 34069568} +{"current_steps": 10825, "total_steps": 15621, "loss": 0.406, "lr": 5.216338770580953e-07, "epoch": 0.6929774022149671, "percentage": 69.3, "elapsed_time": "0:40:33", "remaining_time": "0:17:58", "throughput": 14008.35, "total_tokens": 34086912} +{"current_steps": 10830, "total_steps": 15621, "loss": 0.3649, "lr": 5.206529472056678e-07, "epoch": 0.6932974841559439, "percentage": 69.33, "elapsed_time": "0:40:33", "remaining_time": "0:17:56", "throughput": 14011.06, "total_tokens": 34101696} +{"current_steps": 10835, "total_steps": 15621, "loss": 0.3168, "lr": 5.196726158236637e-07, "epoch": 0.6936175660969208, "percentage": 69.36, "elapsed_time": "0:40:34", "remaining_time": "0:17:55", "throughput": 14013.63, "total_tokens": 34115904} +{"current_steps": 10840, "total_steps": 15621, "loss": 0.3372, "lr": 5.186928841360384e-07, "epoch": 0.6939376480378977, "percentage": 69.39, "elapsed_time": "0:40:35", "remaining_time": "0:17:53", "throughput": 14016.52, "total_tokens": 34131328} +{"current_steps": 10845, "total_steps": 15621, "loss": 0.4395, "lr": 5.177137533659985e-07, "epoch": 0.6942577299788746, "percentage": 69.43, "elapsed_time": "0:40:35", "remaining_time": "0:17:52", "throughput": 14019.8, "total_tokens": 34148544} +{"current_steps": 10850, "total_steps": 15621, "loss": 0.4564, "lr": 5.167352247360002e-07, "epoch": 0.6945778119198515, "percentage": 69.46, "elapsed_time": "0:40:36", "remaining_time": "0:17:51", "throughput": 14022.56, "total_tokens": 34163520} +{"current_steps": 10855, "total_steps": 15621, "loss": 0.3993, "lr": 5.157572994677479e-07, "epoch": 0.6948978938608283, "percentage": 69.49, "elapsed_time": "0:40:36", "remaining_time": "0:17:49", "throughput": 14025.31, "total_tokens": 34178368} +{"current_steps": 10860, "total_steps": 15621, "loss": 0.4055, "lr": 5.147799787821929e-07, "epoch": 0.6952179758018052, "percentage": 69.52, "elapsed_time": "0:40:37", "remaining_time": "0:17:48", "throughput": 14028.2, "total_tokens": 34193920} +{"current_steps": 10865, "total_steps": 15621, "loss": 0.485, "lr": 5.138032638995315e-07, "epoch": 0.6955380577427821, "percentage": 69.55, "elapsed_time": "0:40:38", "remaining_time": "0:17:47", "throughput": 14031.27, "total_tokens": 34210176} +{"current_steps": 10870, "total_steps": 15621, "loss": 0.3575, "lr": 5.128271560392037e-07, "epoch": 0.6958581396837591, "percentage": 69.59, "elapsed_time": "0:40:38", "remaining_time": "0:17:45", "throughput": 14034.55, "total_tokens": 34227328} +{"current_steps": 10875, "total_steps": 15621, "loss": 0.3901, "lr": 5.118516564198916e-07, "epoch": 0.696178221624736, "percentage": 69.62, "elapsed_time": "0:40:39", "remaining_time": "0:17:44", "throughput": 14037.21, "total_tokens": 34241984} +{"current_steps": 10880, "total_steps": 15621, "loss": 0.3371, "lr": 5.108767662595175e-07, "epoch": 0.6964983035657129, "percentage": 69.65, "elapsed_time": "0:40:39", "remaining_time": "0:17:43", "throughput": 14039.92, "total_tokens": 34256896} +{"current_steps": 10885, "total_steps": 15621, "loss": 0.3824, "lr": 5.099024867752446e-07, "epoch": 0.6968183855066897, "percentage": 69.68, "elapsed_time": "0:40:40", "remaining_time": "0:17:41", "throughput": 14043.07, "total_tokens": 34273792} +{"current_steps": 10890, "total_steps": 15621, "loss": 0.3219, "lr": 5.089288191834709e-07, "epoch": 0.6971384674476666, "percentage": 69.71, "elapsed_time": "0:40:41", "remaining_time": "0:17:40", "throughput": 14046.25, "total_tokens": 34290752} +{"current_steps": 10895, "total_steps": 15621, "loss": 0.3367, "lr": 5.079557646998318e-07, "epoch": 0.6974585493886435, "percentage": 69.75, "elapsed_time": "0:40:41", "remaining_time": "0:17:39", "throughput": 14049.63, "total_tokens": 34308416} +{"current_steps": 10900, "total_steps": 15621, "loss": 0.403, "lr": 5.069833245391981e-07, "epoch": 0.6977786313296204, "percentage": 69.78, "elapsed_time": "0:40:42", "remaining_time": "0:17:37", "throughput": 14052.44, "total_tokens": 34323776} +{"current_steps": 10905, "total_steps": 15621, "loss": 0.322, "lr": 5.060114999156728e-07, "epoch": 0.6980987132705972, "percentage": 69.81, "elapsed_time": "0:40:43", "remaining_time": "0:17:36", "throughput": 14055.21, "total_tokens": 34338944} +{"current_steps": 10910, "total_steps": 15621, "loss": 0.3462, "lr": 5.050402920425895e-07, "epoch": 0.6984187952115741, "percentage": 69.84, "elapsed_time": "0:40:43", "remaining_time": "0:17:35", "throughput": 14058.07, "total_tokens": 34354432} +{"current_steps": 10915, "total_steps": 15621, "loss": 0.2526, "lr": 5.040697021325128e-07, "epoch": 0.698738877152551, "percentage": 69.87, "elapsed_time": "0:40:44", "remaining_time": "0:17:33", "throughput": 14061.0, "total_tokens": 34370432} +{"current_steps": 10920, "total_steps": 15621, "loss": 0.437, "lr": 5.030997313972361e-07, "epoch": 0.699058959093528, "percentage": 69.91, "elapsed_time": "0:40:45", "remaining_time": "0:17:32", "throughput": 14063.95, "total_tokens": 34386496} +{"current_steps": 10925, "total_steps": 15621, "loss": 0.368, "lr": 5.021303810477795e-07, "epoch": 0.6993790410345049, "percentage": 69.94, "elapsed_time": "0:40:45", "remaining_time": "0:17:31", "throughput": 14066.97, "total_tokens": 34402560} +{"current_steps": 10930, "total_steps": 15621, "loss": 0.2859, "lr": 5.011616522943869e-07, "epoch": 0.6996991229754818, "percentage": 69.97, "elapsed_time": "0:40:46", "remaining_time": "0:17:29", "throughput": 14069.91, "total_tokens": 34418496} +{"current_steps": 10935, "total_steps": 15621, "loss": 0.2731, "lr": 5.001935463465289e-07, "epoch": 0.7000192049164586, "percentage": 70.0, "elapsed_time": "0:40:46", "remaining_time": "0:17:28", "throughput": 14072.9, "total_tokens": 34434752} +{"current_steps": 10940, "total_steps": 15621, "loss": 0.3965, "lr": 4.99226064412897e-07, "epoch": 0.7003392868574355, "percentage": 70.03, "elapsed_time": "0:40:47", "remaining_time": "0:17:27", "throughput": 14075.73, "total_tokens": 34450176} +{"current_steps": 10945, "total_steps": 15621, "loss": 0.4233, "lr": 4.982592077014026e-07, "epoch": 0.7006593687984124, "percentage": 70.07, "elapsed_time": "0:40:48", "remaining_time": "0:17:25", "throughput": 14078.55, "total_tokens": 34465600} +{"current_steps": 10948, "total_steps": 15621, "eval_loss": 0.37222641706466675, "epoch": 0.7008514179629985, "percentage": 70.09, "elapsed_time": "0:41:37", "remaining_time": "0:17:46", "throughput": 13803.0, "total_tokens": 34475136} +{"current_steps": 10950, "total_steps": 15621, "loss": 0.3026, "lr": 4.97292977419179e-07, "epoch": 0.7009794507393893, "percentage": 70.1, "elapsed_time": "0:42:00", "remaining_time": "0:17:55", "throughput": 13678.89, "total_tokens": 34481600} +{"current_steps": 10955, "total_steps": 15621, "loss": 0.2954, "lr": 4.963273747725755e-07, "epoch": 0.7012995326803662, "percentage": 70.13, "elapsed_time": "0:42:01", "remaining_time": "0:17:53", "throughput": 13682.17, "total_tokens": 34498752} +{"current_steps": 10960, "total_steps": 15621, "loss": 0.4061, "lr": 4.953624009671582e-07, "epoch": 0.701619614621343, "percentage": 70.16, "elapsed_time": "0:42:02", "remaining_time": "0:17:52", "throughput": 13685.05, "total_tokens": 34514240} +{"current_steps": 10965, "total_steps": 15621, "loss": 0.4161, "lr": 4.943980572077086e-07, "epoch": 0.7019396965623199, "percentage": 70.19, "elapsed_time": "0:42:02", "remaining_time": "0:17:51", "throughput": 13687.67, "total_tokens": 34528704} +{"current_steps": 10970, "total_steps": 15621, "loss": 0.3243, "lr": 4.934343446982209e-07, "epoch": 0.7022597785032968, "percentage": 70.23, "elapsed_time": "0:42:03", "remaining_time": "0:17:49", "throughput": 13690.67, "total_tokens": 34544704} +{"current_steps": 10975, "total_steps": 15621, "loss": 0.3698, "lr": 4.924712646419016e-07, "epoch": 0.7025798604442738, "percentage": 70.26, "elapsed_time": "0:42:03", "remaining_time": "0:17:48", "throughput": 13693.45, "total_tokens": 34560000} +{"current_steps": 10980, "total_steps": 15621, "loss": 0.3211, "lr": 4.915088182411674e-07, "epoch": 0.7028999423852507, "percentage": 70.29, "elapsed_time": "0:42:04", "remaining_time": "0:17:47", "throughput": 13696.28, "total_tokens": 34575296} +{"current_steps": 10985, "total_steps": 15621, "loss": 0.3715, "lr": 4.905470066976439e-07, "epoch": 0.7032200243262275, "percentage": 70.32, "elapsed_time": "0:42:05", "remaining_time": "0:17:45", "throughput": 13699.05, "total_tokens": 34590528} +{"current_steps": 10990, "total_steps": 15621, "loss": 0.4187, "lr": 4.895858312121644e-07, "epoch": 0.7035401062672044, "percentage": 70.35, "elapsed_time": "0:42:05", "remaining_time": "0:17:44", "throughput": 13701.75, "total_tokens": 34605312} +{"current_steps": 10995, "total_steps": 15621, "loss": 0.4337, "lr": 4.886252929847674e-07, "epoch": 0.7038601882081813, "percentage": 70.39, "elapsed_time": "0:42:06", "remaining_time": "0:17:42", "throughput": 13704.59, "total_tokens": 34620736} +{"current_steps": 11000, "total_steps": 15621, "loss": 0.4578, "lr": 4.876653932146963e-07, "epoch": 0.7041802701491582, "percentage": 70.42, "elapsed_time": "0:42:06", "remaining_time": "0:17:41", "throughput": 13707.55, "total_tokens": 34636736} +{"current_steps": 11005, "total_steps": 15621, "loss": 0.3782, "lr": 4.86706133100397e-07, "epoch": 0.7045003520901351, "percentage": 70.45, "elapsed_time": "0:42:07", "remaining_time": "0:17:40", "throughput": 13710.29, "total_tokens": 34651776} +{"current_steps": 11010, "total_steps": 15621, "loss": 0.2923, "lr": 4.857475138395178e-07, "epoch": 0.7048204340311119, "percentage": 70.48, "elapsed_time": "0:42:07", "remaining_time": "0:17:38", "throughput": 13712.89, "total_tokens": 34666176} +{"current_steps": 11015, "total_steps": 15621, "loss": 0.2529, "lr": 4.847895366289054e-07, "epoch": 0.7051405159720888, "percentage": 70.51, "elapsed_time": "0:42:08", "remaining_time": "0:17:37", "throughput": 13715.88, "total_tokens": 34682112} +{"current_steps": 11020, "total_steps": 15621, "loss": 0.3828, "lr": 4.838322026646057e-07, "epoch": 0.7054605979130657, "percentage": 70.55, "elapsed_time": "0:42:09", "remaining_time": "0:17:35", "throughput": 13718.58, "total_tokens": 34697024} +{"current_steps": 11025, "total_steps": 15621, "loss": 0.3577, "lr": 4.82875513141861e-07, "epoch": 0.7057806798540426, "percentage": 70.58, "elapsed_time": "0:42:09", "remaining_time": "0:17:34", "throughput": 13721.46, "total_tokens": 34712704} +{"current_steps": 11030, "total_steps": 15621, "loss": 0.3791, "lr": 4.819194692551106e-07, "epoch": 0.7061007617950196, "percentage": 70.61, "elapsed_time": "0:42:10", "remaining_time": "0:17:33", "throughput": 13724.3, "total_tokens": 34728256} +{"current_steps": 11035, "total_steps": 15621, "loss": 0.4268, "lr": 4.809640721979855e-07, "epoch": 0.7064208437359965, "percentage": 70.64, "elapsed_time": "0:42:11", "remaining_time": "0:17:31", "throughput": 13727.31, "total_tokens": 34744512} +{"current_steps": 11040, "total_steps": 15621, "loss": 0.4158, "lr": 4.8000932316331e-07, "epoch": 0.7067409256769733, "percentage": 70.67, "elapsed_time": "0:42:11", "remaining_time": "0:17:30", "throughput": 13729.91, "total_tokens": 34758912} +{"current_steps": 11045, "total_steps": 15621, "loss": 0.4037, "lr": 4.790552233431002e-07, "epoch": 0.7070610076179502, "percentage": 70.71, "elapsed_time": "0:42:12", "remaining_time": "0:17:29", "throughput": 13732.87, "total_tokens": 34774848} +{"current_steps": 11050, "total_steps": 15621, "loss": 0.4168, "lr": 4.781017739285611e-07, "epoch": 0.7073810895589271, "percentage": 70.74, "elapsed_time": "0:42:12", "remaining_time": "0:17:27", "throughput": 13735.65, "total_tokens": 34790016} +{"current_steps": 11055, "total_steps": 15621, "loss": 0.3453, "lr": 4.771489761100842e-07, "epoch": 0.707701171499904, "percentage": 70.77, "elapsed_time": "0:42:13", "remaining_time": "0:17:26", "throughput": 13738.37, "total_tokens": 34804992} +{"current_steps": 11060, "total_steps": 15621, "loss": 0.2687, "lr": 4.761968310772501e-07, "epoch": 0.7080212534408808, "percentage": 70.8, "elapsed_time": "0:42:14", "remaining_time": "0:17:24", "throughput": 13741.17, "total_tokens": 34820288} +{"current_steps": 11065, "total_steps": 15621, "loss": 0.2718, "lr": 4.7524534001882267e-07, "epoch": 0.7083413353818577, "percentage": 70.83, "elapsed_time": "0:42:14", "remaining_time": "0:17:23", "throughput": 13744.07, "total_tokens": 34836096} +{"current_steps": 11070, "total_steps": 15621, "loss": 0.394, "lr": 4.7429450412274897e-07, "epoch": 0.7086614173228346, "percentage": 70.87, "elapsed_time": "0:42:15", "remaining_time": "0:17:22", "throughput": 13746.89, "total_tokens": 34851584} +{"current_steps": 11075, "total_steps": 15621, "loss": 0.3458, "lr": 4.733443245761596e-07, "epoch": 0.7089814992638115, "percentage": 70.9, "elapsed_time": "0:42:15", "remaining_time": "0:17:20", "throughput": 13749.94, "total_tokens": 34868032} +{"current_steps": 11080, "total_steps": 15621, "loss": 0.3821, "lr": 4.723948025653646e-07, "epoch": 0.7093015812047885, "percentage": 70.93, "elapsed_time": "0:42:16", "remaining_time": "0:17:19", "throughput": 13752.88, "total_tokens": 34884032} +{"current_steps": 11085, "total_steps": 15621, "loss": 0.3254, "lr": 4.714459392758534e-07, "epoch": 0.7096216631457654, "percentage": 70.96, "elapsed_time": "0:42:17", "remaining_time": "0:17:18", "throughput": 13755.67, "total_tokens": 34899456} +{"current_steps": 11090, "total_steps": 15621, "loss": 0.3735, "lr": 4.70497735892293e-07, "epoch": 0.7099417450867422, "percentage": 70.99, "elapsed_time": "0:42:17", "remaining_time": "0:17:16", "throughput": 13758.62, "total_tokens": 34915456} +{"current_steps": 11095, "total_steps": 15621, "loss": 0.3331, "lr": 4.695501935985263e-07, "epoch": 0.7102618270277191, "percentage": 71.03, "elapsed_time": "0:42:18", "remaining_time": "0:17:15", "throughput": 13761.53, "total_tokens": 34931328} +{"current_steps": 11100, "total_steps": 15621, "loss": 0.3999, "lr": 4.686033135775711e-07, "epoch": 0.710581908968696, "percentage": 71.06, "elapsed_time": "0:42:18", "remaining_time": "0:17:14", "throughput": 13764.31, "total_tokens": 34946816} +{"current_steps": 11105, "total_steps": 15621, "loss": 0.3245, "lr": 4.6765709701161817e-07, "epoch": 0.7109019909096729, "percentage": 71.09, "elapsed_time": "0:42:19", "remaining_time": "0:17:12", "throughput": 13767.66, "total_tokens": 34964544} +{"current_steps": 11110, "total_steps": 15621, "loss": 0.3996, "lr": 4.6671154508203003e-07, "epoch": 0.7112220728506498, "percentage": 71.12, "elapsed_time": "0:42:20", "remaining_time": "0:17:11", "throughput": 13770.97, "total_tokens": 34982208} +{"current_steps": 11115, "total_steps": 15621, "loss": 0.3439, "lr": 4.657666589693393e-07, "epoch": 0.7115421547916266, "percentage": 71.15, "elapsed_time": "0:42:20", "remaining_time": "0:17:10", "throughput": 13774.47, "total_tokens": 35000576} +{"current_steps": 11120, "total_steps": 15621, "loss": 0.3145, "lr": 4.6482243985324753e-07, "epoch": 0.7118622367326035, "percentage": 71.19, "elapsed_time": "0:42:21", "remaining_time": "0:17:08", "throughput": 13777.0, "total_tokens": 35014912} +{"current_steps": 11125, "total_steps": 15621, "loss": 0.2914, "lr": 4.638788889126232e-07, "epoch": 0.7121823186735804, "percentage": 71.22, "elapsed_time": "0:42:22", "remaining_time": "0:17:07", "throughput": 13779.62, "total_tokens": 35029632} +{"current_steps": 11130, "total_steps": 15621, "loss": 0.3239, "lr": 4.6293600732550085e-07, "epoch": 0.7125024006145573, "percentage": 71.25, "elapsed_time": "0:42:22", "remaining_time": "0:17:06", "throughput": 13782.38, "total_tokens": 35044992} +{"current_steps": 11135, "total_steps": 15621, "loss": 0.4686, "lr": 4.619937962690792e-07, "epoch": 0.7128224825555343, "percentage": 71.28, "elapsed_time": "0:42:23", "remaining_time": "0:17:04", "throughput": 13785.17, "total_tokens": 35060544} +{"current_steps": 11140, "total_steps": 15621, "loss": 0.5105, "lr": 4.610522569197197e-07, "epoch": 0.7131425644965111, "percentage": 71.31, "elapsed_time": "0:42:23", "remaining_time": "0:17:03", "throughput": 13787.86, "total_tokens": 35075648} +{"current_steps": 11145, "total_steps": 15621, "loss": 0.3294, "lr": 4.6011139045294554e-07, "epoch": 0.713462646437488, "percentage": 71.35, "elapsed_time": "0:42:24", "remaining_time": "0:17:01", "throughput": 13790.59, "total_tokens": 35090880} +{"current_steps": 11150, "total_steps": 15621, "loss": 0.3904, "lr": 4.59171198043439e-07, "epoch": 0.7137827283784649, "percentage": 71.38, "elapsed_time": "0:42:25", "remaining_time": "0:17:00", "throughput": 13793.39, "total_tokens": 35106432} +{"current_steps": 11155, "total_steps": 15621, "loss": 0.4349, "lr": 4.582316808650424e-07, "epoch": 0.7141028103194418, "percentage": 71.41, "elapsed_time": "0:42:25", "remaining_time": "0:16:59", "throughput": 13796.14, "total_tokens": 35121664} +{"current_steps": 11160, "total_steps": 15621, "loss": 0.491, "lr": 4.572928400907529e-07, "epoch": 0.7144228922604187, "percentage": 71.44, "elapsed_time": "0:42:26", "remaining_time": "0:16:57", "throughput": 13798.91, "total_tokens": 35137152} +{"current_steps": 11165, "total_steps": 15621, "loss": 0.3682, "lr": 4.5635467689272434e-07, "epoch": 0.7147429742013955, "percentage": 71.47, "elapsed_time": "0:42:26", "remaining_time": "0:16:56", "throughput": 13801.8, "total_tokens": 35153088} +{"current_steps": 11170, "total_steps": 15621, "loss": 0.3654, "lr": 4.554171924422655e-07, "epoch": 0.7150630561423724, "percentage": 71.51, "elapsed_time": "0:42:27", "remaining_time": "0:16:55", "throughput": 13804.49, "total_tokens": 35168192} +{"current_steps": 11175, "total_steps": 15621, "loss": 0.3242, "lr": 4.544803879098356e-07, "epoch": 0.7153831380833493, "percentage": 71.54, "elapsed_time": "0:42:28", "remaining_time": "0:16:53", "throughput": 13807.4, "total_tokens": 35184192} +{"current_steps": 11180, "total_steps": 15621, "loss": 0.3848, "lr": 4.535442644650462e-07, "epoch": 0.7157032200243262, "percentage": 71.57, "elapsed_time": "0:42:28", "remaining_time": "0:16:52", "throughput": 13810.33, "total_tokens": 35200256} +{"current_steps": 11185, "total_steps": 15621, "loss": 0.4889, "lr": 4.5260882327665906e-07, "epoch": 0.7160233019653032, "percentage": 71.6, "elapsed_time": "0:42:29", "remaining_time": "0:16:51", "throughput": 13812.9, "total_tokens": 35214720} +{"current_steps": 11190, "total_steps": 15621, "loss": 0.5077, "lr": 4.5167406551258347e-07, "epoch": 0.71634338390628, "percentage": 71.63, "elapsed_time": "0:42:30", "remaining_time": "0:16:49", "throughput": 13815.77, "total_tokens": 35230720} +{"current_steps": 11195, "total_steps": 15621, "loss": 0.3948, "lr": 4.5073999233987445e-07, "epoch": 0.7166634658472569, "percentage": 71.67, "elapsed_time": "0:42:30", "remaining_time": "0:16:48", "throughput": 13818.6, "total_tokens": 35246400} +{"current_steps": 11200, "total_steps": 15621, "loss": 0.4854, "lr": 4.4980660492473434e-07, "epoch": 0.7169835477882338, "percentage": 71.7, "elapsed_time": "0:42:31", "remaining_time": "0:16:47", "throughput": 13821.59, "total_tokens": 35262784} +{"current_steps": 11205, "total_steps": 15621, "loss": 0.2735, "lr": 4.4887390443250804e-07, "epoch": 0.7173036297292107, "percentage": 71.73, "elapsed_time": "0:42:31", "remaining_time": "0:16:45", "throughput": 13824.25, "total_tokens": 35277632} +{"current_steps": 11210, "total_steps": 15621, "loss": 0.2981, "lr": 4.4794189202768295e-07, "epoch": 0.7176237116701876, "percentage": 71.76, "elapsed_time": "0:42:32", "remaining_time": "0:16:44", "throughput": 13826.9, "total_tokens": 35292544} +{"current_steps": 11215, "total_steps": 15621, "loss": 0.3816, "lr": 4.4701056887388757e-07, "epoch": 0.7179437936111644, "percentage": 71.79, "elapsed_time": "0:42:33", "remaining_time": "0:16:43", "throughput": 13829.77, "total_tokens": 35308352} +{"current_steps": 11220, "total_steps": 15621, "loss": 0.3307, "lr": 4.460799361338897e-07, "epoch": 0.7182638755521413, "percentage": 71.83, "elapsed_time": "0:42:33", "remaining_time": "0:16:41", "throughput": 13832.59, "total_tokens": 35323904} +{"current_steps": 11225, "total_steps": 15621, "loss": 0.4203, "lr": 4.451499949695954e-07, "epoch": 0.7185839574931182, "percentage": 71.86, "elapsed_time": "0:42:34", "remaining_time": "0:16:40", "throughput": 13835.54, "total_tokens": 35340224} +{"current_steps": 11230, "total_steps": 15621, "loss": 0.375, "lr": 4.44220746542047e-07, "epoch": 0.7189040394340951, "percentage": 71.89, "elapsed_time": "0:42:34", "remaining_time": "0:16:38", "throughput": 13838.32, "total_tokens": 35355776} +{"current_steps": 11235, "total_steps": 15621, "loss": 0.474, "lr": 4.432921920114221e-07, "epoch": 0.719224121375072, "percentage": 71.92, "elapsed_time": "0:42:35", "remaining_time": "0:16:37", "throughput": 13841.04, "total_tokens": 35371072} +{"current_steps": 11240, "total_steps": 15621, "loss": 0.3144, "lr": 4.4236433253703185e-07, "epoch": 0.719544203316049, "percentage": 71.95, "elapsed_time": "0:42:36", "remaining_time": "0:16:36", "throughput": 13844.01, "total_tokens": 35387520} +{"current_steps": 11245, "total_steps": 15621, "loss": 0.4042, "lr": 4.4143716927732e-07, "epoch": 0.7198642852570258, "percentage": 71.99, "elapsed_time": "0:42:36", "remaining_time": "0:16:34", "throughput": 13846.98, "total_tokens": 35403840} +{"current_steps": 11250, "total_steps": 15621, "loss": 0.3767, "lr": 4.405107033898604e-07, "epoch": 0.7201843671980027, "percentage": 72.02, "elapsed_time": "0:42:37", "remaining_time": "0:16:33", "throughput": 13849.9, "total_tokens": 35420032} +{"current_steps": 11255, "total_steps": 15621, "loss": 0.2887, "lr": 4.395849360313568e-07, "epoch": 0.7205044491389796, "percentage": 72.05, "elapsed_time": "0:42:38", "remaining_time": "0:16:32", "throughput": 13852.8, "total_tokens": 35436032} +{"current_steps": 11260, "total_steps": 15621, "loss": 0.3505, "lr": 4.386598683576406e-07, "epoch": 0.7208245310799565, "percentage": 72.08, "elapsed_time": "0:42:38", "remaining_time": "0:16:30", "throughput": 13855.45, "total_tokens": 35451136} +{"current_steps": 11265, "total_steps": 15621, "loss": 0.4744, "lr": 4.377355015236696e-07, "epoch": 0.7211446130209334, "percentage": 72.11, "elapsed_time": "0:42:39", "remaining_time": "0:16:29", "throughput": 13858.25, "total_tokens": 35466816} +{"current_steps": 11270, "total_steps": 15621, "loss": 0.3588, "lr": 4.368118366835266e-07, "epoch": 0.7214646949619102, "percentage": 72.15, "elapsed_time": "0:42:39", "remaining_time": "0:16:28", "throughput": 13861.3, "total_tokens": 35483456} +{"current_steps": 11275, "total_steps": 15621, "loss": 0.4691, "lr": 4.358888749904177e-07, "epoch": 0.7217847769028871, "percentage": 72.18, "elapsed_time": "0:42:40", "remaining_time": "0:16:26", "throughput": 13864.24, "total_tokens": 35499584} +{"current_steps": 11280, "total_steps": 15621, "loss": 0.3521, "lr": 4.349666175966725e-07, "epoch": 0.722104858843864, "percentage": 72.21, "elapsed_time": "0:42:41", "remaining_time": "0:16:25", "throughput": 13867.05, "total_tokens": 35515328} +{"current_steps": 11285, "total_steps": 15621, "loss": 0.4721, "lr": 4.340450656537392e-07, "epoch": 0.7224249407848409, "percentage": 72.24, "elapsed_time": "0:42:41", "remaining_time": "0:16:24", "throughput": 13869.61, "total_tokens": 35530048} +{"current_steps": 11290, "total_steps": 15621, "loss": 0.2995, "lr": 4.331242203121861e-07, "epoch": 0.7227450227258178, "percentage": 72.27, "elapsed_time": "0:42:42", "remaining_time": "0:16:22", "throughput": 13872.46, "total_tokens": 35545792} +{"current_steps": 11295, "total_steps": 15621, "loss": 0.3775, "lr": 4.322040827217004e-07, "epoch": 0.7230651046667947, "percentage": 72.31, "elapsed_time": "0:42:42", "remaining_time": "0:16:21", "throughput": 13875.27, "total_tokens": 35561344} +{"current_steps": 11300, "total_steps": 15621, "loss": 0.4064, "lr": 4.312846540310838e-07, "epoch": 0.7233851866077716, "percentage": 72.34, "elapsed_time": "0:42:43", "remaining_time": "0:16:20", "throughput": 13878.09, "total_tokens": 35577024} +{"current_steps": 11305, "total_steps": 15621, "loss": 0.3527, "lr": 4.3036593538825373e-07, "epoch": 0.7237052685487485, "percentage": 72.37, "elapsed_time": "0:42:44", "remaining_time": "0:16:18", "throughput": 13880.76, "total_tokens": 35592192} +{"current_steps": 11310, "total_steps": 15621, "loss": 0.3375, "lr": 4.2944792794024196e-07, "epoch": 0.7240253504897254, "percentage": 72.4, "elapsed_time": "0:42:44", "remaining_time": "0:16:17", "throughput": 13883.61, "total_tokens": 35607872} +{"current_steps": 11315, "total_steps": 15621, "loss": 0.3015, "lr": 4.285306328331915e-07, "epoch": 0.7243454324307023, "percentage": 72.43, "elapsed_time": "0:42:45", "remaining_time": "0:16:16", "throughput": 13886.5, "total_tokens": 35623872} +{"current_steps": 11320, "total_steps": 15621, "loss": 0.3168, "lr": 4.2761405121235506e-07, "epoch": 0.7246655143716791, "percentage": 72.47, "elapsed_time": "0:42:45", "remaining_time": "0:16:14", "throughput": 13889.07, "total_tokens": 35638720} +{"current_steps": 11325, "total_steps": 15621, "loss": 0.538, "lr": 4.266981842220965e-07, "epoch": 0.724985596312656, "percentage": 72.5, "elapsed_time": "0:42:46", "remaining_time": "0:16:13", "throughput": 13892.2, "total_tokens": 35655680} +{"current_steps": 11330, "total_steps": 15621, "loss": 0.2663, "lr": 4.257830330058864e-07, "epoch": 0.7253056782536329, "percentage": 72.53, "elapsed_time": "0:42:47", "remaining_time": "0:16:12", "throughput": 13894.95, "total_tokens": 35671168} +{"current_steps": 11335, "total_steps": 15621, "loss": 0.4085, "lr": 4.248685987063019e-07, "epoch": 0.7256257601946098, "percentage": 72.56, "elapsed_time": "0:42:47", "remaining_time": "0:16:10", "throughput": 13897.77, "total_tokens": 35686848} +{"current_steps": 11340, "total_steps": 15621, "loss": 0.3486, "lr": 4.2395488246502396e-07, "epoch": 0.7259458421355867, "percentage": 72.59, "elapsed_time": "0:42:48", "remaining_time": "0:16:09", "throughput": 13900.63, "total_tokens": 35702720} +{"current_steps": 11345, "total_steps": 15621, "loss": 0.4532, "lr": 4.2304188542283913e-07, "epoch": 0.7262659240765637, "percentage": 72.63, "elapsed_time": "0:42:49", "remaining_time": "0:16:08", "throughput": 13903.97, "total_tokens": 35720640} +{"current_steps": 11350, "total_steps": 15621, "loss": 0.3855, "lr": 4.221296087196347e-07, "epoch": 0.7265860060175405, "percentage": 72.66, "elapsed_time": "0:42:49", "remaining_time": "0:16:06", "throughput": 13906.55, "total_tokens": 35735424} +{"current_steps": 11355, "total_steps": 15621, "loss": 0.46, "lr": 4.2121805349439867e-07, "epoch": 0.7269060879585174, "percentage": 72.69, "elapsed_time": "0:42:50", "remaining_time": "0:16:05", "throughput": 13909.37, "total_tokens": 35751168} +{"current_steps": 11360, "total_steps": 15621, "loss": 0.3829, "lr": 4.203072208852184e-07, "epoch": 0.7272261698994943, "percentage": 72.72, "elapsed_time": "0:42:50", "remaining_time": "0:16:04", "throughput": 13912.19, "total_tokens": 35767168} +{"current_steps": 11365, "total_steps": 15621, "loss": 0.447, "lr": 4.193971120292793e-07, "epoch": 0.7275462518404712, "percentage": 72.75, "elapsed_time": "0:42:51", "remaining_time": "0:16:02", "throughput": 13914.89, "total_tokens": 35782464} +{"current_steps": 11370, "total_steps": 15621, "loss": 0.4004, "lr": 4.184877280628629e-07, "epoch": 0.727866333781448, "percentage": 72.79, "elapsed_time": "0:42:52", "remaining_time": "0:16:01", "throughput": 13917.73, "total_tokens": 35798592} +{"current_steps": 11375, "total_steps": 15621, "loss": 0.3955, "lr": 4.1757907012134565e-07, "epoch": 0.7281864157224249, "percentage": 72.82, "elapsed_time": "0:42:52", "remaining_time": "0:16:00", "throughput": 13920.64, "total_tokens": 35814720} +{"current_steps": 11380, "total_steps": 15621, "loss": 0.2807, "lr": 4.166711393391978e-07, "epoch": 0.7285064976634018, "percentage": 72.85, "elapsed_time": "0:42:53", "remaining_time": "0:15:59", "throughput": 13923.33, "total_tokens": 35830016} +{"current_steps": 11385, "total_steps": 15621, "loss": 0.3365, "lr": 4.1576393684998146e-07, "epoch": 0.7288265796043787, "percentage": 72.88, "elapsed_time": "0:42:53", "remaining_time": "0:15:57", "throughput": 13926.07, "total_tokens": 35845632} +{"current_steps": 11390, "total_steps": 15621, "loss": 0.3505, "lr": 4.1485746378634966e-07, "epoch": 0.7291466615453556, "percentage": 72.91, "elapsed_time": "0:42:54", "remaining_time": "0:15:56", "throughput": 13928.84, "total_tokens": 35861184} +{"current_steps": 11395, "total_steps": 15621, "loss": 0.4186, "lr": 4.1395172128004473e-07, "epoch": 0.7294667434863324, "percentage": 72.95, "elapsed_time": "0:42:55", "remaining_time": "0:15:55", "throughput": 13931.68, "total_tokens": 35876864} +{"current_steps": 11400, "total_steps": 15621, "loss": 0.3272, "lr": 4.130467104618963e-07, "epoch": 0.7297868254273094, "percentage": 72.98, "elapsed_time": "0:42:55", "remaining_time": "0:15:53", "throughput": 13934.73, "total_tokens": 35893568} +{"current_steps": 11405, "total_steps": 15621, "loss": 0.3336, "lr": 4.1214243246182223e-07, "epoch": 0.7301069073682863, "percentage": 73.01, "elapsed_time": "0:42:56", "remaining_time": "0:15:52", "throughput": 13937.59, "total_tokens": 35909696} +{"current_steps": 11410, "total_steps": 15621, "loss": 0.465, "lr": 4.1123888840882306e-07, "epoch": 0.7304269893092632, "percentage": 73.04, "elapsed_time": "0:42:57", "remaining_time": "0:15:51", "throughput": 13940.31, "total_tokens": 35925120} +{"current_steps": 11415, "total_steps": 15621, "loss": 0.3184, "lr": 4.1033607943098415e-07, "epoch": 0.7307470712502401, "percentage": 73.07, "elapsed_time": "0:42:57", "remaining_time": "0:15:49", "throughput": 13943.11, "total_tokens": 35940800} +{"current_steps": 11420, "total_steps": 15621, "loss": 0.3461, "lr": 4.0943400665547423e-07, "epoch": 0.731067153191217, "percentage": 73.11, "elapsed_time": "0:42:58", "remaining_time": "0:15:48", "throughput": 13945.8, "total_tokens": 35955968} +{"current_steps": 11425, "total_steps": 15621, "loss": 0.3261, "lr": 4.0853267120854064e-07, "epoch": 0.7313872351321938, "percentage": 73.14, "elapsed_time": "0:42:58", "remaining_time": "0:15:47", "throughput": 13948.67, "total_tokens": 35972096} +{"current_steps": 11430, "total_steps": 15621, "loss": 0.3358, "lr": 4.076320742155117e-07, "epoch": 0.7317073170731707, "percentage": 73.17, "elapsed_time": "0:42:59", "remaining_time": "0:15:45", "throughput": 13951.2, "total_tokens": 35986624} +{"current_steps": 11435, "total_steps": 15621, "loss": 0.3546, "lr": 4.067322168007928e-07, "epoch": 0.7320273990141476, "percentage": 73.2, "elapsed_time": "0:43:00", "remaining_time": "0:15:44", "throughput": 13954.14, "total_tokens": 36003008} +{"current_steps": 11440, "total_steps": 15621, "loss": 0.3539, "lr": 4.0583310008786775e-07, "epoch": 0.7323474809551245, "percentage": 73.23, "elapsed_time": "0:43:00", "remaining_time": "0:15:43", "throughput": 13956.54, "total_tokens": 36017152} +{"current_steps": 11445, "total_steps": 15621, "loss": 0.2777, "lr": 4.049347251992932e-07, "epoch": 0.7326675628961014, "percentage": 73.27, "elapsed_time": "0:43:01", "remaining_time": "0:15:41", "throughput": 13959.1, "total_tokens": 36031936} +{"current_steps": 11450, "total_steps": 15621, "loss": 0.3461, "lr": 4.0403709325670064e-07, "epoch": 0.7329876448370783, "percentage": 73.3, "elapsed_time": "0:43:01", "remaining_time": "0:15:40", "throughput": 13961.98, "total_tokens": 36048064} +{"current_steps": 11455, "total_steps": 15621, "loss": 0.4433, "lr": 4.03140205380795e-07, "epoch": 0.7333077267780552, "percentage": 73.33, "elapsed_time": "0:43:02", "remaining_time": "0:15:39", "throughput": 13964.88, "total_tokens": 36064256} +{"current_steps": 11460, "total_steps": 15621, "loss": 0.6545, "lr": 4.0224406269135115e-07, "epoch": 0.7336278087190321, "percentage": 73.36, "elapsed_time": "0:43:03", "remaining_time": "0:15:37", "throughput": 13967.51, "total_tokens": 36079424} +{"current_steps": 11465, "total_steps": 15621, "loss": 0.3062, "lr": 4.0134866630721266e-07, "epoch": 0.733947890660009, "percentage": 73.39, "elapsed_time": "0:43:03", "remaining_time": "0:15:36", "throughput": 13970.36, "total_tokens": 36095424} +{"current_steps": 11470, "total_steps": 15621, "loss": 0.3666, "lr": 4.0045401734629367e-07, "epoch": 0.7342679726009859, "percentage": 73.43, "elapsed_time": "0:43:04", "remaining_time": "0:15:35", "throughput": 13973.19, "total_tokens": 36111360} +{"current_steps": 11475, "total_steps": 15621, "loss": 0.3819, "lr": 3.9956011692557377e-07, "epoch": 0.7345880545419627, "percentage": 73.46, "elapsed_time": "0:43:04", "remaining_time": "0:15:33", "throughput": 13975.98, "total_tokens": 36127232} +{"current_steps": 11480, "total_steps": 15621, "loss": 0.3447, "lr": 3.986669661610972e-07, "epoch": 0.7349081364829396, "percentage": 73.49, "elapsed_time": "0:43:05", "remaining_time": "0:15:32", "throughput": 13978.77, "total_tokens": 36143168} +{"current_steps": 11485, "total_steps": 15621, "loss": 0.329, "lr": 3.9777456616797414e-07, "epoch": 0.7352282184239165, "percentage": 73.52, "elapsed_time": "0:43:06", "remaining_time": "0:15:31", "throughput": 13981.44, "total_tokens": 36158272} +{"current_steps": 11490, "total_steps": 15621, "loss": 0.3544, "lr": 3.968829180603761e-07, "epoch": 0.7355483003648934, "percentage": 73.55, "elapsed_time": "0:43:06", "remaining_time": "0:15:30", "throughput": 13984.0, "total_tokens": 36173056} +{"current_steps": 11495, "total_steps": 15621, "loss": 0.4025, "lr": 3.9599202295153624e-07, "epoch": 0.7358683823058703, "percentage": 73.59, "elapsed_time": "0:43:07", "remaining_time": "0:15:28", "throughput": 13986.57, "total_tokens": 36187904} +{"current_steps": 11500, "total_steps": 15621, "loss": 0.3587, "lr": 3.951018819537476e-07, "epoch": 0.7361884642468471, "percentage": 73.62, "elapsed_time": "0:43:08", "remaining_time": "0:15:27", "throughput": 13989.8, "total_tokens": 36205632} +{"current_steps": 11505, "total_steps": 15621, "loss": 0.3492, "lr": 3.942124961783616e-07, "epoch": 0.7365085461878241, "percentage": 73.65, "elapsed_time": "0:43:08", "remaining_time": "0:15:26", "throughput": 13992.32, "total_tokens": 36220160} +{"current_steps": 11510, "total_steps": 15621, "loss": 0.3096, "lr": 3.933238667357869e-07, "epoch": 0.736828628128801, "percentage": 73.68, "elapsed_time": "0:43:09", "remaining_time": "0:15:24", "throughput": 13995.24, "total_tokens": 36236416} +{"current_steps": 11515, "total_steps": 15621, "loss": 0.3546, "lr": 3.924359947354876e-07, "epoch": 0.7371487100697779, "percentage": 73.71, "elapsed_time": "0:43:09", "remaining_time": "0:15:23", "throughput": 13997.86, "total_tokens": 36251584} +{"current_steps": 11520, "total_steps": 15621, "loss": 0.3261, "lr": 3.915488812859826e-07, "epoch": 0.7374687920107548, "percentage": 73.75, "elapsed_time": "0:43:10", "remaining_time": "0:15:22", "throughput": 14000.27, "total_tokens": 36265856} +{"current_steps": 11525, "total_steps": 15621, "loss": 0.3797, "lr": 3.90662527494843e-07, "epoch": 0.7377888739517316, "percentage": 73.78, "elapsed_time": "0:43:11", "remaining_time": "0:15:20", "throughput": 14003.58, "total_tokens": 36283904} +{"current_steps": 11530, "total_steps": 15621, "loss": 0.3638, "lr": 3.8977693446869285e-07, "epoch": 0.7381089558927085, "percentage": 73.81, "elapsed_time": "0:43:11", "remaining_time": "0:15:19", "throughput": 14006.05, "total_tokens": 36298432} +{"current_steps": 11535, "total_steps": 15621, "loss": 0.3298, "lr": 3.8889210331320445e-07, "epoch": 0.7384290378336854, "percentage": 73.84, "elapsed_time": "0:43:12", "remaining_time": "0:15:18", "throughput": 14008.73, "total_tokens": 36313728} +{"current_steps": 11540, "total_steps": 15621, "loss": 0.3795, "lr": 3.8800803513310033e-07, "epoch": 0.7387491197746623, "percentage": 73.87, "elapsed_time": "0:43:12", "remaining_time": "0:15:16", "throughput": 14011.4, "total_tokens": 36329088} +{"current_steps": 11545, "total_steps": 15621, "loss": 0.4125, "lr": 3.8712473103214993e-07, "epoch": 0.7390692017156392, "percentage": 73.91, "elapsed_time": "0:43:13", "remaining_time": "0:15:15", "throughput": 14014.12, "total_tokens": 36345024} +{"current_steps": 11550, "total_steps": 15621, "loss": 0.3077, "lr": 3.862421921131688e-07, "epoch": 0.739389283656616, "percentage": 73.94, "elapsed_time": "0:43:14", "remaining_time": "0:15:14", "throughput": 14017.17, "total_tokens": 36361792} +{"current_steps": 11555, "total_steps": 15621, "loss": 0.2844, "lr": 3.85360419478017e-07, "epoch": 0.739709365597593, "percentage": 73.97, "elapsed_time": "0:43:14", "remaining_time": "0:15:13", "throughput": 14019.84, "total_tokens": 36377152} +{"current_steps": 11560, "total_steps": 15621, "loss": 0.346, "lr": 3.8447941422759786e-07, "epoch": 0.7400294475385699, "percentage": 74.0, "elapsed_time": "0:43:15", "remaining_time": "0:15:11", "throughput": 14022.85, "total_tokens": 36394048} +{"current_steps": 11565, "total_steps": 15621, "loss": 0.3546, "lr": 3.835991774618579e-07, "epoch": 0.7403495294795468, "percentage": 74.03, "elapsed_time": "0:43:15", "remaining_time": "0:15:10", "throughput": 14025.44, "total_tokens": 36409152} +{"current_steps": 11570, "total_steps": 15621, "loss": 0.3882, "lr": 3.827197102797818e-07, "epoch": 0.7406696114205237, "percentage": 74.07, "elapsed_time": "0:43:16", "remaining_time": "0:15:09", "throughput": 14028.68, "total_tokens": 36427072} +{"current_steps": 11575, "total_steps": 15621, "loss": 0.4667, "lr": 3.818410137793947e-07, "epoch": 0.7409896933615006, "percentage": 74.1, "elapsed_time": "0:43:17", "remaining_time": "0:15:07", "throughput": 14031.73, "total_tokens": 36444288} +{"current_steps": 11580, "total_steps": 15621, "loss": 0.4323, "lr": 3.809630890577602e-07, "epoch": 0.7413097753024774, "percentage": 74.13, "elapsed_time": "0:43:17", "remaining_time": "0:15:06", "throughput": 14034.51, "total_tokens": 36460096} +{"current_steps": 11585, "total_steps": 15621, "loss": 0.3414, "lr": 3.800859372109777e-07, "epoch": 0.7416298572434543, "percentage": 74.16, "elapsed_time": "0:43:18", "remaining_time": "0:15:05", "throughput": 14037.14, "total_tokens": 36475264} +{"current_steps": 11590, "total_steps": 15621, "loss": 0.3205, "lr": 3.7920955933418055e-07, "epoch": 0.7419499391844312, "percentage": 74.19, "elapsed_time": "0:43:19", "remaining_time": "0:15:03", "throughput": 14039.96, "total_tokens": 36491264} +{"current_steps": 11595, "total_steps": 15621, "loss": 0.3158, "lr": 3.7833395652153775e-07, "epoch": 0.7422700211254081, "percentage": 74.23, "elapsed_time": "0:43:19", "remaining_time": "0:15:02", "throughput": 14042.54, "total_tokens": 36506368} +{"current_steps": 11600, "total_steps": 15621, "loss": 0.2953, "lr": 3.774591298662497e-07, "epoch": 0.742590103066385, "percentage": 74.26, "elapsed_time": "0:43:20", "remaining_time": "0:15:01", "throughput": 14045.42, "total_tokens": 36522432} +{"current_steps": 11605, "total_steps": 15621, "loss": 0.4255, "lr": 3.765850804605468e-07, "epoch": 0.7429101850073618, "percentage": 74.29, "elapsed_time": "0:43:20", "remaining_time": "0:15:00", "throughput": 14048.38, "total_tokens": 36539008} +{"current_steps": 11610, "total_steps": 15621, "loss": 0.2863, "lr": 3.7571180939569104e-07, "epoch": 0.7432302669483388, "percentage": 74.32, "elapsed_time": "0:43:21", "remaining_time": "0:14:58", "throughput": 14051.02, "total_tokens": 36554240} +{"current_steps": 11615, "total_steps": 15621, "loss": 0.308, "lr": 3.748393177619711e-07, "epoch": 0.7435503488893157, "percentage": 74.36, "elapsed_time": "0:43:22", "remaining_time": "0:14:57", "throughput": 14053.73, "total_tokens": 36569920} +{"current_steps": 11620, "total_steps": 15621, "loss": 0.3273, "lr": 3.739676066487032e-07, "epoch": 0.7438704308302926, "percentage": 74.39, "elapsed_time": "0:43:22", "remaining_time": "0:14:56", "throughput": 14056.49, "total_tokens": 36585792} +{"current_steps": 11625, "total_steps": 15621, "loss": 0.2906, "lr": 3.730966771442289e-07, "epoch": 0.7441905127712695, "percentage": 74.42, "elapsed_time": "0:43:23", "remaining_time": "0:14:54", "throughput": 14059.17, "total_tokens": 36601280} +{"current_steps": 11630, "total_steps": 15621, "loss": 0.5193, "lr": 3.722265303359137e-07, "epoch": 0.7445105947122463, "percentage": 74.45, "elapsed_time": "0:43:23", "remaining_time": "0:14:53", "throughput": 14061.92, "total_tokens": 36617152} +{"current_steps": 11635, "total_steps": 15621, "loss": 0.4, "lr": 3.713571673101463e-07, "epoch": 0.7448306766532232, "percentage": 74.48, "elapsed_time": "0:43:24", "remaining_time": "0:14:52", "throughput": 14064.55, "total_tokens": 36632512} +{"current_steps": 11640, "total_steps": 15621, "loss": 0.3338, "lr": 3.704885891523366e-07, "epoch": 0.7451507585942001, "percentage": 74.52, "elapsed_time": "0:43:25", "remaining_time": "0:14:51", "throughput": 14067.19, "total_tokens": 36647744} +{"current_steps": 11645, "total_steps": 15621, "loss": 0.3878, "lr": 3.696207969469146e-07, "epoch": 0.745470840535177, "percentage": 74.55, "elapsed_time": "0:43:25", "remaining_time": "0:14:49", "throughput": 14069.86, "total_tokens": 36663360} +{"current_steps": 11650, "total_steps": 15621, "loss": 0.3571, "lr": 3.6875379177732913e-07, "epoch": 0.7457909224761539, "percentage": 74.58, "elapsed_time": "0:43:26", "remaining_time": "0:14:48", "throughput": 14072.48, "total_tokens": 36678656} +{"current_steps": 11655, "total_steps": 15621, "loss": 0.4971, "lr": 3.6788757472604634e-07, "epoch": 0.7461110044171307, "percentage": 74.61, "elapsed_time": "0:43:27", "remaining_time": "0:14:47", "throughput": 14075.15, "total_tokens": 36693952} +{"current_steps": 11660, "total_steps": 15621, "loss": 0.3139, "lr": 3.6702214687454825e-07, "epoch": 0.7464310863581076, "percentage": 74.64, "elapsed_time": "0:43:27", "remaining_time": "0:14:45", "throughput": 14077.92, "total_tokens": 36709888} +{"current_steps": 11665, "total_steps": 15621, "loss": 0.3103, "lr": 3.6615750930333177e-07, "epoch": 0.7467511682990846, "percentage": 74.68, "elapsed_time": "0:43:28", "remaining_time": "0:14:44", "throughput": 14080.61, "total_tokens": 36725504} +{"current_steps": 11670, "total_steps": 15621, "loss": 0.3055, "lr": 3.65293663091907e-07, "epoch": 0.7470712502400615, "percentage": 74.71, "elapsed_time": "0:43:28", "remaining_time": "0:14:43", "throughput": 14083.4, "total_tokens": 36741376} +{"current_steps": 11675, "total_steps": 15621, "loss": 0.4277, "lr": 3.6443060931879623e-07, "epoch": 0.7473913321810384, "percentage": 74.74, "elapsed_time": "0:43:29", "remaining_time": "0:14:41", "throughput": 14086.07, "total_tokens": 36756864} +{"current_steps": 11680, "total_steps": 15621, "loss": 0.4503, "lr": 3.635683490615321e-07, "epoch": 0.7477114141220152, "percentage": 74.77, "elapsed_time": "0:43:30", "remaining_time": "0:14:40", "throughput": 14088.8, "total_tokens": 36772608} +{"current_steps": 11685, "total_steps": 15621, "loss": 0.2975, "lr": 3.6270688339665634e-07, "epoch": 0.7480314960629921, "percentage": 74.8, "elapsed_time": "0:43:30", "remaining_time": "0:14:39", "throughput": 14091.52, "total_tokens": 36788352} +{"current_steps": 11690, "total_steps": 15621, "loss": 0.3444, "lr": 3.6184621339972e-07, "epoch": 0.748351578003969, "percentage": 74.84, "elapsed_time": "0:43:31", "remaining_time": "0:14:38", "throughput": 14094.24, "total_tokens": 36804096} +{"current_steps": 11695, "total_steps": 15621, "loss": 0.3568, "lr": 3.609863401452786e-07, "epoch": 0.7486716599449459, "percentage": 74.87, "elapsed_time": "0:43:31", "remaining_time": "0:14:36", "throughput": 14096.97, "total_tokens": 36819776} +{"current_steps": 11700, "total_steps": 15621, "loss": 0.4084, "lr": 3.6012726470689416e-07, "epoch": 0.7489917418859228, "percentage": 74.9, "elapsed_time": "0:43:32", "remaining_time": "0:14:35", "throughput": 14099.61, "total_tokens": 36835072} +{"current_steps": 11705, "total_steps": 15621, "loss": 0.3318, "lr": 3.592689881571329e-07, "epoch": 0.7493118238268996, "percentage": 74.93, "elapsed_time": "0:43:33", "remaining_time": "0:14:34", "throughput": 14102.33, "total_tokens": 36850816} +{"current_steps": 11710, "total_steps": 15621, "loss": 0.4348, "lr": 3.5841151156756334e-07, "epoch": 0.7496319057678765, "percentage": 74.96, "elapsed_time": "0:43:33", "remaining_time": "0:14:32", "throughput": 14104.99, "total_tokens": 36866368} +{"current_steps": 11715, "total_steps": 15621, "loss": 0.3994, "lr": 3.575548360087539e-07, "epoch": 0.7499519877088535, "percentage": 75.0, "elapsed_time": "0:43:34", "remaining_time": "0:14:31", "throughput": 14108.42, "total_tokens": 36885376} +{"current_steps": 11720, "total_steps": 15621, "loss": 0.3173, "lr": 3.5669896255027533e-07, "epoch": 0.7502720696498304, "percentage": 75.03, "elapsed_time": "0:43:35", "remaining_time": "0:14:30", "throughput": 14110.92, "total_tokens": 36900288} +{"current_steps": 11725, "total_steps": 15621, "loss": 0.4035, "lr": 3.5584389226069543e-07, "epoch": 0.7505921515908073, "percentage": 75.06, "elapsed_time": "0:43:35", "remaining_time": "0:14:29", "throughput": 14113.69, "total_tokens": 36916224} +{"current_steps": 11730, "total_steps": 15621, "loss": 0.2995, "lr": 3.5498962620757866e-07, "epoch": 0.7509122335317842, "percentage": 75.09, "elapsed_time": "0:43:36", "remaining_time": "0:14:27", "throughput": 14116.34, "total_tokens": 36931648} +{"current_steps": 11730, "total_steps": 15621, "eval_loss": 0.3647865653038025, "epoch": 0.7509122335317842, "percentage": 75.09, "elapsed_time": "0:44:25", "remaining_time": "0:14:44", "throughput": 13855.98, "total_tokens": 36931648} +{"current_steps": 11735, "total_steps": 15621, "loss": 0.4327, "lr": 3.5413616545748713e-07, "epoch": 0.751232315472761, "percentage": 75.12, "elapsed_time": "0:45:02", "remaining_time": "0:14:55", "throughput": 13669.22, "total_tokens": 36945856} +{"current_steps": 11740, "total_steps": 15621, "loss": 0.5026, "lr": 3.532835110759763e-07, "epoch": 0.7515523974137379, "percentage": 75.16, "elapsed_time": "0:45:03", "remaining_time": "0:14:53", "throughput": 13671.98, "total_tokens": 36961792} +{"current_steps": 11745, "total_steps": 15621, "loss": 0.3038, "lr": 3.524316641275955e-07, "epoch": 0.7518724793547148, "percentage": 75.19, "elapsed_time": "0:45:04", "remaining_time": "0:14:52", "throughput": 13674.61, "total_tokens": 36977152} +{"current_steps": 11750, "total_steps": 15621, "loss": 0.4152, "lr": 3.5158062567588467e-07, "epoch": 0.7521925612956917, "percentage": 75.22, "elapsed_time": "0:45:04", "remaining_time": "0:14:51", "throughput": 13677.12, "total_tokens": 36991936} +{"current_steps": 11755, "total_steps": 15621, "loss": 0.3924, "lr": 3.5073039678337633e-07, "epoch": 0.7525126432366686, "percentage": 75.25, "elapsed_time": "0:45:05", "remaining_time": "0:14:49", "throughput": 13679.64, "total_tokens": 37006784} +{"current_steps": 11760, "total_steps": 15621, "loss": 0.348, "lr": 3.498809785115908e-07, "epoch": 0.7528327251776454, "percentage": 75.28, "elapsed_time": "0:45:05", "remaining_time": "0:14:48", "throughput": 13682.31, "total_tokens": 37022208} +{"current_steps": 11765, "total_steps": 15621, "loss": 0.3504, "lr": 3.4903237192103697e-07, "epoch": 0.7531528071186223, "percentage": 75.32, "elapsed_time": "0:45:06", "remaining_time": "0:14:47", "throughput": 13685.39, "total_tokens": 37039488} +{"current_steps": 11770, "total_steps": 15621, "loss": 0.3372, "lr": 3.481845780712099e-07, "epoch": 0.7534728890595993, "percentage": 75.35, "elapsed_time": "0:45:07", "remaining_time": "0:14:45", "throughput": 13688.32, "total_tokens": 37056064} +{"current_steps": 11775, "total_steps": 15621, "loss": 0.3354, "lr": 3.4733759802059037e-07, "epoch": 0.7537929710005762, "percentage": 75.38, "elapsed_time": "0:45:07", "remaining_time": "0:14:44", "throughput": 13691.14, "total_tokens": 37072256} +{"current_steps": 11780, "total_steps": 15621, "loss": 0.4239, "lr": 3.4649143282664273e-07, "epoch": 0.7541130529415531, "percentage": 75.41, "elapsed_time": "0:45:08", "remaining_time": "0:14:43", "throughput": 13693.69, "total_tokens": 37087360} +{"current_steps": 11785, "total_steps": 15621, "loss": 0.2992, "lr": 3.456460835458143e-07, "epoch": 0.7544331348825299, "percentage": 75.44, "elapsed_time": "0:45:08", "remaining_time": "0:14:41", "throughput": 13696.18, "total_tokens": 37102144} +{"current_steps": 11790, "total_steps": 15621, "loss": 0.3172, "lr": 3.4480155123353337e-07, "epoch": 0.7547532168235068, "percentage": 75.48, "elapsed_time": "0:45:09", "remaining_time": "0:14:40", "throughput": 13698.82, "total_tokens": 37117568} +{"current_steps": 11795, "total_steps": 15621, "loss": 0.4541, "lr": 3.4395783694420875e-07, "epoch": 0.7550732987644837, "percentage": 75.51, "elapsed_time": "0:45:10", "remaining_time": "0:14:39", "throughput": 13701.41, "total_tokens": 37132800} +{"current_steps": 11800, "total_steps": 15621, "loss": 0.4009, "lr": 3.4311494173122743e-07, "epoch": 0.7553933807054606, "percentage": 75.54, "elapsed_time": "0:45:10", "remaining_time": "0:14:37", "throughput": 13703.91, "total_tokens": 37147776} +{"current_steps": 11805, "total_steps": 15621, "loss": 0.387, "lr": 3.422728666469541e-07, "epoch": 0.7557134626464375, "percentage": 75.57, "elapsed_time": "0:45:11", "remaining_time": "0:14:36", "throughput": 13706.75, "total_tokens": 37163904} +{"current_steps": 11810, "total_steps": 15621, "loss": 0.4272, "lr": 3.41431612742729e-07, "epoch": 0.7560335445874143, "percentage": 75.6, "elapsed_time": "0:45:11", "remaining_time": "0:14:35", "throughput": 13709.64, "total_tokens": 37180416} +{"current_steps": 11815, "total_steps": 15621, "loss": 0.4243, "lr": 3.4059118106886855e-07, "epoch": 0.7563536265283912, "percentage": 75.64, "elapsed_time": "0:45:12", "remaining_time": "0:14:33", "throughput": 13712.4, "total_tokens": 37196480} +{"current_steps": 11820, "total_steps": 15621, "loss": 0.5118, "lr": 3.3975157267466036e-07, "epoch": 0.7566737084693682, "percentage": 75.67, "elapsed_time": "0:45:13", "remaining_time": "0:14:32", "throughput": 13714.99, "total_tokens": 37211648} +{"current_steps": 11825, "total_steps": 15621, "loss": 0.29, "lr": 3.389127886083656e-07, "epoch": 0.7569937904103451, "percentage": 75.7, "elapsed_time": "0:45:13", "remaining_time": "0:14:31", "throughput": 13717.64, "total_tokens": 37227072} +{"current_steps": 11830, "total_steps": 15621, "loss": 0.3415, "lr": 3.3807482991721667e-07, "epoch": 0.757313872351322, "percentage": 75.73, "elapsed_time": "0:45:14", "remaining_time": "0:14:29", "throughput": 13720.62, "total_tokens": 37243968} +{"current_steps": 11835, "total_steps": 15621, "loss": 0.3219, "lr": 3.3723769764741474e-07, "epoch": 0.7576339542922989, "percentage": 75.76, "elapsed_time": "0:45:15", "remaining_time": "0:14:28", "throughput": 13723.18, "total_tokens": 37259200} +{"current_steps": 11840, "total_steps": 15621, "loss": 0.2948, "lr": 3.3640139284412825e-07, "epoch": 0.7579540362332757, "percentage": 75.8, "elapsed_time": "0:45:15", "remaining_time": "0:14:27", "throughput": 13725.92, "total_tokens": 37275072} +{"current_steps": 11845, "total_steps": 15621, "loss": 0.399, "lr": 3.355659165514948e-07, "epoch": 0.7582741181742526, "percentage": 75.83, "elapsed_time": "0:45:16", "remaining_time": "0:14:25", "throughput": 13728.74, "total_tokens": 37291392} +{"current_steps": 11850, "total_steps": 15621, "loss": 0.2714, "lr": 3.347312698126161e-07, "epoch": 0.7585942001152295, "percentage": 75.86, "elapsed_time": "0:45:16", "remaining_time": "0:14:24", "throughput": 13731.57, "total_tokens": 37307648} +{"current_steps": 11855, "total_steps": 15621, "loss": 0.2191, "lr": 3.338974536695578e-07, "epoch": 0.7589142820562064, "percentage": 75.89, "elapsed_time": "0:45:17", "remaining_time": "0:14:23", "throughput": 13734.21, "total_tokens": 37323136} +{"current_steps": 11860, "total_steps": 15621, "loss": 0.3183, "lr": 3.330644691633492e-07, "epoch": 0.7592343639971832, "percentage": 75.92, "elapsed_time": "0:45:18", "remaining_time": "0:14:21", "throughput": 13736.83, "total_tokens": 37338496} +{"current_steps": 11865, "total_steps": 15621, "loss": 0.2783, "lr": 3.322323173339818e-07, "epoch": 0.7595544459381601, "percentage": 75.96, "elapsed_time": "0:45:18", "remaining_time": "0:14:20", "throughput": 13740.03, "total_tokens": 37356800} +{"current_steps": 11870, "total_steps": 15621, "loss": 0.4264, "lr": 3.314009992204071e-07, "epoch": 0.759874527879137, "percentage": 75.99, "elapsed_time": "0:45:19", "remaining_time": "0:14:19", "throughput": 13742.75, "total_tokens": 37372800} +{"current_steps": 11875, "total_steps": 15621, "loss": 0.3269, "lr": 3.3057051586053443e-07, "epoch": 0.760194609820114, "percentage": 76.02, "elapsed_time": "0:45:20", "remaining_time": "0:14:18", "throughput": 13745.46, "total_tokens": 37388608} +{"current_steps": 11880, "total_steps": 15621, "loss": 0.4584, "lr": 3.297408682912329e-07, "epoch": 0.7605146917610909, "percentage": 76.05, "elapsed_time": "0:45:20", "remaining_time": "0:14:16", "throughput": 13748.34, "total_tokens": 37405184} +{"current_steps": 11885, "total_steps": 15621, "loss": 0.2741, "lr": 3.289120575483271e-07, "epoch": 0.7608347737020678, "percentage": 76.08, "elapsed_time": "0:45:21", "remaining_time": "0:14:15", "throughput": 13750.86, "total_tokens": 37420096} +{"current_steps": 11890, "total_steps": 15621, "loss": 0.4214, "lr": 3.280840846665969e-07, "epoch": 0.7611548556430446, "percentage": 76.12, "elapsed_time": "0:45:21", "remaining_time": "0:14:14", "throughput": 13753.21, "total_tokens": 37434368} +{"current_steps": 11895, "total_steps": 15621, "loss": 0.3005, "lr": 3.272569506797761e-07, "epoch": 0.7614749375840215, "percentage": 76.15, "elapsed_time": "0:45:22", "remaining_time": "0:14:12", "throughput": 13755.7, "total_tokens": 37449344} +{"current_steps": 11900, "total_steps": 15621, "loss": 0.3314, "lr": 3.2643065662055136e-07, "epoch": 0.7617950195249984, "percentage": 76.18, "elapsed_time": "0:45:23", "remaining_time": "0:14:11", "throughput": 13758.25, "total_tokens": 37464448} +{"current_steps": 11905, "total_steps": 15621, "loss": 0.2837, "lr": 3.2560520352056033e-07, "epoch": 0.7621151014659753, "percentage": 76.21, "elapsed_time": "0:45:23", "remaining_time": "0:14:10", "throughput": 13761.31, "total_tokens": 37481856} +{"current_steps": 11910, "total_steps": 15621, "loss": 0.3985, "lr": 3.24780592410391e-07, "epoch": 0.7624351834069522, "percentage": 76.24, "elapsed_time": "0:45:24", "remaining_time": "0:14:08", "throughput": 13764.03, "total_tokens": 37497856} +{"current_steps": 11915, "total_steps": 15621, "loss": 0.4494, "lr": 3.2395682431957994e-07, "epoch": 0.762755265347929, "percentage": 76.28, "elapsed_time": "0:45:24", "remaining_time": "0:14:07", "throughput": 13766.67, "total_tokens": 37513600} +{"current_steps": 11920, "total_steps": 15621, "loss": 0.324, "lr": 3.231339002766115e-07, "epoch": 0.7630753472889059, "percentage": 76.31, "elapsed_time": "0:45:25", "remaining_time": "0:14:06", "throughput": 13769.35, "total_tokens": 37529408} +{"current_steps": 11925, "total_steps": 15621, "loss": 0.3296, "lr": 3.2231182130891564e-07, "epoch": 0.7633954292298829, "percentage": 76.34, "elapsed_time": "0:45:26", "remaining_time": "0:14:04", "throughput": 13772.23, "total_tokens": 37545984} +{"current_steps": 11930, "total_steps": 15621, "loss": 0.3405, "lr": 3.214905884428679e-07, "epoch": 0.7637155111708598, "percentage": 76.37, "elapsed_time": "0:45:26", "remaining_time": "0:14:03", "throughput": 13774.94, "total_tokens": 37561856} +{"current_steps": 11935, "total_steps": 15621, "loss": 0.3253, "lr": 3.206702027037868e-07, "epoch": 0.7640355931118367, "percentage": 76.4, "elapsed_time": "0:45:27", "remaining_time": "0:14:02", "throughput": 13777.82, "total_tokens": 37578624} +{"current_steps": 11940, "total_steps": 15621, "loss": 0.3882, "lr": 3.198506651159344e-07, "epoch": 0.7643556750528135, "percentage": 76.44, "elapsed_time": "0:45:28", "remaining_time": "0:14:01", "throughput": 13780.39, "total_tokens": 37593920} +{"current_steps": 11945, "total_steps": 15621, "loss": 0.38, "lr": 3.190319767025121e-07, "epoch": 0.7646757569937904, "percentage": 76.47, "elapsed_time": "0:45:28", "remaining_time": "0:13:59", "throughput": 13783.04, "total_tokens": 37609664} +{"current_steps": 11950, "total_steps": 15621, "loss": 0.4989, "lr": 3.1821413848566213e-07, "epoch": 0.7649958389347673, "percentage": 76.5, "elapsed_time": "0:45:29", "remaining_time": "0:13:58", "throughput": 13785.81, "total_tokens": 37626048} +{"current_steps": 11955, "total_steps": 15621, "loss": 0.3798, "lr": 3.1739715148646564e-07, "epoch": 0.7653159208757442, "percentage": 76.53, "elapsed_time": "0:45:29", "remaining_time": "0:13:57", "throughput": 13788.48, "total_tokens": 37641792} +{"current_steps": 11960, "total_steps": 15621, "loss": 0.4583, "lr": 3.1658101672494043e-07, "epoch": 0.7656360028167211, "percentage": 76.56, "elapsed_time": "0:45:30", "remaining_time": "0:13:55", "throughput": 13790.91, "total_tokens": 37656512} +{"current_steps": 11965, "total_steps": 15621, "loss": 0.3527, "lr": 3.157657352200397e-07, "epoch": 0.7659560847576979, "percentage": 76.6, "elapsed_time": "0:45:31", "remaining_time": "0:13:54", "throughput": 13793.51, "total_tokens": 37672000} +{"current_steps": 11970, "total_steps": 15621, "loss": 0.3362, "lr": 3.149513079896521e-07, "epoch": 0.7662761666986748, "percentage": 76.63, "elapsed_time": "0:45:31", "remaining_time": "0:13:53", "throughput": 13796.1, "total_tokens": 37687232} +{"current_steps": 11975, "total_steps": 15621, "loss": 0.3244, "lr": 3.1413773605060034e-07, "epoch": 0.7665962486396517, "percentage": 76.66, "elapsed_time": "0:45:32", "remaining_time": "0:13:51", "throughput": 13798.7, "total_tokens": 37702656} +{"current_steps": 11980, "total_steps": 15621, "loss": 0.4343, "lr": 3.1332502041863783e-07, "epoch": 0.7669163305806287, "percentage": 76.69, "elapsed_time": "0:45:32", "remaining_time": "0:13:50", "throughput": 13801.29, "total_tokens": 37718080} +{"current_steps": 11985, "total_steps": 15621, "loss": 0.3141, "lr": 3.1251316210844946e-07, "epoch": 0.7672364125216056, "percentage": 76.72, "elapsed_time": "0:45:33", "remaining_time": "0:13:49", "throughput": 13804.34, "total_tokens": 37735680} +{"current_steps": 11990, "total_steps": 15621, "loss": 0.2871, "lr": 3.1170216213365055e-07, "epoch": 0.7675564944625825, "percentage": 76.76, "elapsed_time": "0:45:34", "remaining_time": "0:13:48", "throughput": 13806.69, "total_tokens": 37749952} +{"current_steps": 11995, "total_steps": 15621, "loss": 0.4607, "lr": 3.1089202150678397e-07, "epoch": 0.7678765764035593, "percentage": 76.79, "elapsed_time": "0:45:34", "remaining_time": "0:13:46", "throughput": 13809.23, "total_tokens": 37765312} +{"current_steps": 12000, "total_steps": 15621, "loss": 0.4695, "lr": 3.1008274123931886e-07, "epoch": 0.7681966583445362, "percentage": 76.82, "elapsed_time": "0:45:35", "remaining_time": "0:13:45", "throughput": 13811.68, "total_tokens": 37780160} +{"current_steps": 12005, "total_steps": 15621, "loss": 0.2672, "lr": 3.092743223416523e-07, "epoch": 0.7685167402855131, "percentage": 76.85, "elapsed_time": "0:45:36", "remaining_time": "0:13:44", "throughput": 13814.42, "total_tokens": 37796352} +{"current_steps": 12010, "total_steps": 15621, "loss": 0.3499, "lr": 3.0846676582310413e-07, "epoch": 0.76883682222649, "percentage": 76.88, "elapsed_time": "0:45:36", "remaining_time": "0:13:42", "throughput": 13817.28, "total_tokens": 37812864} +{"current_steps": 12015, "total_steps": 15621, "loss": 0.3824, "lr": 3.076600726919185e-07, "epoch": 0.7691569041674668, "percentage": 76.92, "elapsed_time": "0:45:37", "remaining_time": "0:13:41", "throughput": 13819.76, "total_tokens": 37827840} +{"current_steps": 12020, "total_steps": 15621, "loss": 0.3579, "lr": 3.0685424395526106e-07, "epoch": 0.7694769861084437, "percentage": 76.95, "elapsed_time": "0:45:37", "remaining_time": "0:13:40", "throughput": 13823.19, "total_tokens": 37847040} +{"current_steps": 12025, "total_steps": 15621, "loss": 0.2819, "lr": 3.060492806192184e-07, "epoch": 0.7697970680494206, "percentage": 76.98, "elapsed_time": "0:45:38", "remaining_time": "0:13:38", "throughput": 13825.79, "total_tokens": 37862464} +{"current_steps": 12030, "total_steps": 15621, "loss": 0.377, "lr": 3.052451836887968e-07, "epoch": 0.7701171499903975, "percentage": 77.01, "elapsed_time": "0:45:39", "remaining_time": "0:13:37", "throughput": 13828.36, "total_tokens": 37877760} +{"current_steps": 12035, "total_steps": 15621, "loss": 0.2861, "lr": 3.044419541679207e-07, "epoch": 0.7704372319313745, "percentage": 77.04, "elapsed_time": "0:45:39", "remaining_time": "0:13:36", "throughput": 13830.82, "total_tokens": 37892800} +{"current_steps": 12040, "total_steps": 15621, "loss": 0.4239, "lr": 3.0363959305943153e-07, "epoch": 0.7707573138723514, "percentage": 77.08, "elapsed_time": "0:45:40", "remaining_time": "0:13:35", "throughput": 13833.59, "total_tokens": 37909056} +{"current_steps": 12045, "total_steps": 15621, "loss": 0.348, "lr": 3.028381013650867e-07, "epoch": 0.7710773958133282, "percentage": 77.11, "elapsed_time": "0:45:40", "remaining_time": "0:13:33", "throughput": 13836.39, "total_tokens": 37925376} +{"current_steps": 12050, "total_steps": 15621, "loss": 0.3716, "lr": 3.0203748008555783e-07, "epoch": 0.7713974777543051, "percentage": 77.14, "elapsed_time": "0:45:41", "remaining_time": "0:13:32", "throughput": 13839.14, "total_tokens": 37941632} +{"current_steps": 12055, "total_steps": 15621, "loss": 0.3805, "lr": 3.012377302204301e-07, "epoch": 0.771717559695282, "percentage": 77.17, "elapsed_time": "0:45:42", "remaining_time": "0:13:31", "throughput": 13841.69, "total_tokens": 37957056} +{"current_steps": 12060, "total_steps": 15621, "loss": 0.3916, "lr": 3.0043885276820046e-07, "epoch": 0.7720376416362589, "percentage": 77.2, "elapsed_time": "0:45:42", "remaining_time": "0:13:29", "throughput": 13844.36, "total_tokens": 37973184} +{"current_steps": 12065, "total_steps": 15621, "loss": 0.3087, "lr": 2.99640848726277e-07, "epoch": 0.7723577235772358, "percentage": 77.24, "elapsed_time": "0:45:43", "remaining_time": "0:13:28", "throughput": 13846.83, "total_tokens": 37988288} +{"current_steps": 12070, "total_steps": 15621, "loss": 0.3812, "lr": 2.9884371909097704e-07, "epoch": 0.7726778055182126, "percentage": 77.27, "elapsed_time": "0:45:44", "remaining_time": "0:13:27", "throughput": 13849.46, "total_tokens": 38004224} +{"current_steps": 12075, "total_steps": 15621, "loss": 0.3711, "lr": 2.9804746485752616e-07, "epoch": 0.7729978874591895, "percentage": 77.3, "elapsed_time": "0:45:44", "remaining_time": "0:13:26", "throughput": 13851.99, "total_tokens": 38019456} +{"current_steps": 12080, "total_steps": 15621, "loss": 0.4058, "lr": 2.972520870200573e-07, "epoch": 0.7733179694001664, "percentage": 77.33, "elapsed_time": "0:45:45", "remaining_time": "0:13:24", "throughput": 13854.59, "total_tokens": 38035264} +{"current_steps": 12085, "total_steps": 15621, "loss": 0.4045, "lr": 2.9645758657160904e-07, "epoch": 0.7736380513411434, "percentage": 77.36, "elapsed_time": "0:45:45", "remaining_time": "0:13:23", "throughput": 13857.25, "total_tokens": 38051072} +{"current_steps": 12090, "total_steps": 15621, "loss": 0.3538, "lr": 2.9566396450412444e-07, "epoch": 0.7739581332821203, "percentage": 77.4, "elapsed_time": "0:45:46", "remaining_time": "0:13:22", "throughput": 13859.8, "total_tokens": 38066688} +{"current_steps": 12095, "total_steps": 15621, "loss": 0.3193, "lr": 2.9487122180844957e-07, "epoch": 0.7742782152230971, "percentage": 77.43, "elapsed_time": "0:45:47", "remaining_time": "0:13:20", "throughput": 13862.36, "total_tokens": 38082048} +{"current_steps": 12100, "total_steps": 15621, "loss": 0.2996, "lr": 2.9407935947433406e-07, "epoch": 0.774598297164074, "percentage": 77.46, "elapsed_time": "0:45:47", "remaining_time": "0:13:19", "throughput": 13864.88, "total_tokens": 38097344} +{"current_steps": 12105, "total_steps": 15621, "loss": 0.446, "lr": 2.932883784904264e-07, "epoch": 0.7749183791050509, "percentage": 77.49, "elapsed_time": "0:45:48", "remaining_time": "0:13:18", "throughput": 13867.32, "total_tokens": 38112320} +{"current_steps": 12110, "total_steps": 15621, "loss": 0.2475, "lr": 2.9249827984427555e-07, "epoch": 0.7752384610460278, "percentage": 77.52, "elapsed_time": "0:45:48", "remaining_time": "0:13:16", "throughput": 13869.94, "total_tokens": 38128000} +{"current_steps": 12115, "total_steps": 15621, "loss": 0.3015, "lr": 2.917090645223297e-07, "epoch": 0.7755585429870047, "percentage": 77.56, "elapsed_time": "0:45:49", "remaining_time": "0:13:15", "throughput": 13872.42, "total_tokens": 38143168} +{"current_steps": 12120, "total_steps": 15621, "loss": 0.2912, "lr": 2.909207335099332e-07, "epoch": 0.7758786249279815, "percentage": 77.59, "elapsed_time": "0:45:50", "remaining_time": "0:13:14", "throughput": 13874.81, "total_tokens": 38157824} +{"current_steps": 12125, "total_steps": 15621, "loss": 0.3332, "lr": 2.9013328779132595e-07, "epoch": 0.7761987068689584, "percentage": 77.62, "elapsed_time": "0:45:50", "remaining_time": "0:13:13", "throughput": 13877.29, "total_tokens": 38172864} +{"current_steps": 12130, "total_steps": 15621, "loss": 0.425, "lr": 2.893467283496439e-07, "epoch": 0.7765187888099353, "percentage": 77.65, "elapsed_time": "0:45:51", "remaining_time": "0:13:11", "throughput": 13879.63, "total_tokens": 38187264} +{"current_steps": 12135, "total_steps": 15621, "loss": 0.3551, "lr": 2.885610561669155e-07, "epoch": 0.7768388707509122, "percentage": 77.68, "elapsed_time": "0:45:51", "remaining_time": "0:13:10", "throughput": 13882.57, "total_tokens": 38204288} +{"current_steps": 12140, "total_steps": 15621, "loss": 0.3462, "lr": 2.8777627222406163e-07, "epoch": 0.7771589526918892, "percentage": 77.72, "elapsed_time": "0:45:52", "remaining_time": "0:13:09", "throughput": 13885.01, "total_tokens": 38219264} +{"current_steps": 12145, "total_steps": 15621, "loss": 0.3863, "lr": 2.869923775008943e-07, "epoch": 0.777479034632866, "percentage": 77.75, "elapsed_time": "0:45:53", "remaining_time": "0:13:07", "throughput": 13887.48, "total_tokens": 38234496} +{"current_steps": 12150, "total_steps": 15621, "loss": 0.2559, "lr": 2.862093729761155e-07, "epoch": 0.7777991165738429, "percentage": 77.78, "elapsed_time": "0:45:53", "remaining_time": "0:13:06", "throughput": 13890.29, "total_tokens": 38251072} +{"current_steps": 12155, "total_steps": 15621, "loss": 0.4049, "lr": 2.854272596273152e-07, "epoch": 0.7781191985148198, "percentage": 77.81, "elapsed_time": "0:45:54", "remaining_time": "0:13:05", "throughput": 13892.84, "total_tokens": 38266560} +{"current_steps": 12160, "total_steps": 15621, "loss": 0.3287, "lr": 2.8464603843097134e-07, "epoch": 0.7784392804557967, "percentage": 77.84, "elapsed_time": "0:45:55", "remaining_time": "0:13:04", "throughput": 13895.62, "total_tokens": 38282944} +{"current_steps": 12165, "total_steps": 15621, "loss": 0.3291, "lr": 2.8386571036244764e-07, "epoch": 0.7787593623967736, "percentage": 77.88, "elapsed_time": "0:45:55", "remaining_time": "0:13:02", "throughput": 13898.39, "total_tokens": 38299264} +{"current_steps": 12170, "total_steps": 15621, "loss": 0.39, "lr": 2.830862763959929e-07, "epoch": 0.7790794443377504, "percentage": 77.91, "elapsed_time": "0:45:56", "remaining_time": "0:13:01", "throughput": 13900.88, "total_tokens": 38314368} +{"current_steps": 12175, "total_steps": 15621, "loss": 0.3154, "lr": 2.8230773750473956e-07, "epoch": 0.7793995262787273, "percentage": 77.94, "elapsed_time": "0:45:56", "remaining_time": "0:13:00", "throughput": 13903.4, "total_tokens": 38329664} +{"current_steps": 12180, "total_steps": 15621, "loss": 0.3072, "lr": 2.8153009466070267e-07, "epoch": 0.7797196082197042, "percentage": 77.97, "elapsed_time": "0:45:57", "remaining_time": "0:12:59", "throughput": 13905.99, "total_tokens": 38345408} +{"current_steps": 12185, "total_steps": 15621, "loss": 0.2878, "lr": 2.807533488347783e-07, "epoch": 0.7800396901606811, "percentage": 78.0, "elapsed_time": "0:45:58", "remaining_time": "0:12:57", "throughput": 13908.95, "total_tokens": 38362688} +{"current_steps": 12190, "total_steps": 15621, "loss": 0.2548, "lr": 2.7997750099674277e-07, "epoch": 0.7803597721016581, "percentage": 78.04, "elapsed_time": "0:45:58", "remaining_time": "0:12:56", "throughput": 13911.39, "total_tokens": 38377600} +{"current_steps": 12195, "total_steps": 15621, "loss": 0.5286, "lr": 2.792025521152512e-07, "epoch": 0.780679854042635, "percentage": 78.07, "elapsed_time": "0:45:59", "remaining_time": "0:12:55", "throughput": 13913.84, "total_tokens": 38392640} +{"current_steps": 12200, "total_steps": 15621, "loss": 0.4496, "lr": 2.784285031578365e-07, "epoch": 0.7809999359836118, "percentage": 78.1, "elapsed_time": "0:45:59", "remaining_time": "0:12:53", "throughput": 13916.42, "total_tokens": 38408448} +{"current_steps": 12205, "total_steps": 15621, "loss": 0.3629, "lr": 2.7765535509090786e-07, "epoch": 0.7813200179245887, "percentage": 78.13, "elapsed_time": "0:46:00", "remaining_time": "0:12:52", "throughput": 13919.08, "total_tokens": 38424512} +{"current_steps": 12210, "total_steps": 15621, "loss": 0.4739, "lr": 2.768831088797495e-07, "epoch": 0.7816400998655656, "percentage": 78.16, "elapsed_time": "0:46:01", "remaining_time": "0:12:51", "throughput": 13921.47, "total_tokens": 38439296} +{"current_steps": 12215, "total_steps": 15621, "loss": 0.2482, "lr": 2.761117654885201e-07, "epoch": 0.7819601818065425, "percentage": 78.2, "elapsed_time": "0:46:01", "remaining_time": "0:12:50", "throughput": 13924.16, "total_tokens": 38455424} +{"current_steps": 12220, "total_steps": 15621, "loss": 0.3265, "lr": 2.7534132588025063e-07, "epoch": 0.7822802637475194, "percentage": 78.23, "elapsed_time": "0:46:02", "remaining_time": "0:12:48", "throughput": 13926.7, "total_tokens": 38470976} +{"current_steps": 12225, "total_steps": 15621, "loss": 0.5075, "lr": 2.7457179101684483e-07, "epoch": 0.7826003456884962, "percentage": 78.26, "elapsed_time": "0:46:02", "remaining_time": "0:12:47", "throughput": 13929.16, "total_tokens": 38486016} +{"current_steps": 12230, "total_steps": 15621, "loss": 0.298, "lr": 2.7380316185907506e-07, "epoch": 0.7829204276294731, "percentage": 78.29, "elapsed_time": "0:46:03", "remaining_time": "0:12:46", "throughput": 13931.65, "total_tokens": 38501248} +{"current_steps": 12235, "total_steps": 15621, "loss": 0.3503, "lr": 2.730354393665839e-07, "epoch": 0.78324050957045, "percentage": 78.32, "elapsed_time": "0:46:04", "remaining_time": "0:12:44", "throughput": 13934.23, "total_tokens": 38516992} +{"current_steps": 12240, "total_steps": 15621, "loss": 0.3702, "lr": 2.7226862449788245e-07, "epoch": 0.7835605915114269, "percentage": 78.36, "elapsed_time": "0:46:04", "remaining_time": "0:12:43", "throughput": 13936.57, "total_tokens": 38531456} +{"current_steps": 12245, "total_steps": 15621, "loss": 0.3264, "lr": 2.715027182103482e-07, "epoch": 0.7838806734524039, "percentage": 78.39, "elapsed_time": "0:46:05", "remaining_time": "0:12:42", "throughput": 13939.12, "total_tokens": 38546880} +{"current_steps": 12250, "total_steps": 15621, "loss": 0.3039, "lr": 2.707377214602232e-07, "epoch": 0.7842007553933807, "percentage": 78.42, "elapsed_time": "0:46:05", "remaining_time": "0:12:41", "throughput": 13941.62, "total_tokens": 38562176} +{"current_steps": 12255, "total_steps": 15621, "loss": 0.4366, "lr": 2.699736352026157e-07, "epoch": 0.7845208373343576, "percentage": 78.45, "elapsed_time": "0:46:06", "remaining_time": "0:12:39", "throughput": 13944.14, "total_tokens": 38577472} +{"current_steps": 12260, "total_steps": 15621, "loss": 0.3297, "lr": 2.6921046039149645e-07, "epoch": 0.7848409192753345, "percentage": 78.48, "elapsed_time": "0:46:07", "remaining_time": "0:12:38", "throughput": 13946.69, "total_tokens": 38593088} +{"current_steps": 12265, "total_steps": 15621, "loss": 0.3408, "lr": 2.6844819797969744e-07, "epoch": 0.7851610012163114, "percentage": 78.52, "elapsed_time": "0:46:07", "remaining_time": "0:12:37", "throughput": 13949.1, "total_tokens": 38607936} +{"current_steps": 12270, "total_steps": 15621, "loss": 0.2481, "lr": 2.6768684891891236e-07, "epoch": 0.7854810831572883, "percentage": 78.55, "elapsed_time": "0:46:08", "remaining_time": "0:12:36", "throughput": 13951.97, "total_tokens": 38625024} +{"current_steps": 12275, "total_steps": 15621, "loss": 0.3321, "lr": 2.6692641415969497e-07, "epoch": 0.7858011650982651, "percentage": 78.58, "elapsed_time": "0:46:09", "remaining_time": "0:12:34", "throughput": 13954.79, "total_tokens": 38641792} +{"current_steps": 12280, "total_steps": 15621, "loss": 0.395, "lr": 2.66166894651457e-07, "epoch": 0.786121247039242, "percentage": 78.61, "elapsed_time": "0:46:09", "remaining_time": "0:12:33", "throughput": 13957.27, "total_tokens": 38656896} +{"current_steps": 12285, "total_steps": 15621, "loss": 0.3426, "lr": 2.654082913424668e-07, "epoch": 0.7864413289802189, "percentage": 78.64, "elapsed_time": "0:46:10", "remaining_time": "0:12:32", "throughput": 13959.87, "total_tokens": 38672448} +{"current_steps": 12290, "total_steps": 15621, "loss": 0.3016, "lr": 2.6465060517985003e-07, "epoch": 0.7867614109211958, "percentage": 78.68, "elapsed_time": "0:46:10", "remaining_time": "0:12:31", "throughput": 13962.55, "total_tokens": 38688576} +{"current_steps": 12295, "total_steps": 15621, "loss": 0.5123, "lr": 2.638938371095867e-07, "epoch": 0.7870814928621728, "percentage": 78.71, "elapsed_time": "0:46:11", "remaining_time": "0:12:29", "throughput": 13965.09, "total_tokens": 38704064} +{"current_steps": 12300, "total_steps": 15621, "loss": 0.381, "lr": 2.6313798807651065e-07, "epoch": 0.7874015748031497, "percentage": 78.74, "elapsed_time": "0:46:12", "remaining_time": "0:12:28", "throughput": 13967.5, "total_tokens": 38718976} +{"current_steps": 12305, "total_steps": 15621, "loss": 0.3529, "lr": 2.6238305902430813e-07, "epoch": 0.7877216567441265, "percentage": 78.77, "elapsed_time": "0:46:12", "remaining_time": "0:12:27", "throughput": 13969.98, "total_tokens": 38734272} +{"current_steps": 12310, "total_steps": 15621, "loss": 0.307, "lr": 2.61629050895517e-07, "epoch": 0.7880417386851034, "percentage": 78.8, "elapsed_time": "0:46:13", "remaining_time": "0:12:25", "throughput": 13972.45, "total_tokens": 38749504} +{"current_steps": 12315, "total_steps": 15621, "loss": 0.3171, "lr": 2.608759646315253e-07, "epoch": 0.7883618206260803, "percentage": 78.84, "elapsed_time": "0:46:13", "remaining_time": "0:12:24", "throughput": 13974.85, "total_tokens": 38764352} +{"current_steps": 12320, "total_steps": 15621, "loss": 0.3637, "lr": 2.6012380117257005e-07, "epoch": 0.7886819025670572, "percentage": 78.87, "elapsed_time": "0:46:14", "remaining_time": "0:12:23", "throughput": 13977.46, "total_tokens": 38780096} +{"current_steps": 12325, "total_steps": 15621, "loss": 0.3902, "lr": 2.5937256145773613e-07, "epoch": 0.789001984508034, "percentage": 78.9, "elapsed_time": "0:46:15", "remaining_time": "0:12:22", "throughput": 13980.02, "total_tokens": 38795712} +{"current_steps": 12330, "total_steps": 15621, "loss": 0.3264, "lr": 2.586222464249551e-07, "epoch": 0.7893220664490109, "percentage": 78.93, "elapsed_time": "0:46:15", "remaining_time": "0:12:20", "throughput": 13982.58, "total_tokens": 38811328} +{"current_steps": 12335, "total_steps": 15621, "loss": 0.2022, "lr": 2.5787285701100413e-07, "epoch": 0.7896421483899878, "percentage": 78.96, "elapsed_time": "0:46:16", "remaining_time": "0:12:19", "throughput": 13985.01, "total_tokens": 38826240} +{"current_steps": 12340, "total_steps": 15621, "loss": 0.3672, "lr": 2.571243941515048e-07, "epoch": 0.7899622303309647, "percentage": 79.0, "elapsed_time": "0:46:16", "remaining_time": "0:12:18", "throughput": 13987.71, "total_tokens": 38842624} +{"current_steps": 12345, "total_steps": 15621, "loss": 0.2672, "lr": 2.563768587809213e-07, "epoch": 0.7902823122719416, "percentage": 79.03, "elapsed_time": "0:46:17", "remaining_time": "0:12:17", "throughput": 13990.09, "total_tokens": 38857472} +{"current_steps": 12350, "total_steps": 15621, "loss": 0.4118, "lr": 2.5563025183256137e-07, "epoch": 0.7906023942129186, "percentage": 79.06, "elapsed_time": "0:46:18", "remaining_time": "0:12:15", "throughput": 13992.47, "total_tokens": 38872256} +{"current_steps": 12355, "total_steps": 15621, "loss": 0.552, "lr": 2.548845742385717e-07, "epoch": 0.7909224761538954, "percentage": 79.09, "elapsed_time": "0:46:18", "remaining_time": "0:12:14", "throughput": 13995.51, "total_tokens": 38890048} +{"current_steps": 12360, "total_steps": 15621, "loss": 0.2356, "lr": 2.541398269299393e-07, "epoch": 0.7912425580948723, "percentage": 79.12, "elapsed_time": "0:46:19", "remaining_time": "0:12:13", "throughput": 13998.08, "total_tokens": 38905664} +{"current_steps": 12365, "total_steps": 15621, "loss": 0.2978, "lr": 2.5339601083649063e-07, "epoch": 0.7915626400358492, "percentage": 79.16, "elapsed_time": "0:46:20", "remaining_time": "0:12:12", "throughput": 14001.63, "total_tokens": 38926144} +{"current_steps": 12370, "total_steps": 15621, "loss": 0.4751, "lr": 2.526531268868889e-07, "epoch": 0.7918827219768261, "percentage": 79.19, "elapsed_time": "0:46:20", "remaining_time": "0:12:10", "throughput": 14004.35, "total_tokens": 38942720} +{"current_steps": 12375, "total_steps": 15621, "loss": 0.3397, "lr": 2.5191117600863266e-07, "epoch": 0.792202803917803, "percentage": 79.22, "elapsed_time": "0:46:21", "remaining_time": "0:12:09", "throughput": 14006.85, "total_tokens": 38958144} +{"current_steps": 12380, "total_steps": 15621, "loss": 0.2568, "lr": 2.511701591280565e-07, "epoch": 0.7925228858587798, "percentage": 79.25, "elapsed_time": "0:46:21", "remaining_time": "0:12:08", "throughput": 14009.35, "total_tokens": 38973376} +{"current_steps": 12385, "total_steps": 15621, "loss": 0.346, "lr": 2.504300771703295e-07, "epoch": 0.7928429677997567, "percentage": 79.28, "elapsed_time": "0:46:22", "remaining_time": "0:12:07", "throughput": 14012.02, "total_tokens": 38989504} +{"current_steps": 12390, "total_steps": 15621, "loss": 0.3626, "lr": 2.496909310594517e-07, "epoch": 0.7931630497407336, "percentage": 79.32, "elapsed_time": "0:46:23", "remaining_time": "0:12:05", "throughput": 14014.58, "total_tokens": 39005056} +{"current_steps": 12395, "total_steps": 15621, "loss": 0.4459, "lr": 2.4895272171825587e-07, "epoch": 0.7934831316817105, "percentage": 79.35, "elapsed_time": "0:46:23", "remaining_time": "0:12:04", "throughput": 14017.12, "total_tokens": 39020608} +{"current_steps": 12400, "total_steps": 15621, "loss": 0.443, "lr": 2.482154500684055e-07, "epoch": 0.7938032136226874, "percentage": 79.38, "elapsed_time": "0:46:24", "remaining_time": "0:12:03", "throughput": 14019.57, "total_tokens": 39035712} +{"current_steps": 12405, "total_steps": 15621, "loss": 0.3361, "lr": 2.4747911703039293e-07, "epoch": 0.7941232955636643, "percentage": 79.41, "elapsed_time": "0:46:24", "remaining_time": "0:12:02", "throughput": 14022.01, "total_tokens": 39050880} +{"current_steps": 12410, "total_steps": 15621, "loss": 0.3689, "lr": 2.467437235235378e-07, "epoch": 0.7944433775046412, "percentage": 79.44, "elapsed_time": "0:46:25", "remaining_time": "0:12:00", "throughput": 14024.45, "total_tokens": 39065792} +{"current_steps": 12415, "total_steps": 15621, "loss": 0.3418, "lr": 2.460092704659883e-07, "epoch": 0.7947634594456181, "percentage": 79.48, "elapsed_time": "0:46:26", "remaining_time": "0:11:59", "throughput": 14026.93, "total_tokens": 39080960} +{"current_steps": 12420, "total_steps": 15621, "loss": 0.2604, "lr": 2.452757587747174e-07, "epoch": 0.795083541386595, "percentage": 79.51, "elapsed_time": "0:46:26", "remaining_time": "0:11:58", "throughput": 14029.59, "total_tokens": 39097216} +{"current_steps": 12425, "total_steps": 15621, "loss": 0.1771, "lr": 2.445431893655232e-07, "epoch": 0.7954036233275719, "percentage": 79.54, "elapsed_time": "0:46:27", "remaining_time": "0:11:56", "throughput": 14032.21, "total_tokens": 39113152} +{"current_steps": 12430, "total_steps": 15621, "loss": 0.3722, "lr": 2.438115631530271e-07, "epoch": 0.7957237052685487, "percentage": 79.57, "elapsed_time": "0:46:28", "remaining_time": "0:11:55", "throughput": 14035.05, "total_tokens": 39130176} +{"current_steps": 12435, "total_steps": 15621, "loss": 0.2283, "lr": 2.4308088105067305e-07, "epoch": 0.7960437872095256, "percentage": 79.6, "elapsed_time": "0:46:28", "remaining_time": "0:11:54", "throughput": 14037.58, "total_tokens": 39145792} +{"current_steps": 12440, "total_steps": 15621, "loss": 0.4201, "lr": 2.423511439707262e-07, "epoch": 0.7963638691505025, "percentage": 79.64, "elapsed_time": "0:46:29", "remaining_time": "0:11:53", "throughput": 14040.09, "total_tokens": 39161280} +{"current_steps": 12445, "total_steps": 15621, "loss": 0.2784, "lr": 2.4162235282427177e-07, "epoch": 0.7966839510914794, "percentage": 79.67, "elapsed_time": "0:46:29", "remaining_time": "0:11:51", "throughput": 14042.54, "total_tokens": 39176512} +{"current_steps": 12450, "total_steps": 15621, "loss": 0.3621, "lr": 2.408945085212144e-07, "epoch": 0.7970040330324563, "percentage": 79.7, "elapsed_time": "0:46:30", "remaining_time": "0:11:50", "throughput": 14045.0, "total_tokens": 39191808} +{"current_steps": 12455, "total_steps": 15621, "loss": 0.2479, "lr": 2.401676119702759e-07, "epoch": 0.7973241149734333, "percentage": 79.73, "elapsed_time": "0:46:31", "remaining_time": "0:11:49", "throughput": 14047.78, "total_tokens": 39208640} +{"current_steps": 12460, "total_steps": 15621, "loss": 0.3438, "lr": 2.394416640789952e-07, "epoch": 0.7976441969144101, "percentage": 79.76, "elapsed_time": "0:46:31", "remaining_time": "0:11:48", "throughput": 14050.1, "total_tokens": 39223232} +{"current_steps": 12465, "total_steps": 15621, "loss": 0.3098, "lr": 2.3871666575372696e-07, "epoch": 0.797964278855387, "percentage": 79.8, "elapsed_time": "0:46:32", "remaining_time": "0:11:46", "throughput": 14052.57, "total_tokens": 39238656} +{"current_steps": 12470, "total_steps": 15621, "loss": 0.532, "lr": 2.3799261789963964e-07, "epoch": 0.7982843607963639, "percentage": 79.83, "elapsed_time": "0:46:32", "remaining_time": "0:11:45", "throughput": 14055.45, "total_tokens": 39255872} +{"current_steps": 12475, "total_steps": 15621, "loss": 0.2708, "lr": 2.3726952142071644e-07, "epoch": 0.7986044427373408, "percentage": 79.86, "elapsed_time": "0:46:33", "remaining_time": "0:11:44", "throughput": 14057.79, "total_tokens": 39270784} +{"current_steps": 12480, "total_steps": 15621, "loss": 0.3462, "lr": 2.365473772197508e-07, "epoch": 0.7989245246783176, "percentage": 79.89, "elapsed_time": "0:46:34", "remaining_time": "0:11:43", "throughput": 14060.3, "total_tokens": 39286080} +{"current_steps": 12485, "total_steps": 15621, "loss": 0.356, "lr": 2.3582618619834883e-07, "epoch": 0.7992446066192945, "percentage": 79.92, "elapsed_time": "0:46:34", "remaining_time": "0:11:41", "throughput": 14062.77, "total_tokens": 39301312} +{"current_steps": 12490, "total_steps": 15621, "loss": 0.2216, "lr": 2.3510594925692528e-07, "epoch": 0.7995646885602714, "percentage": 79.96, "elapsed_time": "0:46:35", "remaining_time": "0:11:40", "throughput": 14065.25, "total_tokens": 39316736} +{"current_steps": 12495, "total_steps": 15621, "loss": 0.3493, "lr": 2.343866672947057e-07, "epoch": 0.7998847705012483, "percentage": 79.99, "elapsed_time": "0:46:35", "remaining_time": "0:11:39", "throughput": 14067.56, "total_tokens": 39331264} +{"current_steps": 12500, "total_steps": 15621, "loss": 0.2587, "lr": 2.336683412097209e-07, "epoch": 0.8002048524422252, "percentage": 80.02, "elapsed_time": "0:46:36", "remaining_time": "0:11:38", "throughput": 14069.89, "total_tokens": 39345856} +{"current_steps": 12505, "total_steps": 15621, "loss": 0.3645, "lr": 2.329509718988095e-07, "epoch": 0.800524934383202, "percentage": 80.05, "elapsed_time": "0:46:37", "remaining_time": "0:11:36", "throughput": 14072.41, "total_tokens": 39361280} +{"current_steps": 12510, "total_steps": 15621, "loss": 0.3367, "lr": 2.3223456025761645e-07, "epoch": 0.800845016324179, "percentage": 80.08, "elapsed_time": "0:46:37", "remaining_time": "0:11:35", "throughput": 14074.71, "total_tokens": 39375872} +{"current_steps": 12512, "total_steps": 15621, "eval_loss": 0.36358681321144104, "epoch": 0.8009730491005698, "percentage": 80.1, "elapsed_time": "0:47:27", "remaining_time": "0:11:47", "throughput": 13832.76, "total_tokens": 39382144} +{"current_steps": 12515, "total_steps": 15621, "loss": 0.2866, "lr": 2.315191071805892e-07, "epoch": 0.8011650982651559, "percentage": 80.12, "elapsed_time": "0:47:58", "remaining_time": "0:11:54", "throughput": 13685.71, "total_tokens": 39392320} +{"current_steps": 12520, "total_steps": 15621, "loss": 0.3619, "lr": 2.3080461356097937e-07, "epoch": 0.8014851802061328, "percentage": 80.15, "elapsed_time": "0:47:58", "remaining_time": "0:11:53", "throughput": 13688.19, "total_tokens": 39407680} +{"current_steps": 12525, "total_steps": 15621, "loss": 0.288, "lr": 2.30091080290841e-07, "epoch": 0.8018052621471097, "percentage": 80.18, "elapsed_time": "0:47:59", "remaining_time": "0:11:51", "throughput": 13690.95, "total_tokens": 39424512} +{"current_steps": 12530, "total_steps": 15621, "loss": 0.3463, "lr": 2.29378508261029e-07, "epoch": 0.8021253440880866, "percentage": 80.21, "elapsed_time": "0:48:00", "remaining_time": "0:11:50", "throughput": 13693.26, "total_tokens": 39439296} +{"current_steps": 12535, "total_steps": 15621, "loss": 0.3707, "lr": 2.2866689836119702e-07, "epoch": 0.8024454260290634, "percentage": 80.24, "elapsed_time": "0:48:00", "remaining_time": "0:11:49", "throughput": 13696.13, "total_tokens": 39456576} +{"current_steps": 12540, "total_steps": 15621, "loss": 0.3536, "lr": 2.2795625147979913e-07, "epoch": 0.8027655079700403, "percentage": 80.28, "elapsed_time": "0:48:01", "remaining_time": "0:11:47", "throughput": 13698.76, "total_tokens": 39472512} +{"current_steps": 12545, "total_steps": 15621, "loss": 0.2332, "lr": 2.2724656850408597e-07, "epoch": 0.8030855899110172, "percentage": 80.31, "elapsed_time": "0:48:02", "remaining_time": "0:11:46", "throughput": 13701.29, "total_tokens": 39488192} +{"current_steps": 12550, "total_steps": 15621, "loss": 0.3855, "lr": 2.2653785032010532e-07, "epoch": 0.8034056718519941, "percentage": 80.34, "elapsed_time": "0:48:02", "remaining_time": "0:11:45", "throughput": 13703.71, "total_tokens": 39503552} +{"current_steps": 12555, "total_steps": 15621, "loss": 0.3363, "lr": 2.258300978126999e-07, "epoch": 0.803725753792971, "percentage": 80.37, "elapsed_time": "0:48:03", "remaining_time": "0:11:44", "throughput": 13706.35, "total_tokens": 39519744} +{"current_steps": 12560, "total_steps": 15621, "loss": 0.4753, "lr": 2.2512331186550715e-07, "epoch": 0.804045835733948, "percentage": 80.4, "elapsed_time": "0:48:03", "remaining_time": "0:11:42", "throughput": 13708.82, "total_tokens": 39535232} +{"current_steps": 12565, "total_steps": 15621, "loss": 0.3878, "lr": 2.244174933609575e-07, "epoch": 0.8043659176749248, "percentage": 80.44, "elapsed_time": "0:48:04", "remaining_time": "0:11:41", "throughput": 13711.08, "total_tokens": 39549568} +{"current_steps": 12570, "total_steps": 15621, "loss": 0.2764, "lr": 2.2371264318027383e-07, "epoch": 0.8046859996159017, "percentage": 80.47, "elapsed_time": "0:48:05", "remaining_time": "0:11:40", "throughput": 13712.61, "total_tokens": 39566016} +{"current_steps": 12575, "total_steps": 15621, "loss": 0.2308, "lr": 2.2300876220346975e-07, "epoch": 0.8050060815568786, "percentage": 80.5, "elapsed_time": "0:48:05", "remaining_time": "0:11:39", "throughput": 13715.15, "total_tokens": 39581760} +{"current_steps": 12580, "total_steps": 15621, "loss": 0.2785, "lr": 2.2230585130934897e-07, "epoch": 0.8053261634978555, "percentage": 80.53, "elapsed_time": "0:48:06", "remaining_time": "0:11:37", "throughput": 13717.76, "total_tokens": 39597888} +{"current_steps": 12585, "total_steps": 15621, "loss": 0.4454, "lr": 2.2160391137550394e-07, "epoch": 0.8056462454388323, "percentage": 80.56, "elapsed_time": "0:48:07", "remaining_time": "0:11:36", "throughput": 13720.28, "total_tokens": 39613568} +{"current_steps": 12590, "total_steps": 15621, "loss": 0.4314, "lr": 2.2090294327831494e-07, "epoch": 0.8059663273798092, "percentage": 80.6, "elapsed_time": "0:48:07", "remaining_time": "0:11:35", "throughput": 13722.59, "total_tokens": 39628096} +{"current_steps": 12595, "total_steps": 15621, "loss": 0.2695, "lr": 2.202029478929488e-07, "epoch": 0.8062864093207861, "percentage": 80.63, "elapsed_time": "0:48:08", "remaining_time": "0:11:33", "throughput": 13724.83, "total_tokens": 39642560} +{"current_steps": 12600, "total_steps": 15621, "loss": 0.2967, "lr": 2.195039260933581e-07, "epoch": 0.806606491261763, "percentage": 80.66, "elapsed_time": "0:48:08", "remaining_time": "0:11:32", "throughput": 13727.34, "total_tokens": 39658112} +{"current_steps": 12605, "total_steps": 15621, "loss": 0.2657, "lr": 2.1880587875227973e-07, "epoch": 0.8069265732027399, "percentage": 80.69, "elapsed_time": "0:48:09", "remaining_time": "0:11:31", "throughput": 13729.95, "total_tokens": 39674112} +{"current_steps": 12610, "total_steps": 15621, "loss": 0.3313, "lr": 2.18108806741234e-07, "epoch": 0.8072466551437167, "percentage": 80.72, "elapsed_time": "0:48:10", "remaining_time": "0:11:30", "throughput": 13732.63, "total_tokens": 39690432} +{"current_steps": 12615, "total_steps": 15621, "loss": 0.3512, "lr": 2.1741271093052315e-07, "epoch": 0.8075667370846937, "percentage": 80.76, "elapsed_time": "0:48:10", "remaining_time": "0:11:28", "throughput": 13735.07, "total_tokens": 39705792} +{"current_steps": 12620, "total_steps": 15621, "loss": 0.4692, "lr": 2.167175921892318e-07, "epoch": 0.8078868190256706, "percentage": 80.79, "elapsed_time": "0:48:11", "remaining_time": "0:11:27", "throughput": 13737.68, "total_tokens": 39722048} +{"current_steps": 12625, "total_steps": 15621, "loss": 0.4239, "lr": 2.1602345138522314e-07, "epoch": 0.8082069009666475, "percentage": 80.82, "elapsed_time": "0:48:12", "remaining_time": "0:11:26", "throughput": 13740.31, "total_tokens": 39738304} +{"current_steps": 12630, "total_steps": 15621, "loss": 0.3468, "lr": 2.1533028938514008e-07, "epoch": 0.8085269829076244, "percentage": 80.85, "elapsed_time": "0:48:12", "remaining_time": "0:11:25", "throughput": 13742.78, "total_tokens": 39753728} +{"current_steps": 12635, "total_steps": 15621, "loss": 0.3435, "lr": 2.1463810705440433e-07, "epoch": 0.8088470648486012, "percentage": 80.88, "elapsed_time": "0:48:13", "remaining_time": "0:11:23", "throughput": 13745.31, "total_tokens": 39769600} +{"current_steps": 12640, "total_steps": 15621, "loss": 0.3519, "lr": 2.139469052572127e-07, "epoch": 0.8091671467895781, "percentage": 80.92, "elapsed_time": "0:48:13", "remaining_time": "0:11:22", "throughput": 13747.56, "total_tokens": 39784000} +{"current_steps": 12645, "total_steps": 15621, "loss": 0.344, "lr": 2.1325668485653891e-07, "epoch": 0.809487228730555, "percentage": 80.95, "elapsed_time": "0:48:14", "remaining_time": "0:11:21", "throughput": 13750.19, "total_tokens": 39800320} +{"current_steps": 12650, "total_steps": 15621, "loss": 0.457, "lr": 2.1256744671413173e-07, "epoch": 0.8098073106715319, "percentage": 80.98, "elapsed_time": "0:48:15", "remaining_time": "0:11:19", "throughput": 13752.59, "total_tokens": 39815360} +{"current_steps": 12655, "total_steps": 15621, "loss": 0.3821, "lr": 2.1187919169051316e-07, "epoch": 0.8101273926125088, "percentage": 81.01, "elapsed_time": "0:48:15", "remaining_time": "0:11:18", "throughput": 13754.85, "total_tokens": 39829952} +{"current_steps": 12660, "total_steps": 15621, "loss": 0.3528, "lr": 2.111919206449767e-07, "epoch": 0.8104474745534856, "percentage": 81.04, "elapsed_time": "0:48:16", "remaining_time": "0:11:17", "throughput": 13757.31, "total_tokens": 39845376} +{"current_steps": 12665, "total_steps": 15621, "loss": 0.4858, "lr": 2.1050563443558922e-07, "epoch": 0.8107675564944626, "percentage": 81.08, "elapsed_time": "0:48:16", "remaining_time": "0:11:16", "throughput": 13759.95, "total_tokens": 39861696} +{"current_steps": 12670, "total_steps": 15621, "loss": 0.297, "lr": 2.0982033391918697e-07, "epoch": 0.8110876384354395, "percentage": 81.11, "elapsed_time": "0:48:17", "remaining_time": "0:11:14", "throughput": 13762.47, "total_tokens": 39877440} +{"current_steps": 12675, "total_steps": 15621, "loss": 0.334, "lr": 2.0913601995137543e-07, "epoch": 0.8114077203764164, "percentage": 81.14, "elapsed_time": "0:48:18", "remaining_time": "0:11:13", "throughput": 13765.08, "total_tokens": 39893760} +{"current_steps": 12680, "total_steps": 15621, "loss": 0.2943, "lr": 2.084526933865287e-07, "epoch": 0.8117278023173933, "percentage": 81.17, "elapsed_time": "0:48:18", "remaining_time": "0:11:12", "throughput": 13767.62, "total_tokens": 39909568} +{"current_steps": 12685, "total_steps": 15621, "loss": 0.4543, "lr": 2.0777035507778817e-07, "epoch": 0.8120478842583702, "percentage": 81.2, "elapsed_time": "0:48:19", "remaining_time": "0:11:11", "throughput": 13769.79, "total_tokens": 39923648} +{"current_steps": 12690, "total_steps": 15621, "loss": 0.4299, "lr": 2.0708900587706135e-07, "epoch": 0.812367966199347, "percentage": 81.24, "elapsed_time": "0:48:19", "remaining_time": "0:11:09", "throughput": 13772.17, "total_tokens": 39939008} +{"current_steps": 12695, "total_steps": 15621, "loss": 0.3374, "lr": 2.0640864663502e-07, "epoch": 0.8126880481403239, "percentage": 81.27, "elapsed_time": "0:48:20", "remaining_time": "0:11:08", "throughput": 13774.71, "total_tokens": 39955072} +{"current_steps": 12700, "total_steps": 15621, "loss": 0.4545, "lr": 2.057292782011013e-07, "epoch": 0.8130081300813008, "percentage": 81.3, "elapsed_time": "0:48:21", "remaining_time": "0:11:07", "throughput": 13777.24, "total_tokens": 39970880} +{"current_steps": 12705, "total_steps": 15621, "loss": 0.2967, "lr": 2.0505090142350468e-07, "epoch": 0.8133282120222777, "percentage": 81.33, "elapsed_time": "0:48:21", "remaining_time": "0:11:06", "throughput": 13779.64, "total_tokens": 39986240} +{"current_steps": 12710, "total_steps": 15621, "loss": 0.3427, "lr": 2.0437351714919127e-07, "epoch": 0.8136482939632546, "percentage": 81.36, "elapsed_time": "0:48:22", "remaining_time": "0:11:04", "throughput": 13782.12, "total_tokens": 40001856} +{"current_steps": 12715, "total_steps": 15621, "loss": 0.309, "lr": 2.0369712622388336e-07, "epoch": 0.8139683759042314, "percentage": 81.4, "elapsed_time": "0:48:23", "remaining_time": "0:11:03", "throughput": 13784.72, "total_tokens": 40018112} +{"current_steps": 12720, "total_steps": 15621, "loss": 0.2879, "lr": 2.0302172949206298e-07, "epoch": 0.8142884578452084, "percentage": 81.43, "elapsed_time": "0:48:23", "remaining_time": "0:11:02", "throughput": 13787.14, "total_tokens": 40033664} +{"current_steps": 12725, "total_steps": 15621, "loss": 0.2967, "lr": 2.0234732779697094e-07, "epoch": 0.8146085397861853, "percentage": 81.46, "elapsed_time": "0:48:24", "remaining_time": "0:11:00", "throughput": 13789.5, "total_tokens": 40048768} +{"current_steps": 12730, "total_steps": 15621, "loss": 0.3229, "lr": 2.016739219806056e-07, "epoch": 0.8149286217271622, "percentage": 81.49, "elapsed_time": "0:48:24", "remaining_time": "0:10:59", "throughput": 13791.72, "total_tokens": 40063232} +{"current_steps": 12735, "total_steps": 15621, "loss": 0.3904, "lr": 2.0100151288372215e-07, "epoch": 0.8152487036681391, "percentage": 81.52, "elapsed_time": "0:48:25", "remaining_time": "0:10:58", "throughput": 13794.28, "total_tokens": 40079296} +{"current_steps": 12740, "total_steps": 15621, "loss": 0.5554, "lr": 2.0033010134583084e-07, "epoch": 0.8155687856091159, "percentage": 81.56, "elapsed_time": "0:48:26", "remaining_time": "0:10:57", "throughput": 13796.77, "total_tokens": 40094976} +{"current_steps": 12745, "total_steps": 15621, "loss": 0.3218, "lr": 1.9965968820519763e-07, "epoch": 0.8158888675500928, "percentage": 81.59, "elapsed_time": "0:48:26", "remaining_time": "0:10:55", "throughput": 13799.2, "total_tokens": 40110464} +{"current_steps": 12750, "total_steps": 15621, "loss": 0.3981, "lr": 1.9899027429884042e-07, "epoch": 0.8162089494910697, "percentage": 81.62, "elapsed_time": "0:48:27", "remaining_time": "0:10:54", "throughput": 13801.57, "total_tokens": 40125568} +{"current_steps": 12755, "total_steps": 15621, "loss": 0.4142, "lr": 1.983218604625305e-07, "epoch": 0.8165290314320466, "percentage": 81.65, "elapsed_time": "0:48:27", "remaining_time": "0:10:53", "throughput": 13804.12, "total_tokens": 40141440} +{"current_steps": 12760, "total_steps": 15621, "loss": 0.3275, "lr": 1.9765444753079096e-07, "epoch": 0.8168491133730235, "percentage": 81.68, "elapsed_time": "0:48:28", "remaining_time": "0:10:52", "throughput": 13806.43, "total_tokens": 40156416} +{"current_steps": 12765, "total_steps": 15621, "loss": 0.3998, "lr": 1.9698803633689408e-07, "epoch": 0.8171691953140003, "percentage": 81.72, "elapsed_time": "0:48:29", "remaining_time": "0:10:50", "throughput": 13809.06, "total_tokens": 40172928} +{"current_steps": 12770, "total_steps": 15621, "loss": 0.2336, "lr": 1.963226277128619e-07, "epoch": 0.8174892772549772, "percentage": 81.75, "elapsed_time": "0:48:29", "remaining_time": "0:10:49", "throughput": 13811.46, "total_tokens": 40188096} +{"current_steps": 12775, "total_steps": 15621, "loss": 0.3593, "lr": 1.956582224894655e-07, "epoch": 0.8178093591959542, "percentage": 81.78, "elapsed_time": "0:48:30", "remaining_time": "0:10:48", "throughput": 13813.97, "total_tokens": 40204032} +{"current_steps": 12780, "total_steps": 15621, "loss": 0.3646, "lr": 1.949948214962227e-07, "epoch": 0.8181294411369311, "percentage": 81.81, "elapsed_time": "0:48:30", "remaining_time": "0:10:47", "throughput": 13816.32, "total_tokens": 40218944} +{"current_steps": 12785, "total_steps": 15621, "loss": 0.3731, "lr": 1.943324255613964e-07, "epoch": 0.818449523077908, "percentage": 81.84, "elapsed_time": "0:48:31", "remaining_time": "0:10:45", "throughput": 13818.97, "total_tokens": 40235456} +{"current_steps": 12790, "total_steps": 15621, "loss": 0.4505, "lr": 1.936710355119967e-07, "epoch": 0.8187696050188848, "percentage": 81.88, "elapsed_time": "0:48:32", "remaining_time": "0:10:44", "throughput": 13821.24, "total_tokens": 40250176} +{"current_steps": 12795, "total_steps": 15621, "loss": 0.3157, "lr": 1.9301065217377655e-07, "epoch": 0.8190896869598617, "percentage": 81.91, "elapsed_time": "0:48:32", "remaining_time": "0:10:43", "throughput": 13823.64, "total_tokens": 40265472} +{"current_steps": 12800, "total_steps": 15621, "loss": 0.3992, "lr": 1.9235127637123249e-07, "epoch": 0.8194097689008386, "percentage": 81.94, "elapsed_time": "0:48:33", "remaining_time": "0:10:42", "throughput": 13826.22, "total_tokens": 40281728} +{"current_steps": 12805, "total_steps": 15621, "loss": 0.3282, "lr": 1.9169290892760225e-07, "epoch": 0.8197298508418155, "percentage": 81.97, "elapsed_time": "0:48:34", "remaining_time": "0:10:40", "throughput": 13828.57, "total_tokens": 40296768} +{"current_steps": 12810, "total_steps": 15621, "loss": 0.3201, "lr": 1.91035550664866e-07, "epoch": 0.8200499327827924, "percentage": 82.0, "elapsed_time": "0:48:34", "remaining_time": "0:10:39", "throughput": 13830.81, "total_tokens": 40311488} +{"current_steps": 12815, "total_steps": 15621, "loss": 0.314, "lr": 1.903792024037433e-07, "epoch": 0.8203700147237692, "percentage": 82.04, "elapsed_time": "0:48:35", "remaining_time": "0:10:38", "throughput": 13833.28, "total_tokens": 40327232} +{"current_steps": 12820, "total_steps": 15621, "loss": 0.4472, "lr": 1.8972386496369185e-07, "epoch": 0.8206900966647461, "percentage": 82.07, "elapsed_time": "0:48:35", "remaining_time": "0:10:37", "throughput": 13835.97, "total_tokens": 40344064} +{"current_steps": 12825, "total_steps": 15621, "loss": 0.3976, "lr": 1.89069539162909e-07, "epoch": 0.8210101786057231, "percentage": 82.1, "elapsed_time": "0:48:36", "remaining_time": "0:10:35", "throughput": 13838.32, "total_tokens": 40359040} +{"current_steps": 12830, "total_steps": 15621, "loss": 0.4066, "lr": 1.8841622581832783e-07, "epoch": 0.8213302605467, "percentage": 82.13, "elapsed_time": "0:48:37", "remaining_time": "0:10:34", "throughput": 13841.12, "total_tokens": 40376384} +{"current_steps": 12835, "total_steps": 15621, "loss": 0.5901, "lr": 1.8776392574561783e-07, "epoch": 0.8216503424876769, "percentage": 82.17, "elapsed_time": "0:48:37", "remaining_time": "0:10:33", "throughput": 13843.56, "total_tokens": 40391936} +{"current_steps": 12840, "total_steps": 15621, "loss": 0.4831, "lr": 1.8711263975918322e-07, "epoch": 0.8219704244286538, "percentage": 82.2, "elapsed_time": "0:48:38", "remaining_time": "0:10:32", "throughput": 13846.32, "total_tokens": 40408832} +{"current_steps": 12845, "total_steps": 15621, "loss": 0.4603, "lr": 1.8646236867216215e-07, "epoch": 0.8222905063696306, "percentage": 82.23, "elapsed_time": "0:48:39", "remaining_time": "0:10:30", "throughput": 13848.96, "total_tokens": 40425280} +{"current_steps": 12850, "total_steps": 15621, "loss": 0.338, "lr": 1.8581311329642591e-07, "epoch": 0.8226105883106075, "percentage": 82.26, "elapsed_time": "0:48:39", "remaining_time": "0:10:29", "throughput": 13851.39, "total_tokens": 40440832} +{"current_steps": 12855, "total_steps": 15621, "loss": 0.2651, "lr": 1.8516487444257723e-07, "epoch": 0.8229306702515844, "percentage": 82.29, "elapsed_time": "0:48:40", "remaining_time": "0:10:28", "throughput": 13854.3, "total_tokens": 40458624} +{"current_steps": 12860, "total_steps": 15621, "loss": 0.4093, "lr": 1.8451765291995004e-07, "epoch": 0.8232507521925613, "percentage": 82.33, "elapsed_time": "0:48:40", "remaining_time": "0:10:27", "throughput": 13856.89, "total_tokens": 40474688} +{"current_steps": 12865, "total_steps": 15621, "loss": 0.3554, "lr": 1.8387144953660806e-07, "epoch": 0.8235708341335382, "percentage": 82.36, "elapsed_time": "0:48:41", "remaining_time": "0:10:25", "throughput": 13859.47, "total_tokens": 40490816} +{"current_steps": 12870, "total_steps": 15621, "loss": 0.4472, "lr": 1.832262650993437e-07, "epoch": 0.823890916074515, "percentage": 82.39, "elapsed_time": "0:48:42", "remaining_time": "0:10:24", "throughput": 13861.86, "total_tokens": 40506112} +{"current_steps": 12875, "total_steps": 15621, "loss": 0.2954, "lr": 1.825821004136774e-07, "epoch": 0.8242109980154919, "percentage": 82.42, "elapsed_time": "0:48:42", "remaining_time": "0:10:23", "throughput": 13864.23, "total_tokens": 40521344} +{"current_steps": 12880, "total_steps": 15621, "loss": 0.2698, "lr": 1.819389562838559e-07, "epoch": 0.8245310799564689, "percentage": 82.45, "elapsed_time": "0:48:43", "remaining_time": "0:10:22", "throughput": 13866.73, "total_tokens": 40537024} +{"current_steps": 12885, "total_steps": 15621, "loss": 0.3136, "lr": 1.8129683351285319e-07, "epoch": 0.8248511618974458, "percentage": 82.49, "elapsed_time": "0:48:43", "remaining_time": "0:10:20", "throughput": 13869.19, "total_tokens": 40552640} +{"current_steps": 12890, "total_steps": 15621, "loss": 0.3186, "lr": 1.8065573290236626e-07, "epoch": 0.8251712438384227, "percentage": 82.52, "elapsed_time": "0:48:44", "remaining_time": "0:10:19", "throughput": 13871.59, "total_tokens": 40568000} +{"current_steps": 12895, "total_steps": 15621, "loss": 0.3809, "lr": 1.8001565525281682e-07, "epoch": 0.8254913257793995, "percentage": 82.55, "elapsed_time": "0:48:45", "remaining_time": "0:10:18", "throughput": 13874.31, "total_tokens": 40584960} +{"current_steps": 12900, "total_steps": 15621, "loss": 0.3665, "lr": 1.793766013633493e-07, "epoch": 0.8258114077203764, "percentage": 82.58, "elapsed_time": "0:48:45", "remaining_time": "0:10:17", "throughput": 13876.75, "total_tokens": 40600704} +{"current_steps": 12905, "total_steps": 15621, "loss": 0.3693, "lr": 1.7873857203183074e-07, "epoch": 0.8261314896613533, "percentage": 82.61, "elapsed_time": "0:48:46", "remaining_time": "0:10:15", "throughput": 13879.12, "total_tokens": 40615872} +{"current_steps": 12910, "total_steps": 15621, "loss": 0.4563, "lr": 1.7810156805484733e-07, "epoch": 0.8264515716023302, "percentage": 82.65, "elapsed_time": "0:48:47", "remaining_time": "0:10:14", "throughput": 13881.86, "total_tokens": 40632640} +{"current_steps": 12915, "total_steps": 15621, "loss": 0.2995, "lr": 1.7746559022770612e-07, "epoch": 0.8267716535433071, "percentage": 82.68, "elapsed_time": "0:48:47", "remaining_time": "0:10:13", "throughput": 13884.27, "total_tokens": 40648064} +{"current_steps": 12920, "total_steps": 15621, "loss": 0.3663, "lr": 1.7683063934443342e-07, "epoch": 0.8270917354842839, "percentage": 82.71, "elapsed_time": "0:48:48", "remaining_time": "0:10:12", "throughput": 13886.95, "total_tokens": 40664704} +{"current_steps": 12925, "total_steps": 15621, "loss": 0.4004, "lr": 1.7619671619777277e-07, "epoch": 0.8274118174252608, "percentage": 82.74, "elapsed_time": "0:48:48", "remaining_time": "0:10:10", "throughput": 13889.52, "total_tokens": 40681024} +{"current_steps": 12930, "total_steps": 15621, "loss": 0.4101, "lr": 1.7556382157918404e-07, "epoch": 0.8277318993662378, "percentage": 82.77, "elapsed_time": "0:48:49", "remaining_time": "0:10:09", "throughput": 13891.82, "total_tokens": 40695936} +{"current_steps": 12935, "total_steps": 15621, "loss": 0.3185, "lr": 1.7493195627884427e-07, "epoch": 0.8280519813072147, "percentage": 82.81, "elapsed_time": "0:48:50", "remaining_time": "0:10:08", "throughput": 13894.64, "total_tokens": 40713472} +{"current_steps": 12940, "total_steps": 15621, "loss": 0.3141, "lr": 1.7430112108564465e-07, "epoch": 0.8283720632481916, "percentage": 82.84, "elapsed_time": "0:48:50", "remaining_time": "0:10:07", "throughput": 13897.11, "total_tokens": 40729344} +{"current_steps": 12945, "total_steps": 15621, "loss": 0.3861, "lr": 1.736713167871896e-07, "epoch": 0.8286921451891684, "percentage": 82.87, "elapsed_time": "0:48:51", "remaining_time": "0:10:05", "throughput": 13899.71, "total_tokens": 40745856} +{"current_steps": 12950, "total_steps": 15621, "loss": 0.2993, "lr": 1.7304254416979803e-07, "epoch": 0.8290122271301453, "percentage": 82.9, "elapsed_time": "0:48:52", "remaining_time": "0:10:04", "throughput": 13902.26, "total_tokens": 40761920} +{"current_steps": 12955, "total_steps": 15621, "loss": 0.2488, "lr": 1.7241480401849963e-07, "epoch": 0.8293323090711222, "percentage": 82.93, "elapsed_time": "0:48:52", "remaining_time": "0:10:03", "throughput": 13904.59, "total_tokens": 40776960} +{"current_steps": 12960, "total_steps": 15621, "loss": 0.3455, "lr": 1.7178809711703524e-07, "epoch": 0.8296523910120991, "percentage": 82.97, "elapsed_time": "0:48:53", "remaining_time": "0:10:02", "throughput": 13906.94, "total_tokens": 40792192} +{"current_steps": 12965, "total_steps": 15621, "loss": 0.3612, "lr": 1.7116242424785599e-07, "epoch": 0.829972472953076, "percentage": 83.0, "elapsed_time": "0:48:53", "remaining_time": "0:10:01", "throughput": 13909.46, "total_tokens": 40808256} +{"current_steps": 12970, "total_steps": 15621, "loss": 0.4288, "lr": 1.7053778619212166e-07, "epoch": 0.8302925548940528, "percentage": 83.03, "elapsed_time": "0:48:54", "remaining_time": "0:09:59", "throughput": 13911.8, "total_tokens": 40823424} +{"current_steps": 12975, "total_steps": 15621, "loss": 0.4221, "lr": 1.6991418372970022e-07, "epoch": 0.8306126368350297, "percentage": 83.06, "elapsed_time": "0:48:55", "remaining_time": "0:09:58", "throughput": 13914.62, "total_tokens": 40840960} +{"current_steps": 12980, "total_steps": 15621, "loss": 0.3775, "lr": 1.6929161763916666e-07, "epoch": 0.8309327187760066, "percentage": 83.09, "elapsed_time": "0:48:55", "remaining_time": "0:09:57", "throughput": 13917.25, "total_tokens": 40857536} +{"current_steps": 12985, "total_steps": 15621, "loss": 0.3597, "lr": 1.686700886978021e-07, "epoch": 0.8312528007169836, "percentage": 83.13, "elapsed_time": "0:48:56", "remaining_time": "0:09:56", "throughput": 13919.93, "total_tokens": 40874240} +{"current_steps": 12990, "total_steps": 15621, "loss": 0.3573, "lr": 1.6804959768159266e-07, "epoch": 0.8315728826579605, "percentage": 83.16, "elapsed_time": "0:48:56", "remaining_time": "0:09:54", "throughput": 13922.14, "total_tokens": 40888960} +{"current_steps": 12995, "total_steps": 15621, "loss": 0.5238, "lr": 1.674301453652287e-07, "epoch": 0.8318929645989374, "percentage": 83.19, "elapsed_time": "0:48:57", "remaining_time": "0:09:53", "throughput": 13924.53, "total_tokens": 40904512} +{"current_steps": 13000, "total_steps": 15621, "loss": 0.2903, "lr": 1.6681173252210378e-07, "epoch": 0.8322130465399142, "percentage": 83.22, "elapsed_time": "0:48:58", "remaining_time": "0:09:52", "throughput": 13927.32, "total_tokens": 40921856} +{"current_steps": 13005, "total_steps": 15621, "loss": 0.3741, "lr": 1.6619435992431342e-07, "epoch": 0.8325331284808911, "percentage": 83.25, "elapsed_time": "0:48:58", "remaining_time": "0:09:51", "throughput": 13930.02, "total_tokens": 40938752} +{"current_steps": 13010, "total_steps": 15621, "loss": 0.3033, "lr": 1.6557802834265466e-07, "epoch": 0.832853210421868, "percentage": 83.29, "elapsed_time": "0:48:59", "remaining_time": "0:09:49", "throughput": 13932.36, "total_tokens": 40954048} +{"current_steps": 13015, "total_steps": 15621, "loss": 0.3593, "lr": 1.649627385466248e-07, "epoch": 0.8331732923628449, "percentage": 83.32, "elapsed_time": "0:49:00", "remaining_time": "0:09:48", "throughput": 13935.4, "total_tokens": 40972672} +{"current_steps": 13020, "total_steps": 15621, "loss": 0.242, "lr": 1.643484913044202e-07, "epoch": 0.8334933743038218, "percentage": 83.35, "elapsed_time": "0:49:00", "remaining_time": "0:09:47", "throughput": 13937.7, "total_tokens": 40987648} +{"current_steps": 13025, "total_steps": 15621, "loss": 0.3147, "lr": 1.6373528738293564e-07, "epoch": 0.8338134562447986, "percentage": 83.38, "elapsed_time": "0:49:01", "remaining_time": "0:09:46", "throughput": 13940.17, "total_tokens": 41003328} +{"current_steps": 13030, "total_steps": 15621, "loss": 0.2875, "lr": 1.6312312754776404e-07, "epoch": 0.8341335381857755, "percentage": 83.41, "elapsed_time": "0:49:01", "remaining_time": "0:09:45", "throughput": 13942.53, "total_tokens": 41018624} +{"current_steps": 13035, "total_steps": 15621, "loss": 0.3321, "lr": 1.6251201256319357e-07, "epoch": 0.8344536201267524, "percentage": 83.45, "elapsed_time": "0:49:02", "remaining_time": "0:09:43", "throughput": 13945.02, "total_tokens": 41034624} +{"current_steps": 13040, "total_steps": 15621, "loss": 0.3821, "lr": 1.619019431922083e-07, "epoch": 0.8347737020677294, "percentage": 83.48, "elapsed_time": "0:49:03", "remaining_time": "0:09:42", "throughput": 13947.34, "total_tokens": 41049664} +{"current_steps": 13045, "total_steps": 15621, "loss": 0.3454, "lr": 1.6129292019648754e-07, "epoch": 0.8350937840087063, "percentage": 83.51, "elapsed_time": "0:49:03", "remaining_time": "0:09:41", "throughput": 13949.99, "total_tokens": 41066368} +{"current_steps": 13050, "total_steps": 15621, "loss": 0.2916, "lr": 1.606849443364038e-07, "epoch": 0.8354138659496831, "percentage": 83.54, "elapsed_time": "0:49:04", "remaining_time": "0:09:40", "throughput": 13952.4, "total_tokens": 41082048} +{"current_steps": 13055, "total_steps": 15621, "loss": 0.3422, "lr": 1.6007801637102104e-07, "epoch": 0.83573394789066, "percentage": 83.57, "elapsed_time": "0:49:05", "remaining_time": "0:09:38", "throughput": 13954.9, "total_tokens": 41098048} +{"current_steps": 13060, "total_steps": 15621, "loss": 0.3826, "lr": 1.594721370580969e-07, "epoch": 0.8360540298316369, "percentage": 83.61, "elapsed_time": "0:49:05", "remaining_time": "0:09:37", "throughput": 13957.12, "total_tokens": 41112768} +{"current_steps": 13065, "total_steps": 15621, "loss": 0.4512, "lr": 1.588673071540788e-07, "epoch": 0.8363741117726138, "percentage": 83.64, "elapsed_time": "0:49:06", "remaining_time": "0:09:36", "throughput": 13959.39, "total_tokens": 41127488} +{"current_steps": 13070, "total_steps": 15621, "loss": 0.3295, "lr": 1.5826352741410332e-07, "epoch": 0.8366941937135907, "percentage": 83.67, "elapsed_time": "0:49:06", "remaining_time": "0:09:35", "throughput": 13961.62, "total_tokens": 41142272} +{"current_steps": 13075, "total_steps": 15621, "loss": 0.2947, "lr": 1.576607985919971e-07, "epoch": 0.8370142756545675, "percentage": 83.7, "elapsed_time": "0:49:07", "remaining_time": "0:09:33", "throughput": 13964.02, "total_tokens": 41157952} +{"current_steps": 13080, "total_steps": 15621, "loss": 0.3595, "lr": 1.57059121440274e-07, "epoch": 0.8373343575955444, "percentage": 83.73, "elapsed_time": "0:49:08", "remaining_time": "0:09:32", "throughput": 13966.29, "total_tokens": 41172992} +{"current_steps": 13085, "total_steps": 15621, "loss": 0.3642, "lr": 1.56458496710135e-07, "epoch": 0.8376544395365213, "percentage": 83.77, "elapsed_time": "0:49:08", "remaining_time": "0:09:31", "throughput": 13968.5, "total_tokens": 41187776} +{"current_steps": 13090, "total_steps": 15621, "loss": 0.3461, "lr": 1.5585892515146716e-07, "epoch": 0.8379745214774983, "percentage": 83.8, "elapsed_time": "0:49:09", "remaining_time": "0:09:30", "throughput": 13971.15, "total_tokens": 41204416} +{"current_steps": 13095, "total_steps": 15621, "loss": 0.4195, "lr": 1.5526040751284253e-07, "epoch": 0.8382946034184752, "percentage": 83.83, "elapsed_time": "0:49:09", "remaining_time": "0:09:29", "throughput": 13973.61, "total_tokens": 41220032} +{"current_steps": 13100, "total_steps": 15621, "loss": 0.3118, "lr": 1.546629445415174e-07, "epoch": 0.838614685359452, "percentage": 83.86, "elapsed_time": "0:49:10", "remaining_time": "0:09:27", "throughput": 13976.04, "total_tokens": 41235776} +{"current_steps": 13105, "total_steps": 15621, "loss": 0.3725, "lr": 1.5406653698343141e-07, "epoch": 0.8389347673004289, "percentage": 83.89, "elapsed_time": "0:49:11", "remaining_time": "0:09:26", "throughput": 13978.62, "total_tokens": 41252160} +{"current_steps": 13110, "total_steps": 15621, "loss": 0.3539, "lr": 1.5347118558320637e-07, "epoch": 0.8392548492414058, "percentage": 83.93, "elapsed_time": "0:49:11", "remaining_time": "0:09:25", "throughput": 13981.3, "total_tokens": 41269056} +{"current_steps": 13115, "total_steps": 15621, "loss": 0.3562, "lr": 1.5287689108414558e-07, "epoch": 0.8395749311823827, "percentage": 83.96, "elapsed_time": "0:49:12", "remaining_time": "0:09:24", "throughput": 13983.85, "total_tokens": 41285312} +{"current_steps": 13120, "total_steps": 15621, "loss": 0.3246, "lr": 1.5228365422823242e-07, "epoch": 0.8398950131233596, "percentage": 83.99, "elapsed_time": "0:49:12", "remaining_time": "0:09:22", "throughput": 13986.28, "total_tokens": 41300992} +{"current_steps": 13125, "total_steps": 15621, "loss": 0.2623, "lr": 1.5169147575613038e-07, "epoch": 0.8402150950643364, "percentage": 84.02, "elapsed_time": "0:49:13", "remaining_time": "0:09:21", "throughput": 13988.96, "total_tokens": 41317952} +{"current_steps": 13130, "total_steps": 15621, "loss": 0.2941, "lr": 1.5110035640718098e-07, "epoch": 0.8405351770053133, "percentage": 84.05, "elapsed_time": "0:49:14", "remaining_time": "0:09:20", "throughput": 13991.32, "total_tokens": 41333440} +{"current_steps": 13135, "total_steps": 15621, "loss": 0.3725, "lr": 1.5051029691940387e-07, "epoch": 0.8408552589462902, "percentage": 84.09, "elapsed_time": "0:49:14", "remaining_time": "0:09:19", "throughput": 13993.78, "total_tokens": 41349312} +{"current_steps": 13140, "total_steps": 15621, "loss": 0.3449, "lr": 1.4992129802949515e-07, "epoch": 0.8411753408872671, "percentage": 84.12, "elapsed_time": "0:49:15", "remaining_time": "0:09:18", "throughput": 13996.03, "total_tokens": 41364288} +{"current_steps": 13145, "total_steps": 15621, "loss": 0.2836, "lr": 1.4933336047282696e-07, "epoch": 0.8414954228282441, "percentage": 84.15, "elapsed_time": "0:49:16", "remaining_time": "0:09:16", "throughput": 13998.4, "total_tokens": 41379904} +{"current_steps": 13150, "total_steps": 15621, "loss": 0.3199, "lr": 1.4874648498344579e-07, "epoch": 0.841815504769221, "percentage": 84.18, "elapsed_time": "0:49:16", "remaining_time": "0:09:15", "throughput": 14000.59, "total_tokens": 41394432} +{"current_steps": 13155, "total_steps": 15621, "loss": 0.3419, "lr": 1.4816067229407348e-07, "epoch": 0.8421355867101978, "percentage": 84.21, "elapsed_time": "0:49:17", "remaining_time": "0:09:14", "throughput": 14002.92, "total_tokens": 41409984} +{"current_steps": 13160, "total_steps": 15621, "loss": 0.3038, "lr": 1.4757592313610322e-07, "epoch": 0.8424556686511747, "percentage": 84.25, "elapsed_time": "0:49:17", "remaining_time": "0:09:13", "throughput": 14005.35, "total_tokens": 41425984} +{"current_steps": 13165, "total_steps": 15621, "loss": 0.3293, "lr": 1.4699223823960128e-07, "epoch": 0.8427757505921516, "percentage": 84.28, "elapsed_time": "0:49:18", "remaining_time": "0:09:11", "throughput": 14007.76, "total_tokens": 41441920} +{"current_steps": 13170, "total_steps": 15621, "loss": 0.3392, "lr": 1.4640961833330579e-07, "epoch": 0.8430958325331285, "percentage": 84.31, "elapsed_time": "0:49:19", "remaining_time": "0:09:10", "throughput": 14010.18, "total_tokens": 41457664} +{"current_steps": 13175, "total_steps": 15621, "loss": 0.2544, "lr": 1.4582806414462378e-07, "epoch": 0.8434159144741054, "percentage": 84.34, "elapsed_time": "0:49:19", "remaining_time": "0:09:09", "throughput": 14012.48, "total_tokens": 41472832} +{"current_steps": 13180, "total_steps": 15621, "loss": 0.3411, "lr": 1.4524757639963258e-07, "epoch": 0.8437359964150822, "percentage": 84.37, "elapsed_time": "0:49:20", "remaining_time": "0:09:08", "throughput": 14015.29, "total_tokens": 41490368} +{"current_steps": 13185, "total_steps": 15621, "loss": 0.4458, "lr": 1.4466815582307845e-07, "epoch": 0.8440560783560591, "percentage": 84.41, "elapsed_time": "0:49:20", "remaining_time": "0:09:07", "throughput": 14017.81, "total_tokens": 41506624} +{"current_steps": 13190, "total_steps": 15621, "loss": 0.2433, "lr": 1.440898031383746e-07, "epoch": 0.844376160297036, "percentage": 84.44, "elapsed_time": "0:49:21", "remaining_time": "0:09:05", "throughput": 14020.41, "total_tokens": 41523264} +{"current_steps": 13195, "total_steps": 15621, "loss": 0.3678, "lr": 1.4351251906760064e-07, "epoch": 0.844696242238013, "percentage": 84.47, "elapsed_time": "0:49:22", "remaining_time": "0:09:04", "throughput": 14022.79, "total_tokens": 41538944} +{"current_steps": 13200, "total_steps": 15621, "loss": 0.3919, "lr": 1.4293630433150317e-07, "epoch": 0.8450163241789899, "percentage": 84.5, "elapsed_time": "0:49:22", "remaining_time": "0:09:03", "throughput": 14025.25, "total_tokens": 41554880} +{"current_steps": 13205, "total_steps": 15621, "loss": 0.4473, "lr": 1.423611596494927e-07, "epoch": 0.8453364061199667, "percentage": 84.53, "elapsed_time": "0:49:23", "remaining_time": "0:09:02", "throughput": 14027.41, "total_tokens": 41569280} +{"current_steps": 13210, "total_steps": 15621, "loss": 0.3541, "lr": 1.4178708573964438e-07, "epoch": 0.8456564880609436, "percentage": 84.57, "elapsed_time": "0:49:24", "remaining_time": "0:09:00", "throughput": 14029.73, "total_tokens": 41584576} +{"current_steps": 13215, "total_steps": 15621, "loss": 0.3483, "lr": 1.4121408331869566e-07, "epoch": 0.8459765700019205, "percentage": 84.6, "elapsed_time": "0:49:24", "remaining_time": "0:08:59", "throughput": 14032.07, "total_tokens": 41600000} +{"current_steps": 13220, "total_steps": 15621, "loss": 0.3539, "lr": 1.406421531020474e-07, "epoch": 0.8462966519428974, "percentage": 84.63, "elapsed_time": "0:49:25", "remaining_time": "0:08:58", "throughput": 14034.33, "total_tokens": 41615040} +{"current_steps": 13225, "total_steps": 15621, "loss": 0.3418, "lr": 1.4007129580376097e-07, "epoch": 0.8466167338838743, "percentage": 84.66, "elapsed_time": "0:49:25", "remaining_time": "0:08:57", "throughput": 14036.64, "total_tokens": 41630208} +{"current_steps": 13230, "total_steps": 15621, "loss": 0.354, "lr": 1.3950151213655847e-07, "epoch": 0.8469368158248511, "percentage": 84.69, "elapsed_time": "0:49:26", "remaining_time": "0:08:56", "throughput": 14038.96, "total_tokens": 41645440} +{"current_steps": 13235, "total_steps": 15621, "loss": 0.3286, "lr": 1.389328028118214e-07, "epoch": 0.847256897765828, "percentage": 84.73, "elapsed_time": "0:49:27", "remaining_time": "0:08:54", "throughput": 14041.37, "total_tokens": 41661184} +{"current_steps": 13240, "total_steps": 15621, "loss": 0.3546, "lr": 1.3836516853959e-07, "epoch": 0.8475769797068049, "percentage": 84.76, "elapsed_time": "0:49:27", "remaining_time": "0:08:53", "throughput": 14043.65, "total_tokens": 41676224} +{"current_steps": 13245, "total_steps": 15621, "loss": 0.3031, "lr": 1.3779861002856242e-07, "epoch": 0.8478970616477818, "percentage": 84.79, "elapsed_time": "0:49:28", "remaining_time": "0:08:52", "throughput": 14045.85, "total_tokens": 41690816} +{"current_steps": 13250, "total_steps": 15621, "loss": 0.3261, "lr": 1.3723312798609366e-07, "epoch": 0.8482171435887588, "percentage": 84.82, "elapsed_time": "0:49:28", "remaining_time": "0:08:51", "throughput": 14048.28, "total_tokens": 41706688} +{"current_steps": 13255, "total_steps": 15621, "loss": 0.3518, "lr": 1.3666872311819455e-07, "epoch": 0.8485372255297357, "percentage": 84.85, "elapsed_time": "0:49:29", "remaining_time": "0:08:50", "throughput": 14050.61, "total_tokens": 41721920} +{"current_steps": 13260, "total_steps": 15621, "loss": 0.2742, "lr": 1.361053961295312e-07, "epoch": 0.8488573074707125, "percentage": 84.89, "elapsed_time": "0:49:30", "remaining_time": "0:08:48", "throughput": 14053.12, "total_tokens": 41738112} +{"current_steps": 13265, "total_steps": 15621, "loss": 0.3463, "lr": 1.3554314772342412e-07, "epoch": 0.8491773894116894, "percentage": 84.92, "elapsed_time": "0:49:30", "remaining_time": "0:08:47", "throughput": 14055.5, "total_tokens": 41753792} +{"current_steps": 13270, "total_steps": 15621, "loss": 0.3268, "lr": 1.349819786018469e-07, "epoch": 0.8494974713526663, "percentage": 84.95, "elapsed_time": "0:49:31", "remaining_time": "0:08:46", "throughput": 14058.26, "total_tokens": 41771328} +{"current_steps": 13275, "total_steps": 15621, "loss": 0.375, "lr": 1.3442188946542566e-07, "epoch": 0.8498175532936432, "percentage": 84.98, "elapsed_time": "0:49:31", "remaining_time": "0:08:45", "throughput": 14060.81, "total_tokens": 41787712} +{"current_steps": 13280, "total_steps": 15621, "loss": 0.2995, "lr": 1.338628810134388e-07, "epoch": 0.85013763523462, "percentage": 85.01, "elapsed_time": "0:49:32", "remaining_time": "0:08:43", "throughput": 14063.1, "total_tokens": 41803072} +{"current_steps": 13285, "total_steps": 15621, "loss": 0.3636, "lr": 1.3330495394381435e-07, "epoch": 0.8504577171755969, "percentage": 85.05, "elapsed_time": "0:49:33", "remaining_time": "0:08:42", "throughput": 14065.5, "total_tokens": 41818688} +{"current_steps": 13290, "total_steps": 15621, "loss": 0.272, "lr": 1.3274810895313083e-07, "epoch": 0.8507777991165738, "percentage": 85.08, "elapsed_time": "0:49:33", "remaining_time": "0:08:41", "throughput": 14067.79, "total_tokens": 41833792} +{"current_steps": 13294, "total_steps": 15621, "eval_loss": 0.3570670485496521, "epoch": 0.8510338646693554, "percentage": 85.1, "elapsed_time": "0:50:23", "remaining_time": "0:08:49", "throughput": 13841.27, "total_tokens": 41847872} +{"current_steps": 13295, "total_steps": 15621, "loss": 0.3708, "lr": 1.321923467366164e-07, "epoch": 0.8510978810575507, "percentage": 85.11, "elapsed_time": "0:51:05", "remaining_time": "0:08:56", "throughput": 13653.41, "total_tokens": 41850880} +{"current_steps": 13300, "total_steps": 15621, "loss": 0.1815, "lr": 1.3163766798814603e-07, "epoch": 0.8514179629985277, "percentage": 85.14, "elapsed_time": "0:51:05", "remaining_time": "0:08:55", "throughput": 13655.78, "total_tokens": 41866560} +{"current_steps": 13305, "total_steps": 15621, "loss": 0.2872, "lr": 1.3108407340024264e-07, "epoch": 0.8517380449395046, "percentage": 85.17, "elapsed_time": "0:51:06", "remaining_time": "0:08:53", "throughput": 13658.17, "total_tokens": 41882240} +{"current_steps": 13310, "total_steps": 15621, "loss": 0.332, "lr": 1.3053156366407613e-07, "epoch": 0.8520581268804814, "percentage": 85.21, "elapsed_time": "0:51:07", "remaining_time": "0:08:52", "throughput": 13660.77, "total_tokens": 41898880} +{"current_steps": 13315, "total_steps": 15621, "loss": 0.2398, "lr": 1.2998013946946119e-07, "epoch": 0.8523782088214583, "percentage": 85.24, "elapsed_time": "0:51:07", "remaining_time": "0:08:51", "throughput": 13663.44, "total_tokens": 41915968} +{"current_steps": 13320, "total_steps": 15621, "loss": 0.3556, "lr": 1.2942980150485706e-07, "epoch": 0.8526982907624352, "percentage": 85.27, "elapsed_time": "0:51:08", "remaining_time": "0:08:50", "throughput": 13665.71, "total_tokens": 41930816} +{"current_steps": 13325, "total_steps": 15621, "loss": 0.3098, "lr": 1.2888055045736723e-07, "epoch": 0.8530183727034121, "percentage": 85.3, "elapsed_time": "0:51:08", "remaining_time": "0:08:48", "throughput": 13668.24, "total_tokens": 41947200} +{"current_steps": 13330, "total_steps": 15621, "loss": 0.3021, "lr": 1.283323870127384e-07, "epoch": 0.853338454644389, "percentage": 85.33, "elapsed_time": "0:51:09", "remaining_time": "0:08:47", "throughput": 13670.49, "total_tokens": 41962240} +{"current_steps": 13335, "total_steps": 15621, "loss": 0.3063, "lr": 1.2778531185535911e-07, "epoch": 0.8536585365853658, "percentage": 85.37, "elapsed_time": "0:51:10", "remaining_time": "0:08:46", "throughput": 13673.03, "total_tokens": 41978752} +{"current_steps": 13340, "total_steps": 15621, "loss": 0.324, "lr": 1.2723932566825844e-07, "epoch": 0.8539786185263427, "percentage": 85.4, "elapsed_time": "0:51:10", "remaining_time": "0:08:45", "throughput": 13675.36, "total_tokens": 41994112} +{"current_steps": 13345, "total_steps": 15621, "loss": 0.2986, "lr": 1.2669442913310723e-07, "epoch": 0.8542987004673196, "percentage": 85.43, "elapsed_time": "0:51:11", "remaining_time": "0:08:43", "throughput": 13677.84, "total_tokens": 42010432} +{"current_steps": 13350, "total_steps": 15621, "loss": 0.2722, "lr": 1.2615062293021506e-07, "epoch": 0.8546187824082965, "percentage": 85.46, "elapsed_time": "0:51:12", "remaining_time": "0:08:42", "throughput": 13680.19, "total_tokens": 42025984} +{"current_steps": 13355, "total_steps": 15621, "loss": 0.3185, "lr": 1.2560790773853025e-07, "epoch": 0.8549388643492735, "percentage": 85.49, "elapsed_time": "0:51:12", "remaining_time": "0:08:41", "throughput": 13682.42, "total_tokens": 42040832} +{"current_steps": 13360, "total_steps": 15621, "loss": 0.4035, "lr": 1.2506628423563915e-07, "epoch": 0.8552589462902503, "percentage": 85.53, "elapsed_time": "0:51:13", "remaining_time": "0:08:40", "throughput": 13685.02, "total_tokens": 42057536} +{"current_steps": 13365, "total_steps": 15621, "loss": 0.2863, "lr": 1.2452575309776493e-07, "epoch": 0.8555790282312272, "percentage": 85.56, "elapsed_time": "0:51:13", "remaining_time": "0:08:38", "throughput": 13687.4, "total_tokens": 42073152} +{"current_steps": 13370, "total_steps": 15621, "loss": 0.304, "lr": 1.2398631499976732e-07, "epoch": 0.8558991101722041, "percentage": 85.59, "elapsed_time": "0:51:14", "remaining_time": "0:08:37", "throughput": 13689.72, "total_tokens": 42088512} +{"current_steps": 13375, "total_steps": 15621, "loss": 0.4208, "lr": 1.234479706151409e-07, "epoch": 0.856219192113181, "percentage": 85.62, "elapsed_time": "0:51:15", "remaining_time": "0:08:36", "throughput": 13691.97, "total_tokens": 42103552} +{"current_steps": 13380, "total_steps": 15621, "loss": 0.3608, "lr": 1.2291072061601503e-07, "epoch": 0.8565392740541579, "percentage": 85.65, "elapsed_time": "0:51:15", "remaining_time": "0:08:35", "throughput": 13694.5, "total_tokens": 42119872} +{"current_steps": 13385, "total_steps": 15621, "loss": 0.4351, "lr": 1.2237456567315264e-07, "epoch": 0.8568593559951347, "percentage": 85.69, "elapsed_time": "0:51:16", "remaining_time": "0:08:33", "throughput": 13697.15, "total_tokens": 42136832} +{"current_steps": 13390, "total_steps": 15621, "loss": 0.2975, "lr": 1.2183950645594944e-07, "epoch": 0.8571794379361116, "percentage": 85.72, "elapsed_time": "0:51:16", "remaining_time": "0:08:32", "throughput": 13699.59, "total_tokens": 42152896} +{"current_steps": 13395, "total_steps": 15621, "loss": 0.3421, "lr": 1.2130554363243318e-07, "epoch": 0.8574995198770885, "percentage": 85.75, "elapsed_time": "0:51:17", "remaining_time": "0:08:31", "throughput": 13701.84, "total_tokens": 42168064} +{"current_steps": 13400, "total_steps": 15621, "loss": 0.3703, "lr": 1.207726778692625e-07, "epoch": 0.8578196018180654, "percentage": 85.78, "elapsed_time": "0:51:18", "remaining_time": "0:08:30", "throughput": 13704.03, "total_tokens": 42182784} +{"current_steps": 13405, "total_steps": 15621, "loss": 0.3271, "lr": 1.2024090983172718e-07, "epoch": 0.8581396837590423, "percentage": 85.81, "elapsed_time": "0:51:18", "remaining_time": "0:08:28", "throughput": 13706.62, "total_tokens": 42199744} +{"current_steps": 13410, "total_steps": 15621, "loss": 0.3625, "lr": 1.1971024018374532e-07, "epoch": 0.8584597657000193, "percentage": 85.85, "elapsed_time": "0:51:19", "remaining_time": "0:08:27", "throughput": 13708.92, "total_tokens": 42215040} +{"current_steps": 13415, "total_steps": 15621, "loss": 0.3091, "lr": 1.1918066958786432e-07, "epoch": 0.8587798476409961, "percentage": 85.88, "elapsed_time": "0:51:19", "remaining_time": "0:08:26", "throughput": 13711.17, "total_tokens": 42230144} +{"current_steps": 13420, "total_steps": 15621, "loss": 0.3553, "lr": 1.1865219870525922e-07, "epoch": 0.859099929581973, "percentage": 85.91, "elapsed_time": "0:51:20", "remaining_time": "0:08:25", "throughput": 13713.64, "total_tokens": 42246528} +{"current_steps": 13425, "total_steps": 15621, "loss": 0.4317, "lr": 1.1812482819573222e-07, "epoch": 0.8594200115229499, "percentage": 85.94, "elapsed_time": "0:51:21", "remaining_time": "0:08:24", "throughput": 13716.18, "total_tokens": 42263168} +{"current_steps": 13430, "total_steps": 15621, "loss": 0.3905, "lr": 1.1759855871771163e-07, "epoch": 0.8597400934639268, "percentage": 85.97, "elapsed_time": "0:51:21", "remaining_time": "0:08:22", "throughput": 13718.56, "total_tokens": 42278912} +{"current_steps": 13435, "total_steps": 15621, "loss": 0.3824, "lr": 1.1707339092825075e-07, "epoch": 0.8600601754049036, "percentage": 86.01, "elapsed_time": "0:51:22", "remaining_time": "0:08:21", "throughput": 13720.97, "total_tokens": 42294656} +{"current_steps": 13440, "total_steps": 15621, "loss": 0.3909, "lr": 1.1654932548302842e-07, "epoch": 0.8603802573458805, "percentage": 86.04, "elapsed_time": "0:51:23", "remaining_time": "0:08:20", "throughput": 13723.55, "total_tokens": 42311552} +{"current_steps": 13445, "total_steps": 15621, "loss": 0.3635, "lr": 1.1602636303634595e-07, "epoch": 0.8607003392868574, "percentage": 86.07, "elapsed_time": "0:51:23", "remaining_time": "0:08:19", "throughput": 13725.98, "total_tokens": 42327552} +{"current_steps": 13450, "total_steps": 15621, "loss": 0.3583, "lr": 1.1550450424112801e-07, "epoch": 0.8610204212278343, "percentage": 86.1, "elapsed_time": "0:51:24", "remaining_time": "0:08:17", "throughput": 13728.37, "total_tokens": 42343360} +{"current_steps": 13455, "total_steps": 15621, "loss": 0.3341, "lr": 1.1498374974892178e-07, "epoch": 0.8613405031688112, "percentage": 86.13, "elapsed_time": "0:51:25", "remaining_time": "0:08:16", "throughput": 13730.94, "total_tokens": 42360064} +{"current_steps": 13460, "total_steps": 15621, "loss": 0.4371, "lr": 1.144641002098955e-07, "epoch": 0.8616605851097882, "percentage": 86.17, "elapsed_time": "0:51:25", "remaining_time": "0:08:15", "throughput": 13733.12, "total_tokens": 42374976} +{"current_steps": 13465, "total_steps": 15621, "loss": 0.3524, "lr": 1.1394555627283697e-07, "epoch": 0.861980667050765, "percentage": 86.2, "elapsed_time": "0:51:26", "remaining_time": "0:08:14", "throughput": 13735.68, "total_tokens": 42391616} +{"current_steps": 13470, "total_steps": 15621, "loss": 0.3095, "lr": 1.134281185851551e-07, "epoch": 0.8623007489917419, "percentage": 86.23, "elapsed_time": "0:51:26", "remaining_time": "0:08:12", "throughput": 13737.88, "total_tokens": 42406528} +{"current_steps": 13475, "total_steps": 15621, "loss": 0.288, "lr": 1.1291178779287691e-07, "epoch": 0.8626208309327188, "percentage": 86.26, "elapsed_time": "0:51:27", "remaining_time": "0:08:11", "throughput": 13740.68, "total_tokens": 42424320} +{"current_steps": 13480, "total_steps": 15621, "loss": 0.3654, "lr": 1.1239656454064683e-07, "epoch": 0.8629409128736957, "percentage": 86.29, "elapsed_time": "0:51:28", "remaining_time": "0:08:10", "throughput": 13743.23, "total_tokens": 42440960} +{"current_steps": 13485, "total_steps": 15621, "loss": 0.2474, "lr": 1.1188244947172776e-07, "epoch": 0.8632609948146726, "percentage": 86.33, "elapsed_time": "0:51:28", "remaining_time": "0:08:09", "throughput": 13745.55, "total_tokens": 42456448} +{"current_steps": 13490, "total_steps": 15621, "loss": 0.3165, "lr": 1.1136944322799812e-07, "epoch": 0.8635810767556494, "percentage": 86.36, "elapsed_time": "0:51:29", "remaining_time": "0:08:08", "throughput": 13747.95, "total_tokens": 42472448} +{"current_steps": 13495, "total_steps": 15621, "loss": 0.3147, "lr": 1.1085754644995227e-07, "epoch": 0.8639011586966263, "percentage": 86.39, "elapsed_time": "0:51:29", "remaining_time": "0:08:06", "throughput": 13750.23, "total_tokens": 42487808} +{"current_steps": 13500, "total_steps": 15621, "loss": 0.3516, "lr": 1.1034675977669938e-07, "epoch": 0.8642212406376032, "percentage": 86.42, "elapsed_time": "0:51:30", "remaining_time": "0:08:05", "throughput": 13752.64, "total_tokens": 42503744} +{"current_steps": 13505, "total_steps": 15621, "loss": 0.5636, "lr": 1.0983708384596258e-07, "epoch": 0.8645413225785801, "percentage": 86.45, "elapsed_time": "0:51:31", "remaining_time": "0:08:04", "throughput": 13755.27, "total_tokens": 42520768} +{"current_steps": 13510, "total_steps": 15621, "loss": 0.3664, "lr": 1.0932851929407827e-07, "epoch": 0.864861404519557, "percentage": 86.49, "elapsed_time": "0:51:31", "remaining_time": "0:08:03", "throughput": 13757.78, "total_tokens": 42537408} +{"current_steps": 13515, "total_steps": 15621, "loss": 0.36, "lr": 1.0882106675599534e-07, "epoch": 0.8651814864605339, "percentage": 86.52, "elapsed_time": "0:51:32", "remaining_time": "0:08:01", "throughput": 13760.24, "total_tokens": 42553728} +{"current_steps": 13520, "total_steps": 15621, "loss": 0.3304, "lr": 1.0831472686527409e-07, "epoch": 0.8655015684015108, "percentage": 86.55, "elapsed_time": "0:51:33", "remaining_time": "0:08:00", "throughput": 13762.47, "total_tokens": 42568896} +{"current_steps": 13525, "total_steps": 15621, "loss": 0.2939, "lr": 1.0780950025408586e-07, "epoch": 0.8658216503424877, "percentage": 86.58, "elapsed_time": "0:51:33", "remaining_time": "0:07:59", "throughput": 13764.68, "total_tokens": 42584000} +{"current_steps": 13530, "total_steps": 15621, "loss": 0.3824, "lr": 1.0730538755321217e-07, "epoch": 0.8661417322834646, "percentage": 86.61, "elapsed_time": "0:51:34", "remaining_time": "0:07:58", "throughput": 13767.08, "total_tokens": 42600192} +{"current_steps": 13535, "total_steps": 15621, "loss": 0.304, "lr": 1.0680238939204334e-07, "epoch": 0.8664618142244415, "percentage": 86.65, "elapsed_time": "0:51:34", "remaining_time": "0:07:56", "throughput": 13769.14, "total_tokens": 42614656} +{"current_steps": 13540, "total_steps": 15621, "loss": 0.3989, "lr": 1.0630050639857879e-07, "epoch": 0.8667818961654183, "percentage": 86.68, "elapsed_time": "0:51:35", "remaining_time": "0:07:55", "throughput": 13771.31, "total_tokens": 42629504} +{"current_steps": 13545, "total_steps": 15621, "loss": 0.3036, "lr": 1.0579973919942508e-07, "epoch": 0.8671019781063952, "percentage": 86.71, "elapsed_time": "0:51:36", "remaining_time": "0:07:54", "throughput": 13773.46, "total_tokens": 42644224} +{"current_steps": 13550, "total_steps": 15621, "loss": 0.2417, "lr": 1.0530008841979621e-07, "epoch": 0.8674220600473721, "percentage": 86.74, "elapsed_time": "0:51:36", "remaining_time": "0:07:53", "throughput": 13775.73, "total_tokens": 42659584} +{"current_steps": 13555, "total_steps": 15621, "loss": 0.2756, "lr": 1.048015546835117e-07, "epoch": 0.867742141988349, "percentage": 86.77, "elapsed_time": "0:51:37", "remaining_time": "0:07:52", "throughput": 13778.19, "total_tokens": 42675776} +{"current_steps": 13560, "total_steps": 15621, "loss": 0.3976, "lr": 1.0430413861299691e-07, "epoch": 0.8680622239293259, "percentage": 86.81, "elapsed_time": "0:51:38", "remaining_time": "0:07:50", "throughput": 13780.87, "total_tokens": 42693184} +{"current_steps": 13565, "total_steps": 15621, "loss": 0.4533, "lr": 1.0380784082928196e-07, "epoch": 0.8683823058703029, "percentage": 86.84, "elapsed_time": "0:51:38", "remaining_time": "0:07:49", "throughput": 13783.59, "total_tokens": 42710784} +{"current_steps": 13570, "total_steps": 15621, "loss": 0.3903, "lr": 1.0331266195200006e-07, "epoch": 0.8687023878112797, "percentage": 86.87, "elapsed_time": "0:51:39", "remaining_time": "0:07:48", "throughput": 13786.06, "total_tokens": 42727040} +{"current_steps": 13575, "total_steps": 15621, "loss": 0.3126, "lr": 1.0281860259938779e-07, "epoch": 0.8690224697522566, "percentage": 86.9, "elapsed_time": "0:51:39", "remaining_time": "0:07:47", "throughput": 13788.31, "total_tokens": 42742208} +{"current_steps": 13580, "total_steps": 15621, "loss": 0.3673, "lr": 1.0232566338828452e-07, "epoch": 0.8693425516932335, "percentage": 86.93, "elapsed_time": "0:51:40", "remaining_time": "0:07:45", "throughput": 13790.76, "total_tokens": 42758464} +{"current_steps": 13585, "total_steps": 15621, "loss": 0.4102, "lr": 1.018338449341305e-07, "epoch": 0.8696626336342104, "percentage": 86.97, "elapsed_time": "0:51:41", "remaining_time": "0:07:44", "throughput": 13793.07, "total_tokens": 42774016} +{"current_steps": 13590, "total_steps": 15621, "loss": 0.3942, "lr": 1.0134314785096632e-07, "epoch": 0.8699827155751872, "percentage": 87.0, "elapsed_time": "0:51:41", "remaining_time": "0:07:43", "throughput": 13795.33, "total_tokens": 42789248} +{"current_steps": 13595, "total_steps": 15621, "loss": 0.342, "lr": 1.0085357275143359e-07, "epoch": 0.8703027975161641, "percentage": 87.03, "elapsed_time": "0:51:42", "remaining_time": "0:07:42", "throughput": 13797.61, "total_tokens": 42804608} +{"current_steps": 13600, "total_steps": 15621, "loss": 0.4964, "lr": 1.0036512024677268e-07, "epoch": 0.870622879457141, "percentage": 87.06, "elapsed_time": "0:51:42", "remaining_time": "0:07:41", "throughput": 13799.8, "total_tokens": 42819584} +{"current_steps": 13605, "total_steps": 15621, "loss": 0.2733, "lr": 9.98777909468217e-08, "epoch": 0.8709429613981179, "percentage": 87.09, "elapsed_time": "0:51:43", "remaining_time": "0:07:39", "throughput": 13802.1, "total_tokens": 42835200} +{"current_steps": 13610, "total_steps": 15621, "loss": 0.406, "lr": 9.939158546001736e-08, "epoch": 0.8712630433390948, "percentage": 87.13, "elapsed_time": "0:51:44", "remaining_time": "0:07:38", "throughput": 13804.77, "total_tokens": 42852672} +{"current_steps": 13615, "total_steps": 15621, "loss": 0.3322, "lr": 9.890650439339299e-08, "epoch": 0.8715831252800716, "percentage": 87.16, "elapsed_time": "0:51:44", "remaining_time": "0:07:37", "throughput": 13807.17, "total_tokens": 42868672} +{"current_steps": 13620, "total_steps": 15621, "loss": 0.416, "lr": 9.842254835257791e-08, "epoch": 0.8719032072210486, "percentage": 87.19, "elapsed_time": "0:51:45", "remaining_time": "0:07:36", "throughput": 13809.45, "total_tokens": 42884096} +{"current_steps": 13625, "total_steps": 15621, "loss": 0.3767, "lr": 9.793971794179679e-08, "epoch": 0.8722232891620255, "percentage": 87.22, "elapsed_time": "0:51:45", "remaining_time": "0:07:35", "throughput": 13811.61, "total_tokens": 42898752} +{"current_steps": 13630, "total_steps": 15621, "loss": 0.3417, "lr": 9.745801376386931e-08, "epoch": 0.8725433711030024, "percentage": 87.25, "elapsed_time": "0:51:46", "remaining_time": "0:07:33", "throughput": 13813.99, "total_tokens": 42914688} +{"current_steps": 13635, "total_steps": 15621, "loss": 0.3211, "lr": 9.697743642020861e-08, "epoch": 0.8728634530439793, "percentage": 87.29, "elapsed_time": "0:51:47", "remaining_time": "0:07:32", "throughput": 13816.42, "total_tokens": 42930688} +{"current_steps": 13640, "total_steps": 15621, "loss": 0.3372, "lr": 9.649798651082119e-08, "epoch": 0.8731835349849562, "percentage": 87.32, "elapsed_time": "0:51:47", "remaining_time": "0:07:31", "throughput": 13818.89, "total_tokens": 42947008} +{"current_steps": 13645, "total_steps": 15621, "loss": 0.3946, "lr": 9.601966463430588e-08, "epoch": 0.873503616925933, "percentage": 87.35, "elapsed_time": "0:51:48", "remaining_time": "0:07:30", "throughput": 13821.22, "total_tokens": 42962816} +{"current_steps": 13650, "total_steps": 15621, "loss": 0.3405, "lr": 9.554247138785321e-08, "epoch": 0.8738236988669099, "percentage": 87.38, "elapsed_time": "0:51:49", "remaining_time": "0:07:28", "throughput": 13823.36, "total_tokens": 42977664} +{"current_steps": 13655, "total_steps": 15621, "loss": 0.4684, "lr": 9.506640736724447e-08, "epoch": 0.8741437808078868, "percentage": 87.41, "elapsed_time": "0:51:49", "remaining_time": "0:07:27", "throughput": 13825.7, "total_tokens": 42993472} +{"current_steps": 13660, "total_steps": 15621, "loss": 0.3895, "lr": 9.459147316685123e-08, "epoch": 0.8744638627488637, "percentage": 87.45, "elapsed_time": "0:51:50", "remaining_time": "0:07:26", "throughput": 13828.34, "total_tokens": 43010688} +{"current_steps": 13665, "total_steps": 15621, "loss": 0.3357, "lr": 9.41176693796345e-08, "epoch": 0.8747839446898406, "percentage": 87.48, "elapsed_time": "0:51:50", "remaining_time": "0:07:25", "throughput": 13830.89, "total_tokens": 43027392} +{"current_steps": 13670, "total_steps": 15621, "loss": 0.4172, "lr": 9.364499659714364e-08, "epoch": 0.8751040266308175, "percentage": 87.51, "elapsed_time": "0:51:51", "remaining_time": "0:07:24", "throughput": 13833.21, "total_tokens": 43043008} +{"current_steps": 13675, "total_steps": 15621, "loss": 0.342, "lr": 9.31734554095165e-08, "epoch": 0.8754241085717944, "percentage": 87.54, "elapsed_time": "0:51:52", "remaining_time": "0:07:22", "throughput": 13835.61, "total_tokens": 43059072} +{"current_steps": 13680, "total_steps": 15621, "loss": 0.3481, "lr": 9.270304640547744e-08, "epoch": 0.8757441905127713, "percentage": 87.57, "elapsed_time": "0:51:52", "remaining_time": "0:07:21", "throughput": 13837.92, "total_tokens": 43074624} +{"current_steps": 13685, "total_steps": 15621, "loss": 0.3952, "lr": 9.223377017233768e-08, "epoch": 0.8760642724537482, "percentage": 87.61, "elapsed_time": "0:51:53", "remaining_time": "0:07:20", "throughput": 13840.11, "total_tokens": 43089536} +{"current_steps": 13690, "total_steps": 15621, "loss": 0.3535, "lr": 9.176562729599458e-08, "epoch": 0.8763843543947251, "percentage": 87.64, "elapsed_time": "0:51:53", "remaining_time": "0:07:19", "throughput": 13842.3, "total_tokens": 43104512} +{"current_steps": 13695, "total_steps": 15621, "loss": 0.3463, "lr": 9.129861836092944e-08, "epoch": 0.8767044363357019, "percentage": 87.67, "elapsed_time": "0:51:54", "remaining_time": "0:07:18", "throughput": 13844.71, "total_tokens": 43120640} +{"current_steps": 13700, "total_steps": 15621, "loss": 0.4422, "lr": 9.083274395020845e-08, "epoch": 0.8770245182766788, "percentage": 87.7, "elapsed_time": "0:51:55", "remaining_time": "0:07:16", "throughput": 13847.04, "total_tokens": 43136384} +{"current_steps": 13705, "total_steps": 15621, "loss": 0.4045, "lr": 9.036800464548156e-08, "epoch": 0.8773446002176557, "percentage": 87.73, "elapsed_time": "0:51:55", "remaining_time": "0:07:15", "throughput": 13849.61, "total_tokens": 43153216} +{"current_steps": 13710, "total_steps": 15621, "loss": 0.3473, "lr": 8.990440102698138e-08, "epoch": 0.8776646821586326, "percentage": 87.77, "elapsed_time": "0:51:56", "remaining_time": "0:07:14", "throughput": 13851.73, "total_tokens": 43167936} +{"current_steps": 13715, "total_steps": 15621, "loss": 0.2767, "lr": 8.944193367352182e-08, "epoch": 0.8779847640996095, "percentage": 87.8, "elapsed_time": "0:51:57", "remaining_time": "0:07:13", "throughput": 13854.07, "total_tokens": 43183872} +{"current_steps": 13720, "total_steps": 15621, "loss": 0.4057, "lr": 8.898060316249944e-08, "epoch": 0.8783048460405863, "percentage": 87.83, "elapsed_time": "0:51:57", "remaining_time": "0:07:11", "throughput": 13856.49, "total_tokens": 43200256} +{"current_steps": 13725, "total_steps": 15621, "loss": 0.3563, "lr": 8.852041006989064e-08, "epoch": 0.8786249279815633, "percentage": 87.86, "elapsed_time": "0:51:58", "remaining_time": "0:07:10", "throughput": 13859.12, "total_tokens": 43217600} +{"current_steps": 13730, "total_steps": 15621, "loss": 0.3785, "lr": 8.80613549702518e-08, "epoch": 0.8789450099225402, "percentage": 87.89, "elapsed_time": "0:51:58", "remaining_time": "0:07:09", "throughput": 13861.47, "total_tokens": 43233344} +{"current_steps": 13735, "total_steps": 15621, "loss": 0.5423, "lr": 8.760343843671824e-08, "epoch": 0.8792650918635171, "percentage": 87.93, "elapsed_time": "0:51:59", "remaining_time": "0:07:08", "throughput": 13863.8, "total_tokens": 43249280} +{"current_steps": 13740, "total_steps": 15621, "loss": 0.4461, "lr": 8.714666104100487e-08, "epoch": 0.879585173804494, "percentage": 87.96, "elapsed_time": "0:52:00", "remaining_time": "0:07:07", "throughput": 13866.14, "total_tokens": 43265024} +{"current_steps": 13745, "total_steps": 15621, "loss": 0.3544, "lr": 8.66910233534034e-08, "epoch": 0.8799052557454708, "percentage": 87.99, "elapsed_time": "0:52:00", "remaining_time": "0:07:05", "throughput": 13868.44, "total_tokens": 43280576} +{"current_steps": 13750, "total_steps": 15621, "loss": 0.3156, "lr": 8.62365259427823e-08, "epoch": 0.8802253376864477, "percentage": 88.02, "elapsed_time": "0:52:01", "remaining_time": "0:07:04", "throughput": 13870.7, "total_tokens": 43296064} +{"current_steps": 13755, "total_steps": 15621, "loss": 0.2899, "lr": 8.578316937658758e-08, "epoch": 0.8805454196274246, "percentage": 88.05, "elapsed_time": "0:52:02", "remaining_time": "0:07:03", "throughput": 13872.98, "total_tokens": 43311552} +{"current_steps": 13760, "total_steps": 15621, "loss": 0.3116, "lr": 8.533095422083992e-08, "epoch": 0.8808655015684015, "percentage": 88.09, "elapsed_time": "0:52:02", "remaining_time": "0:07:02", "throughput": 13875.12, "total_tokens": 43326272} +{"current_steps": 13765, "total_steps": 15621, "loss": 0.2906, "lr": 8.487988104013533e-08, "epoch": 0.8811855835093784, "percentage": 88.12, "elapsed_time": "0:52:03", "remaining_time": "0:07:01", "throughput": 13877.55, "total_tokens": 43342592} +{"current_steps": 13770, "total_steps": 15621, "loss": 0.3188, "lr": 8.4429950397644e-08, "epoch": 0.8815056654503552, "percentage": 88.15, "elapsed_time": "0:52:03", "remaining_time": "0:06:59", "throughput": 13879.77, "total_tokens": 43357888} +{"current_steps": 13775, "total_steps": 15621, "loss": 0.2679, "lr": 8.398116285510948e-08, "epoch": 0.8818257473913321, "percentage": 88.18, "elapsed_time": "0:52:04", "remaining_time": "0:06:58", "throughput": 13882.2, "total_tokens": 43374272} +{"current_steps": 13780, "total_steps": 15621, "loss": 0.2698, "lr": 8.353351897284844e-08, "epoch": 0.8821458293323091, "percentage": 88.21, "elapsed_time": "0:52:05", "remaining_time": "0:06:57", "throughput": 13885.12, "total_tokens": 43393280} +{"current_steps": 13785, "total_steps": 15621, "loss": 0.4762, "lr": 8.308701930974949e-08, "epoch": 0.882465911273286, "percentage": 88.25, "elapsed_time": "0:52:05", "remaining_time": "0:06:56", "throughput": 13887.58, "total_tokens": 43409600} +{"current_steps": 13790, "total_steps": 15621, "loss": 0.4038, "lr": 8.264166442327269e-08, "epoch": 0.8827859932142629, "percentage": 88.28, "elapsed_time": "0:52:06", "remaining_time": "0:06:55", "throughput": 13889.73, "total_tokens": 43424384} +{"current_steps": 13795, "total_steps": 15621, "loss": 0.2533, "lr": 8.219745486944885e-08, "epoch": 0.8831060751552398, "percentage": 88.31, "elapsed_time": "0:52:06", "remaining_time": "0:06:53", "throughput": 13892.02, "total_tokens": 43440128} +{"current_steps": 13800, "total_steps": 15621, "loss": 0.4597, "lr": 8.175439120287875e-08, "epoch": 0.8834261570962166, "percentage": 88.34, "elapsed_time": "0:52:07", "remaining_time": "0:06:52", "throughput": 13894.19, "total_tokens": 43455168} +{"current_steps": 13805, "total_steps": 15621, "loss": 0.3494, "lr": 8.131247397673269e-08, "epoch": 0.8837462390371935, "percentage": 88.37, "elapsed_time": "0:52:08", "remaining_time": "0:06:51", "throughput": 13896.73, "total_tokens": 43472064} +{"current_steps": 13810, "total_steps": 15621, "loss": 0.4333, "lr": 8.087170374274921e-08, "epoch": 0.8840663209781704, "percentage": 88.41, "elapsed_time": "0:52:08", "remaining_time": "0:06:50", "throughput": 13899.07, "total_tokens": 43488000} +{"current_steps": 13815, "total_steps": 15621, "loss": 0.2981, "lr": 8.043208105123578e-08, "epoch": 0.8843864029191473, "percentage": 88.44, "elapsed_time": "0:52:09", "remaining_time": "0:06:49", "throughput": 13901.35, "total_tokens": 43503488} +{"current_steps": 13820, "total_steps": 15621, "loss": 0.335, "lr": 7.999360645106579e-08, "epoch": 0.8847064848601242, "percentage": 88.47, "elapsed_time": "0:52:10", "remaining_time": "0:06:47", "throughput": 13903.5, "total_tokens": 43518336} +{"current_steps": 13825, "total_steps": 15621, "loss": 0.2651, "lr": 7.955628048968011e-08, "epoch": 0.885026566801101, "percentage": 88.5, "elapsed_time": "0:52:10", "remaining_time": "0:06:46", "throughput": 13905.57, "total_tokens": 43532800} +{"current_steps": 13830, "total_steps": 15621, "loss": 0.2627, "lr": 7.912010371308564e-08, "epoch": 0.885346648742078, "percentage": 88.53, "elapsed_time": "0:52:11", "remaining_time": "0:06:45", "throughput": 13907.72, "total_tokens": 43547648} +{"current_steps": 13835, "total_steps": 15621, "loss": 0.2935, "lr": 7.868507666585422e-08, "epoch": 0.8856667306830549, "percentage": 88.57, "elapsed_time": "0:52:11", "remaining_time": "0:06:44", "throughput": 13909.87, "total_tokens": 43562688} +{"current_steps": 13840, "total_steps": 15621, "loss": 0.4137, "lr": 7.825119989112172e-08, "epoch": 0.8859868126240318, "percentage": 88.6, "elapsed_time": "0:52:12", "remaining_time": "0:06:43", "throughput": 13912.14, "total_tokens": 43578176} +{"current_steps": 13845, "total_steps": 15621, "loss": 0.2938, "lr": 7.78184739305886e-08, "epoch": 0.8863068945650087, "percentage": 88.63, "elapsed_time": "0:52:12", "remaining_time": "0:06:41", "throughput": 13914.46, "total_tokens": 43593920} +{"current_steps": 13850, "total_steps": 15621, "loss": 0.3491, "lr": 7.73868993245187e-08, "epoch": 0.8866269765059855, "percentage": 88.66, "elapsed_time": "0:52:13", "remaining_time": "0:06:40", "throughput": 13917.01, "total_tokens": 43610944} +{"current_steps": 13855, "total_steps": 15621, "loss": 0.3412, "lr": 7.695647661173754e-08, "epoch": 0.8869470584469624, "percentage": 88.69, "elapsed_time": "0:52:14", "remaining_time": "0:06:39", "throughput": 13919.37, "total_tokens": 43627008} +{"current_steps": 13860, "total_steps": 15621, "loss": 0.3785, "lr": 7.652720632963284e-08, "epoch": 0.8872671403879393, "percentage": 88.73, "elapsed_time": "0:52:14", "remaining_time": "0:06:38", "throughput": 13921.67, "total_tokens": 43642752} +{"current_steps": 13865, "total_steps": 15621, "loss": 0.3396, "lr": 7.609908901415396e-08, "epoch": 0.8875872223289162, "percentage": 88.76, "elapsed_time": "0:52:15", "remaining_time": "0:06:37", "throughput": 13923.95, "total_tokens": 43658496} +{"current_steps": 13870, "total_steps": 15621, "loss": 0.4018, "lr": 7.567212519981047e-08, "epoch": 0.8879073042698931, "percentage": 88.79, "elapsed_time": "0:52:16", "remaining_time": "0:06:35", "throughput": 13926.23, "total_tokens": 43674304} +{"current_steps": 13875, "total_steps": 15621, "loss": 0.3382, "lr": 7.524631541967108e-08, "epoch": 0.8882273862108699, "percentage": 88.82, "elapsed_time": "0:52:16", "remaining_time": "0:06:34", "throughput": 13928.45, "total_tokens": 43689536} +{"current_steps": 13880, "total_steps": 15621, "loss": 0.2903, "lr": 7.482166020536485e-08, "epoch": 0.8885474681518468, "percentage": 88.85, "elapsed_time": "0:52:17", "remaining_time": "0:06:33", "throughput": 13930.96, "total_tokens": 43706496} +{"current_steps": 13885, "total_steps": 15621, "loss": 0.3108, "lr": 7.439816008707877e-08, "epoch": 0.8888675500928238, "percentage": 88.89, "elapsed_time": "0:52:17", "remaining_time": "0:06:32", "throughput": 13933.08, "total_tokens": 43721408} +{"current_steps": 13890, "total_steps": 15621, "loss": 0.3216, "lr": 7.397581559355748e-08, "epoch": 0.8891876320338007, "percentage": 88.92, "elapsed_time": "0:52:18", "remaining_time": "0:06:31", "throughput": 13935.45, "total_tokens": 43737536} +{"current_steps": 13895, "total_steps": 15621, "loss": 0.4116, "lr": 7.355462725210315e-08, "epoch": 0.8895077139747776, "percentage": 88.95, "elapsed_time": "0:52:19", "remaining_time": "0:06:29", "throughput": 13937.64, "total_tokens": 43752640} +{"current_steps": 13900, "total_steps": 15621, "loss": 0.4081, "lr": 7.313459558857438e-08, "epoch": 0.8898277959157544, "percentage": 88.98, "elapsed_time": "0:52:19", "remaining_time": "0:06:28", "throughput": 13939.93, "total_tokens": 43768384} +{"current_steps": 13905, "total_steps": 15621, "loss": 0.3108, "lr": 7.271572112738566e-08, "epoch": 0.8901478778567313, "percentage": 89.01, "elapsed_time": "0:52:20", "remaining_time": "0:06:27", "throughput": 13942.25, "total_tokens": 43784320} +{"current_steps": 13910, "total_steps": 15621, "loss": 0.3582, "lr": 7.229800439150657e-08, "epoch": 0.8904679597977082, "percentage": 89.05, "elapsed_time": "0:52:20", "remaining_time": "0:06:26", "throughput": 13944.4, "total_tokens": 43799232} +{"current_steps": 13915, "total_steps": 15621, "loss": 0.3721, "lr": 7.188144590246148e-08, "epoch": 0.8907880417386851, "percentage": 89.08, "elapsed_time": "0:52:21", "remaining_time": "0:06:25", "throughput": 13946.78, "total_tokens": 43815360} +{"current_steps": 13920, "total_steps": 15621, "loss": 0.339, "lr": 7.146604618032848e-08, "epoch": 0.891108123679662, "percentage": 89.11, "elapsed_time": "0:52:22", "remaining_time": "0:06:23", "throughput": 13948.9, "total_tokens": 43830336} +{"current_steps": 13925, "total_steps": 15621, "loss": 0.4065, "lr": 7.105180574373904e-08, "epoch": 0.8914282056206388, "percentage": 89.14, "elapsed_time": "0:52:22", "remaining_time": "0:06:22", "throughput": 13951.29, "total_tokens": 43846656} +{"current_steps": 13930, "total_steps": 15621, "loss": 0.3231, "lr": 7.063872510987712e-08, "epoch": 0.8917482875616157, "percentage": 89.17, "elapsed_time": "0:52:23", "remaining_time": "0:06:21", "throughput": 13953.63, "total_tokens": 43862720} +{"current_steps": 13935, "total_steps": 15621, "loss": 0.3558, "lr": 7.022680479447874e-08, "epoch": 0.8920683695025927, "percentage": 89.21, "elapsed_time": "0:52:24", "remaining_time": "0:06:20", "throughput": 13955.6, "total_tokens": 43876800} +{"current_steps": 13940, "total_steps": 15621, "loss": 0.2952, "lr": 6.98160453118316e-08, "epoch": 0.8923884514435696, "percentage": 89.24, "elapsed_time": "0:52:24", "remaining_time": "0:06:19", "throughput": 13957.81, "total_tokens": 43892160} +{"current_steps": 13945, "total_steps": 15621, "loss": 0.333, "lr": 6.940644717477328e-08, "epoch": 0.8927085333845465, "percentage": 89.27, "elapsed_time": "0:52:25", "remaining_time": "0:06:18", "throughput": 13960.18, "total_tokens": 43908416} +{"current_steps": 13950, "total_steps": 15621, "loss": 0.4213, "lr": 6.899801089469204e-08, "epoch": 0.8930286153255234, "percentage": 89.3, "elapsed_time": "0:52:25", "remaining_time": "0:06:16", "throughput": 13962.37, "total_tokens": 43923712} +{"current_steps": 13955, "total_steps": 15621, "loss": 0.3555, "lr": 6.85907369815254e-08, "epoch": 0.8933486972665002, "percentage": 89.33, "elapsed_time": "0:52:26", "remaining_time": "0:06:15", "throughput": 13964.65, "total_tokens": 43939520} +{"current_steps": 13960, "total_steps": 15621, "loss": 0.3895, "lr": 6.81846259437595e-08, "epoch": 0.8936687792074771, "percentage": 89.37, "elapsed_time": "0:52:27", "remaining_time": "0:06:14", "throughput": 13966.81, "total_tokens": 43954688} +{"current_steps": 13965, "total_steps": 15621, "loss": 0.3146, "lr": 6.77796782884289e-08, "epoch": 0.893988861148454, "percentage": 89.4, "elapsed_time": "0:52:27", "remaining_time": "0:06:13", "throughput": 13968.94, "total_tokens": 43969600} +{"current_steps": 13970, "total_steps": 15621, "loss": 0.3824, "lr": 6.737589452111526e-08, "epoch": 0.8943089430894309, "percentage": 89.43, "elapsed_time": "0:52:28", "remaining_time": "0:06:12", "throughput": 13971.25, "total_tokens": 43985472} +{"current_steps": 13975, "total_steps": 15621, "loss": 0.3916, "lr": 6.697327514594786e-08, "epoch": 0.8946290250304078, "percentage": 89.46, "elapsed_time": "0:52:28", "remaining_time": "0:06:10", "throughput": 13973.45, "total_tokens": 44000768} +{"current_steps": 13980, "total_steps": 15621, "loss": 0.4586, "lr": 6.657182066560118e-08, "epoch": 0.8949491069713846, "percentage": 89.49, "elapsed_time": "0:52:29", "remaining_time": "0:06:09", "throughput": 13975.8, "total_tokens": 44017088} +{"current_steps": 13985, "total_steps": 15621, "loss": 0.37, "lr": 6.617153158129596e-08, "epoch": 0.8952691889123615, "percentage": 89.53, "elapsed_time": "0:52:30", "remaining_time": "0:06:08", "throughput": 13977.84, "total_tokens": 44031488} +{"current_steps": 13990, "total_steps": 15621, "loss": 0.337, "lr": 6.577240839279807e-08, "epoch": 0.8955892708533385, "percentage": 89.56, "elapsed_time": "0:52:30", "remaining_time": "0:06:07", "throughput": 13980.12, "total_tokens": 44047296} +{"current_steps": 13995, "total_steps": 15621, "loss": 0.3143, "lr": 6.537445159841748e-08, "epoch": 0.8959093527943154, "percentage": 89.59, "elapsed_time": "0:52:31", "remaining_time": "0:06:06", "throughput": 13982.51, "total_tokens": 44063744} +{"current_steps": 14000, "total_steps": 15621, "loss": 0.3936, "lr": 6.497766169500752e-08, "epoch": 0.8962294347352923, "percentage": 89.62, "elapsed_time": "0:52:31", "remaining_time": "0:06:04", "throughput": 13984.77, "total_tokens": 44079168} +{"current_steps": 14005, "total_steps": 15621, "loss": 0.2643, "lr": 6.458203917796546e-08, "epoch": 0.8965495166762691, "percentage": 89.65, "elapsed_time": "0:52:32", "remaining_time": "0:06:03", "throughput": 13986.83, "total_tokens": 44093824} +{"current_steps": 14010, "total_steps": 15621, "loss": 0.455, "lr": 6.418758454123041e-08, "epoch": 0.896869598617246, "percentage": 89.69, "elapsed_time": "0:52:33", "remaining_time": "0:06:02", "throughput": 13989.41, "total_tokens": 44111296} +{"current_steps": 14015, "total_steps": 15621, "loss": 0.3905, "lr": 6.379429827728377e-08, "epoch": 0.8971896805582229, "percentage": 89.72, "elapsed_time": "0:52:33", "remaining_time": "0:06:01", "throughput": 13991.91, "total_tokens": 44128000} +{"current_steps": 14020, "total_steps": 15621, "loss": 0.3833, "lr": 6.340218087714799e-08, "epoch": 0.8975097624991998, "percentage": 89.75, "elapsed_time": "0:52:34", "remaining_time": "0:06:00", "throughput": 13994.12, "total_tokens": 44143488} +{"current_steps": 14025, "total_steps": 15621, "loss": 0.3567, "lr": 6.301123283038634e-08, "epoch": 0.8978298444401767, "percentage": 89.78, "elapsed_time": "0:52:35", "remaining_time": "0:05:59", "throughput": 13996.37, "total_tokens": 44158976} +{"current_steps": 14030, "total_steps": 15621, "loss": 0.319, "lr": 6.262145462510193e-08, "epoch": 0.8981499263811535, "percentage": 89.81, "elapsed_time": "0:52:35", "remaining_time": "0:05:57", "throughput": 13998.87, "total_tokens": 44175808} +{"current_steps": 14035, "total_steps": 15621, "loss": 0.2817, "lr": 6.223284674793738e-08, "epoch": 0.8984700083221304, "percentage": 89.85, "elapsed_time": "0:52:36", "remaining_time": "0:05:56", "throughput": 14000.92, "total_tokens": 44190336} +{"current_steps": 14040, "total_steps": 15621, "loss": 0.3835, "lr": 6.184540968407437e-08, "epoch": 0.8987900902631074, "percentage": 89.88, "elapsed_time": "0:52:36", "remaining_time": "0:05:55", "throughput": 14003.11, "total_tokens": 44205696} +{"current_steps": 14045, "total_steps": 15621, "loss": 0.3546, "lr": 6.145914391723239e-08, "epoch": 0.8991101722040843, "percentage": 89.91, "elapsed_time": "0:52:37", "remaining_time": "0:05:54", "throughput": 14005.49, "total_tokens": 44222016} +{"current_steps": 14050, "total_steps": 15621, "loss": 0.3285, "lr": 6.107404992966902e-08, "epoch": 0.8994302541450612, "percentage": 89.94, "elapsed_time": "0:52:38", "remaining_time": "0:05:53", "throughput": 14007.92, "total_tokens": 44238592} +{"current_steps": 14055, "total_steps": 15621, "loss": 0.2517, "lr": 6.069012820217856e-08, "epoch": 0.899750336086038, "percentage": 89.98, "elapsed_time": "0:52:38", "remaining_time": "0:05:51", "throughput": 14010.12, "total_tokens": 44254016} +{"current_steps": 14060, "total_steps": 15621, "loss": 0.3757, "lr": 6.030737921409168e-08, "epoch": 0.9000704180270149, "percentage": 90.01, "elapsed_time": "0:52:39", "remaining_time": "0:05:50", "throughput": 14012.34, "total_tokens": 44269376} +{"current_steps": 14065, "total_steps": 15621, "loss": 0.4646, "lr": 5.992580344327503e-08, "epoch": 0.9003904999679918, "percentage": 90.04, "elapsed_time": "0:52:39", "remaining_time": "0:05:49", "throughput": 14014.5, "total_tokens": 44284672} +{"current_steps": 14070, "total_steps": 15621, "loss": 0.352, "lr": 5.954540136613051e-08, "epoch": 0.9007105819089687, "percentage": 90.07, "elapsed_time": "0:52:40", "remaining_time": "0:05:48", "throughput": 14016.75, "total_tokens": 44300224} +{"current_steps": 14075, "total_steps": 15621, "loss": 0.3451, "lr": 5.916617345759456e-08, "epoch": 0.9010306638499456, "percentage": 90.1, "elapsed_time": "0:52:41", "remaining_time": "0:05:47", "throughput": 14018.9, "total_tokens": 44315264} +{"current_steps": 14076, "total_steps": 15621, "eval_loss": 0.3543796241283417, "epoch": 0.901094680238141, "percentage": 90.11, "elapsed_time": "0:53:30", "remaining_time": "0:05:52", "throughput": 13804.79, "total_tokens": 44318848} +{"current_steps": 14080, "total_steps": 15621, "loss": 0.4234, "lr": 5.878812019113766e-08, "epoch": 0.9013507457909224, "percentage": 90.14, "elapsed_time": "0:54:00", "remaining_time": "0:05:54", "throughput": 13680.08, "total_tokens": 44330176} +{"current_steps": 14085, "total_steps": 15621, "loss": 0.2892, "lr": 5.84112420387638e-08, "epoch": 0.9016708277318993, "percentage": 90.17, "elapsed_time": "0:54:01", "remaining_time": "0:05:53", "throughput": 13682.21, "total_tokens": 44345152} +{"current_steps": 14090, "total_steps": 15621, "loss": 0.3656, "lr": 5.8035539471009697e-08, "epoch": 0.9019909096728762, "percentage": 90.2, "elapsed_time": "0:54:01", "remaining_time": "0:05:52", "throughput": 13684.52, "total_tokens": 44361152} +{"current_steps": 14095, "total_steps": 15621, "loss": 0.4078, "lr": 5.7661012956944253e-08, "epoch": 0.9023109916138532, "percentage": 90.23, "elapsed_time": "0:54:02", "remaining_time": "0:05:51", "throughput": 13686.67, "total_tokens": 44376128} +{"current_steps": 14100, "total_steps": 15621, "loss": 0.2842, "lr": 5.728766296416876e-08, "epoch": 0.9026310735548301, "percentage": 90.26, "elapsed_time": "0:54:02", "remaining_time": "0:05:49", "throughput": 13689.04, "total_tokens": 44392192} +{"current_steps": 14105, "total_steps": 15621, "loss": 0.4079, "lr": 5.6915489958814453e-08, "epoch": 0.902951155495807, "percentage": 90.3, "elapsed_time": "0:54:03", "remaining_time": "0:05:48", "throughput": 13691.28, "total_tokens": 44407680} +{"current_steps": 14110, "total_steps": 15621, "loss": 0.4093, "lr": 5.654449440554399e-08, "epoch": 0.9032712374367838, "percentage": 90.33, "elapsed_time": "0:54:04", "remaining_time": "0:05:47", "throughput": 13693.74, "total_tokens": 44424384} +{"current_steps": 14115, "total_steps": 15621, "loss": 0.3752, "lr": 5.617467676754972e-08, "epoch": 0.9035913193777607, "percentage": 90.36, "elapsed_time": "0:54:04", "remaining_time": "0:05:46", "throughput": 13695.91, "total_tokens": 44439744} +{"current_steps": 14120, "total_steps": 15621, "loss": 0.3012, "lr": 5.580603750655344e-08, "epoch": 0.9039114013187376, "percentage": 90.39, "elapsed_time": "0:54:05", "remaining_time": "0:05:44", "throughput": 13697.97, "total_tokens": 44454272} +{"current_steps": 14125, "total_steps": 15621, "loss": 0.3578, "lr": 5.543857708280497e-08, "epoch": 0.9042314832597145, "percentage": 90.42, "elapsed_time": "0:54:05", "remaining_time": "0:05:43", "throughput": 13700.05, "total_tokens": 44468992} +{"current_steps": 14130, "total_steps": 15621, "loss": 0.4819, "lr": 5.507229595508367e-08, "epoch": 0.9045515652006914, "percentage": 90.46, "elapsed_time": "0:54:06", "remaining_time": "0:05:42", "throughput": 13702.33, "total_tokens": 44484864} +{"current_steps": 14135, "total_steps": 15621, "loss": 0.289, "lr": 5.4707194580695504e-08, "epoch": 0.9048716471416682, "percentage": 90.49, "elapsed_time": "0:54:07", "remaining_time": "0:05:41", "throughput": 13704.48, "total_tokens": 44499968} +{"current_steps": 14140, "total_steps": 15621, "loss": 0.4239, "lr": 5.4343273415473846e-08, "epoch": 0.9051917290826451, "percentage": 90.52, "elapsed_time": "0:54:07", "remaining_time": "0:05:40", "throughput": 13707.15, "total_tokens": 44517952} +{"current_steps": 14145, "total_steps": 15621, "loss": 0.3421, "lr": 5.3980532913778576e-08, "epoch": 0.905511811023622, "percentage": 90.55, "elapsed_time": "0:54:08", "remaining_time": "0:05:38", "throughput": 13709.29, "total_tokens": 44532928} +{"current_steps": 14150, "total_steps": 15621, "loss": 0.3955, "lr": 5.361897352849554e-08, "epoch": 0.905831892964599, "percentage": 90.58, "elapsed_time": "0:54:08", "remaining_time": "0:05:37", "throughput": 13711.47, "total_tokens": 44548288} +{"current_steps": 14155, "total_steps": 15621, "loss": 0.3331, "lr": 5.325859571103586e-08, "epoch": 0.9061519749055759, "percentage": 90.62, "elapsed_time": "0:54:09", "remaining_time": "0:05:36", "throughput": 13713.67, "total_tokens": 44563712} +{"current_steps": 14160, "total_steps": 15621, "loss": 0.3333, "lr": 5.289939991133508e-08, "epoch": 0.9064720568465527, "percentage": 90.65, "elapsed_time": "0:54:10", "remaining_time": "0:05:35", "throughput": 13715.89, "total_tokens": 44579264} +{"current_steps": 14165, "total_steps": 15621, "loss": 0.2384, "lr": 5.2541386577853895e-08, "epoch": 0.9067921387875296, "percentage": 90.68, "elapsed_time": "0:54:10", "remaining_time": "0:05:34", "throughput": 13718.0, "total_tokens": 44594176} +{"current_steps": 14170, "total_steps": 15621, "loss": 0.2502, "lr": 5.2184556157576e-08, "epoch": 0.9071122207285065, "percentage": 90.71, "elapsed_time": "0:54:11", "remaining_time": "0:05:32", "throughput": 13720.22, "total_tokens": 44609664} +{"current_steps": 14175, "total_steps": 15621, "loss": 0.3649, "lr": 5.1828909096008234e-08, "epoch": 0.9074323026694834, "percentage": 90.74, "elapsed_time": "0:54:12", "remaining_time": "0:05:31", "throughput": 13722.76, "total_tokens": 44626944} +{"current_steps": 14180, "total_steps": 15621, "loss": 0.2331, "lr": 5.14744458371803e-08, "epoch": 0.9077523846104603, "percentage": 90.78, "elapsed_time": "0:54:12", "remaining_time": "0:05:30", "throughput": 13725.16, "total_tokens": 44643520} +{"current_steps": 14185, "total_steps": 15621, "loss": 0.5075, "lr": 5.1121166823643646e-08, "epoch": 0.9080724665514371, "percentage": 90.81, "elapsed_time": "0:54:13", "remaining_time": "0:05:29", "throughput": 13727.16, "total_tokens": 44657984} +{"current_steps": 14190, "total_steps": 15621, "loss": 0.376, "lr": 5.076907249647122e-08, "epoch": 0.908392548492414, "percentage": 90.84, "elapsed_time": "0:54:13", "remaining_time": "0:05:28", "throughput": 13729.29, "total_tokens": 44673024} +{"current_steps": 14195, "total_steps": 15621, "loss": 0.412, "lr": 5.0418163295257055e-08, "epoch": 0.9087126304333909, "percentage": 90.87, "elapsed_time": "0:54:14", "remaining_time": "0:05:26", "throughput": 13731.27, "total_tokens": 44687424} +{"current_steps": 14200, "total_steps": 15621, "loss": 0.2867, "lr": 5.006843965811536e-08, "epoch": 0.9090327123743679, "percentage": 90.9, "elapsed_time": "0:54:15", "remaining_time": "0:05:25", "throughput": 13733.49, "total_tokens": 44702976} +{"current_steps": 14205, "total_steps": 15621, "loss": 0.482, "lr": 4.971990202168008e-08, "epoch": 0.9093527943153448, "percentage": 90.94, "elapsed_time": "0:54:15", "remaining_time": "0:05:24", "throughput": 13735.65, "total_tokens": 44718144} +{"current_steps": 14210, "total_steps": 15621, "loss": 0.3277, "lr": 4.9372550821104697e-08, "epoch": 0.9096728762563216, "percentage": 90.97, "elapsed_time": "0:54:16", "remaining_time": "0:05:23", "throughput": 13738.11, "total_tokens": 44734912} +{"current_steps": 14215, "total_steps": 15621, "loss": 0.311, "lr": 4.902638649006119e-08, "epoch": 0.9099929581972985, "percentage": 91.0, "elapsed_time": "0:54:16", "remaining_time": "0:05:22", "throughput": 13740.24, "total_tokens": 44749888} +{"current_steps": 14220, "total_steps": 15621, "loss": 0.3201, "lr": 4.868140946073973e-08, "epoch": 0.9103130401382754, "percentage": 91.03, "elapsed_time": "0:54:17", "remaining_time": "0:05:20", "throughput": 13742.26, "total_tokens": 44764544} +{"current_steps": 14225, "total_steps": 15621, "loss": 0.2995, "lr": 4.833762016384857e-08, "epoch": 0.9106331220792523, "percentage": 91.06, "elapsed_time": "0:54:18", "remaining_time": "0:05:19", "throughput": 13744.64, "total_tokens": 44780992} +{"current_steps": 14230, "total_steps": 15621, "loss": 0.3879, "lr": 4.799501902861214e-08, "epoch": 0.9109532040202292, "percentage": 91.1, "elapsed_time": "0:54:18", "remaining_time": "0:05:18", "throughput": 13746.83, "total_tokens": 44796672} +{"current_steps": 14235, "total_steps": 15621, "loss": 0.4313, "lr": 4.765360648277217e-08, "epoch": 0.911273285961206, "percentage": 91.13, "elapsed_time": "0:54:19", "remaining_time": "0:05:17", "throughput": 13749.04, "total_tokens": 44812224} +{"current_steps": 14240, "total_steps": 15621, "loss": 0.4254, "lr": 4.7313382952586465e-08, "epoch": 0.9115933679021829, "percentage": 91.16, "elapsed_time": "0:54:19", "remaining_time": "0:05:16", "throughput": 13751.13, "total_tokens": 44827136} +{"current_steps": 14245, "total_steps": 15621, "loss": 0.3787, "lr": 4.6974348862828027e-08, "epoch": 0.9119134498431598, "percentage": 91.19, "elapsed_time": "0:54:20", "remaining_time": "0:05:14", "throughput": 13753.24, "total_tokens": 44842176} +{"current_steps": 14250, "total_steps": 15621, "loss": 0.4211, "lr": 4.663650463678448e-08, "epoch": 0.9122335317841367, "percentage": 91.22, "elapsed_time": "0:54:21", "remaining_time": "0:05:13", "throughput": 13755.63, "total_tokens": 44858880} +{"current_steps": 14255, "total_steps": 15621, "loss": 0.4399, "lr": 4.629985069625875e-08, "epoch": 0.9125536137251137, "percentage": 91.26, "elapsed_time": "0:54:21", "remaining_time": "0:05:12", "throughput": 13758.02, "total_tokens": 44875328} +{"current_steps": 14260, "total_steps": 15621, "loss": 0.3625, "lr": 4.596438746156728e-08, "epoch": 0.9128736956660906, "percentage": 91.29, "elapsed_time": "0:54:22", "remaining_time": "0:05:11", "throughput": 13760.46, "total_tokens": 44892032} +{"current_steps": 14265, "total_steps": 15621, "loss": 0.3618, "lr": 4.563011535153949e-08, "epoch": 0.9131937776070674, "percentage": 91.32, "elapsed_time": "0:54:22", "remaining_time": "0:05:10", "throughput": 13762.61, "total_tokens": 44907328} +{"current_steps": 14270, "total_steps": 15621, "loss": 0.2686, "lr": 4.52970347835181e-08, "epoch": 0.9135138595480443, "percentage": 91.35, "elapsed_time": "0:54:23", "remaining_time": "0:05:08", "throughput": 13764.74, "total_tokens": 44922560} +{"current_steps": 14275, "total_steps": 15621, "loss": 0.3256, "lr": 4.496514617335845e-08, "epoch": 0.9138339414890212, "percentage": 91.38, "elapsed_time": "0:54:24", "remaining_time": "0:05:07", "throughput": 13766.84, "total_tokens": 44937728} +{"current_steps": 14280, "total_steps": 15621, "loss": 0.3568, "lr": 4.4634449935427197e-08, "epoch": 0.9141540234299981, "percentage": 91.42, "elapsed_time": "0:54:24", "remaining_time": "0:05:06", "throughput": 13769.25, "total_tokens": 44954560} +{"current_steps": 14285, "total_steps": 15621, "loss": 0.3032, "lr": 4.430494648260219e-08, "epoch": 0.914474105370975, "percentage": 91.45, "elapsed_time": "0:54:25", "remaining_time": "0:05:05", "throughput": 13771.69, "total_tokens": 44971520} +{"current_steps": 14290, "total_steps": 15621, "loss": 0.4391, "lr": 4.397663622627279e-08, "epoch": 0.9147941873119518, "percentage": 91.48, "elapsed_time": "0:54:26", "remaining_time": "0:05:04", "throughput": 13773.92, "total_tokens": 44987392} +{"current_steps": 14295, "total_steps": 15621, "loss": 0.3116, "lr": 4.364951957633789e-08, "epoch": 0.9151142692529287, "percentage": 91.51, "elapsed_time": "0:54:26", "remaining_time": "0:05:03", "throughput": 13776.08, "total_tokens": 45002688} +{"current_steps": 14300, "total_steps": 15621, "loss": 0.2874, "lr": 4.332359694120669e-08, "epoch": 0.9154343511939056, "percentage": 91.54, "elapsed_time": "0:54:27", "remaining_time": "0:05:01", "throughput": 13778.19, "total_tokens": 45017792} +{"current_steps": 14305, "total_steps": 15621, "loss": 0.3561, "lr": 4.299886872779734e-08, "epoch": 0.9157544331348826, "percentage": 91.58, "elapsed_time": "0:54:27", "remaining_time": "0:05:00", "throughput": 13780.24, "total_tokens": 45032640} +{"current_steps": 14310, "total_steps": 15621, "loss": 0.2945, "lr": 4.267533534153678e-08, "epoch": 0.9160745150758595, "percentage": 91.61, "elapsed_time": "0:54:28", "remaining_time": "0:04:59", "throughput": 13782.46, "total_tokens": 45048256} +{"current_steps": 14315, "total_steps": 15621, "loss": 0.3251, "lr": 4.2352997186360316e-08, "epoch": 0.9163945970168363, "percentage": 91.64, "elapsed_time": "0:54:29", "remaining_time": "0:04:58", "throughput": 13784.74, "total_tokens": 45064192} +{"current_steps": 14320, "total_steps": 15621, "loss": 0.321, "lr": 4.203185466471082e-08, "epoch": 0.9167146789578132, "percentage": 91.67, "elapsed_time": "0:54:29", "remaining_time": "0:04:57", "throughput": 13786.84, "total_tokens": 45079488} +{"current_steps": 14325, "total_steps": 15621, "loss": 0.3791, "lr": 4.1711908177538556e-08, "epoch": 0.9170347608987901, "percentage": 91.7, "elapsed_time": "0:54:30", "remaining_time": "0:04:55", "throughput": 13789.11, "total_tokens": 45095616} +{"current_steps": 14330, "total_steps": 15621, "loss": 0.3797, "lr": 4.139315812430055e-08, "epoch": 0.917354842839767, "percentage": 91.74, "elapsed_time": "0:54:30", "remaining_time": "0:04:54", "throughput": 13791.21, "total_tokens": 45110592} +{"current_steps": 14335, "total_steps": 15621, "loss": 0.3756, "lr": 4.1075604902959915e-08, "epoch": 0.9176749247807439, "percentage": 91.77, "elapsed_time": "0:54:31", "remaining_time": "0:04:53", "throughput": 13793.58, "total_tokens": 45127168} +{"current_steps": 14340, "total_steps": 15621, "loss": 0.3157, "lr": 4.07592489099855e-08, "epoch": 0.9179950067217207, "percentage": 91.8, "elapsed_time": "0:54:32", "remaining_time": "0:04:52", "throughput": 13795.66, "total_tokens": 45142208} +{"current_steps": 14345, "total_steps": 15621, "loss": 0.3917, "lr": 4.044409054035147e-08, "epoch": 0.9183150886626976, "percentage": 91.83, "elapsed_time": "0:54:32", "remaining_time": "0:04:51", "throughput": 13797.74, "total_tokens": 45157184} +{"current_steps": 14350, "total_steps": 15621, "loss": 0.3891, "lr": 4.0130130187537195e-08, "epoch": 0.9186351706036745, "percentage": 91.86, "elapsed_time": "0:54:33", "remaining_time": "0:04:49", "throughput": 13800.26, "total_tokens": 45174464} +{"current_steps": 14355, "total_steps": 15621, "loss": 0.3157, "lr": 3.981736824352522e-08, "epoch": 0.9189552525446514, "percentage": 91.9, "elapsed_time": "0:54:34", "remaining_time": "0:04:48", "throughput": 13802.24, "total_tokens": 45188992} +{"current_steps": 14360, "total_steps": 15621, "loss": 0.4661, "lr": 3.950580509880286e-08, "epoch": 0.9192753344856284, "percentage": 91.93, "elapsed_time": "0:54:34", "remaining_time": "0:04:47", "throughput": 13804.3, "total_tokens": 45204032} +{"current_steps": 14365, "total_steps": 15621, "loss": 0.4012, "lr": 3.9195441142360066e-08, "epoch": 0.9195954164266052, "percentage": 91.96, "elapsed_time": "0:54:35", "remaining_time": "0:04:46", "throughput": 13806.4, "total_tokens": 45219328} +{"current_steps": 14370, "total_steps": 15621, "loss": 0.3271, "lr": 3.888627676169043e-08, "epoch": 0.9199154983675821, "percentage": 91.99, "elapsed_time": "0:54:35", "remaining_time": "0:04:45", "throughput": 13808.71, "total_tokens": 45235584} +{"current_steps": 14375, "total_steps": 15621, "loss": 0.3709, "lr": 3.857831234278886e-08, "epoch": 0.920235580308559, "percentage": 92.02, "elapsed_time": "0:54:36", "remaining_time": "0:04:43", "throughput": 13810.83, "total_tokens": 45250880} +{"current_steps": 14380, "total_steps": 15621, "loss": 0.4085, "lr": 3.827154827015255e-08, "epoch": 0.9205556622495359, "percentage": 92.06, "elapsed_time": "0:54:37", "remaining_time": "0:04:42", "throughput": 13813.07, "total_tokens": 45266752} +{"current_steps": 14385, "total_steps": 15621, "loss": 0.2914, "lr": 3.7965984926780383e-08, "epoch": 0.9208757441905128, "percentage": 92.09, "elapsed_time": "0:54:37", "remaining_time": "0:04:41", "throughput": 13815.3, "total_tokens": 45282496} +{"current_steps": 14390, "total_steps": 15621, "loss": 0.3577, "lr": 3.766162269417139e-08, "epoch": 0.9211958261314896, "percentage": 92.12, "elapsed_time": "0:54:38", "remaining_time": "0:04:40", "throughput": 13817.28, "total_tokens": 45297024} +{"current_steps": 14395, "total_steps": 15621, "loss": 0.3693, "lr": 3.73584619523255e-08, "epoch": 0.9215159080724665, "percentage": 92.15, "elapsed_time": "0:54:38", "remaining_time": "0:04:39", "throughput": 13819.74, "total_tokens": 45314176} +{"current_steps": 14400, "total_steps": 15621, "loss": 0.3557, "lr": 3.7056503079742616e-08, "epoch": 0.9218359900134434, "percentage": 92.18, "elapsed_time": "0:54:39", "remaining_time": "0:04:38", "throughput": 13821.85, "total_tokens": 45329344} +{"current_steps": 14405, "total_steps": 15621, "loss": 0.3428, "lr": 3.6755746453421945e-08, "epoch": 0.9221560719544203, "percentage": 92.22, "elapsed_time": "0:54:40", "remaining_time": "0:04:36", "throughput": 13823.96, "total_tokens": 45344384} +{"current_steps": 14410, "total_steps": 15621, "loss": 0.2869, "lr": 3.645619244886145e-08, "epoch": 0.9224761538953972, "percentage": 92.25, "elapsed_time": "0:54:40", "remaining_time": "0:04:35", "throughput": 13826.16, "total_tokens": 45360192} +{"current_steps": 14415, "total_steps": 15621, "loss": 0.3103, "lr": 3.615784144005796e-08, "epoch": 0.9227962358363742, "percentage": 92.28, "elapsed_time": "0:54:41", "remaining_time": "0:04:34", "throughput": 13828.37, "total_tokens": 45376000} +{"current_steps": 14420, "total_steps": 15621, "loss": 0.4093, "lr": 3.5860693799506184e-08, "epoch": 0.923116317777351, "percentage": 92.31, "elapsed_time": "0:54:41", "remaining_time": "0:04:33", "throughput": 13830.33, "total_tokens": 45390400} +{"current_steps": 14425, "total_steps": 15621, "loss": 0.4518, "lr": 3.5564749898198466e-08, "epoch": 0.9234363997183279, "percentage": 92.34, "elapsed_time": "0:54:42", "remaining_time": "0:04:32", "throughput": 13832.68, "total_tokens": 45406976} +{"current_steps": 14430, "total_steps": 15621, "loss": 0.3481, "lr": 3.527001010562425e-08, "epoch": 0.9237564816593048, "percentage": 92.38, "elapsed_time": "0:54:43", "remaining_time": "0:04:30", "throughput": 13834.78, "total_tokens": 45422080} +{"current_steps": 14435, "total_steps": 15621, "loss": 0.3429, "lr": 3.4976474789769504e-08, "epoch": 0.9240765636002817, "percentage": 92.41, "elapsed_time": "0:54:43", "remaining_time": "0:04:29", "throughput": 13837.28, "total_tokens": 45439296} +{"current_steps": 14440, "total_steps": 15621, "loss": 0.2983, "lr": 3.4684144317116636e-08, "epoch": 0.9243966455412586, "percentage": 92.44, "elapsed_time": "0:54:44", "remaining_time": "0:04:28", "throughput": 13839.37, "total_tokens": 45454208} +{"current_steps": 14445, "total_steps": 15621, "loss": 0.3001, "lr": 3.439301905264369e-08, "epoch": 0.9247167274822354, "percentage": 92.47, "elapsed_time": "0:54:45", "remaining_time": "0:04:27", "throughput": 13841.65, "total_tokens": 45470400} +{"current_steps": 14450, "total_steps": 15621, "loss": 0.3212, "lr": 3.410309935982403e-08, "epoch": 0.9250368094232123, "percentage": 92.5, "elapsed_time": "0:54:45", "remaining_time": "0:04:26", "throughput": 13843.91, "total_tokens": 45486528} +{"current_steps": 14455, "total_steps": 15621, "loss": 0.3429, "lr": 3.381438560062555e-08, "epoch": 0.9253568913641892, "percentage": 92.54, "elapsed_time": "0:54:46", "remaining_time": "0:04:25", "throughput": 13845.98, "total_tokens": 45501440} +{"current_steps": 14460, "total_steps": 15621, "loss": 0.3181, "lr": 3.3526878135511025e-08, "epoch": 0.9256769733051661, "percentage": 92.57, "elapsed_time": "0:54:46", "remaining_time": "0:04:23", "throughput": 13848.3, "total_tokens": 45517760} +{"current_steps": 14465, "total_steps": 15621, "loss": 0.3642, "lr": 3.324057732343666e-08, "epoch": 0.9259970552461431, "percentage": 92.6, "elapsed_time": "0:54:47", "remaining_time": "0:04:22", "throughput": 13850.41, "total_tokens": 45533056} +{"current_steps": 14470, "total_steps": 15621, "loss": 0.4131, "lr": 3.295548352185262e-08, "epoch": 0.9263171371871199, "percentage": 92.63, "elapsed_time": "0:54:48", "remaining_time": "0:04:21", "throughput": 13852.65, "total_tokens": 45549248} +{"current_steps": 14475, "total_steps": 15621, "loss": 0.3477, "lr": 3.2671597086701753e-08, "epoch": 0.9266372191280968, "percentage": 92.66, "elapsed_time": "0:54:48", "remaining_time": "0:04:20", "throughput": 13855.02, "total_tokens": 45565760} +{"current_steps": 14480, "total_steps": 15621, "loss": 0.3246, "lr": 3.238891837241964e-08, "epoch": 0.9269573010690737, "percentage": 92.7, "elapsed_time": "0:54:49", "remaining_time": "0:04:19", "throughput": 13857.23, "total_tokens": 45581568} +{"current_steps": 14485, "total_steps": 15621, "loss": 0.4038, "lr": 3.210744773193386e-08, "epoch": 0.9272773830100506, "percentage": 92.73, "elapsed_time": "0:54:49", "remaining_time": "0:04:18", "throughput": 13859.38, "total_tokens": 45596928} +{"current_steps": 14490, "total_steps": 15621, "loss": 0.2948, "lr": 3.182718551666386e-08, "epoch": 0.9275974649510275, "percentage": 92.76, "elapsed_time": "0:54:50", "remaining_time": "0:04:16", "throughput": 13861.61, "total_tokens": 45612800} +{"current_steps": 14495, "total_steps": 15621, "loss": 0.4114, "lr": 3.154813207652063e-08, "epoch": 0.9279175468920043, "percentage": 92.79, "elapsed_time": "0:54:51", "remaining_time": "0:04:15", "throughput": 13863.66, "total_tokens": 45627584} +{"current_steps": 14500, "total_steps": 15621, "loss": 0.3379, "lr": 3.1270287759905143e-08, "epoch": 0.9282376288329812, "percentage": 92.82, "elapsed_time": "0:54:51", "remaining_time": "0:04:14", "throughput": 13865.94, "total_tokens": 45643840} +{"current_steps": 14505, "total_steps": 15621, "loss": 0.2884, "lr": 3.0993652913709476e-08, "epoch": 0.9285577107739581, "percentage": 92.86, "elapsed_time": "0:54:52", "remaining_time": "0:04:13", "throughput": 13868.06, "total_tokens": 45659072} +{"current_steps": 14510, "total_steps": 15621, "loss": 0.482, "lr": 3.0718227883315796e-08, "epoch": 0.928877792714935, "percentage": 92.89, "elapsed_time": "0:54:53", "remaining_time": "0:04:12", "throughput": 13870.36, "total_tokens": 45675328} +{"current_steps": 14515, "total_steps": 15621, "loss": 0.368, "lr": 3.044401301259503e-08, "epoch": 0.9291978746559119, "percentage": 92.92, "elapsed_time": "0:54:53", "remaining_time": "0:04:10", "throughput": 13872.51, "total_tokens": 45690816} +{"current_steps": 14520, "total_steps": 15621, "loss": 0.3333, "lr": 3.017100864390787e-08, "epoch": 0.9295179565968889, "percentage": 92.95, "elapsed_time": "0:54:54", "remaining_time": "0:04:09", "throughput": 13874.66, "total_tokens": 45706432} +{"current_steps": 14525, "total_steps": 15621, "loss": 0.3446, "lr": 2.9899215118103446e-08, "epoch": 0.9298380385378657, "percentage": 92.98, "elapsed_time": "0:54:54", "remaining_time": "0:04:08", "throughput": 13876.82, "total_tokens": 45721920} +{"current_steps": 14530, "total_steps": 15621, "loss": 0.3433, "lr": 2.9628632774519435e-08, "epoch": 0.9301581204788426, "percentage": 93.02, "elapsed_time": "0:54:55", "remaining_time": "0:04:07", "throughput": 13879.09, "total_tokens": 45738048} +{"current_steps": 14535, "total_steps": 15621, "loss": 0.3308, "lr": 2.9359261950980485e-08, "epoch": 0.9304782024198195, "percentage": 93.05, "elapsed_time": "0:54:56", "remaining_time": "0:04:06", "throughput": 13881.3, "total_tokens": 45753856} +{"current_steps": 14540, "total_steps": 15621, "loss": 0.3015, "lr": 2.90911029837998e-08, "epoch": 0.9307982843607964, "percentage": 93.08, "elapsed_time": "0:54:56", "remaining_time": "0:04:05", "throughput": 13883.35, "total_tokens": 45768704} +{"current_steps": 14545, "total_steps": 15621, "loss": 0.2789, "lr": 2.8824156207776673e-08, "epoch": 0.9311183663017732, "percentage": 93.11, "elapsed_time": "0:54:57", "remaining_time": "0:04:03", "throughput": 13885.47, "total_tokens": 45783936} +{"current_steps": 14550, "total_steps": 15621, "loss": 0.4514, "lr": 2.8558421956197397e-08, "epoch": 0.9314384482427501, "percentage": 93.14, "elapsed_time": "0:54:57", "remaining_time": "0:04:02", "throughput": 13887.78, "total_tokens": 45800320} +{"current_steps": 14555, "total_steps": 15621, "loss": 0.3864, "lr": 2.829390056083436e-08, "epoch": 0.931758530183727, "percentage": 93.18, "elapsed_time": "0:54:58", "remaining_time": "0:04:01", "throughput": 13890.05, "total_tokens": 45816512} +{"current_steps": 14560, "total_steps": 15621, "loss": 0.3037, "lr": 2.8030592351945492e-08, "epoch": 0.9320786121247039, "percentage": 93.21, "elapsed_time": "0:54:59", "remaining_time": "0:04:00", "throughput": 13892.19, "total_tokens": 45831936} +{"current_steps": 14565, "total_steps": 15621, "loss": 0.2968, "lr": 2.776849765827427e-08, "epoch": 0.9323986940656808, "percentage": 93.24, "elapsed_time": "0:54:59", "remaining_time": "0:03:59", "throughput": 13894.22, "total_tokens": 45846784} +{"current_steps": 14570, "total_steps": 15621, "loss": 0.4282, "lr": 2.750761680704905e-08, "epoch": 0.9327187760066578, "percentage": 93.27, "elapsed_time": "0:55:00", "remaining_time": "0:03:58", "throughput": 13896.32, "total_tokens": 45862080} +{"current_steps": 14575, "total_steps": 15621, "loss": 0.3937, "lr": 2.724795012398251e-08, "epoch": 0.9330388579476346, "percentage": 93.3, "elapsed_time": "0:55:00", "remaining_time": "0:03:56", "throughput": 13898.61, "total_tokens": 45878528} +{"current_steps": 14580, "total_steps": 15621, "loss": 0.3737, "lr": 2.6989497933271543e-08, "epoch": 0.9333589398886115, "percentage": 93.34, "elapsed_time": "0:55:01", "remaining_time": "0:03:55", "throughput": 13900.76, "total_tokens": 45894016} +{"current_steps": 14585, "total_steps": 15621, "loss": 0.3295, "lr": 2.673226055759692e-08, "epoch": 0.9336790218295884, "percentage": 93.37, "elapsed_time": "0:55:02", "remaining_time": "0:03:54", "throughput": 13902.87, "total_tokens": 45909504} +{"current_steps": 14590, "total_steps": 15621, "loss": 0.338, "lr": 2.6476238318122402e-08, "epoch": 0.9339991037705653, "percentage": 93.4, "elapsed_time": "0:55:02", "remaining_time": "0:03:53", "throughput": 13905.1, "total_tokens": 45925376} +{"current_steps": 14595, "total_steps": 15621, "loss": 0.3956, "lr": 2.6221431534494742e-08, "epoch": 0.9343191857115422, "percentage": 93.43, "elapsed_time": "0:55:03", "remaining_time": "0:03:52", "throughput": 13907.09, "total_tokens": 45940224} +{"current_steps": 14600, "total_steps": 15621, "loss": 0.3521, "lr": 2.5967840524843243e-08, "epoch": 0.934639267652519, "percentage": 93.46, "elapsed_time": "0:55:03", "remaining_time": "0:03:51", "throughput": 13909.12, "total_tokens": 45955072} +{"current_steps": 14605, "total_steps": 15621, "loss": 0.4287, "lr": 2.5715465605779195e-08, "epoch": 0.9349593495934959, "percentage": 93.5, "elapsed_time": "0:55:04", "remaining_time": "0:03:49", "throughput": 13911.2, "total_tokens": 45970240} +{"current_steps": 14610, "total_steps": 15621, "loss": 0.406, "lr": 2.5464307092395777e-08, "epoch": 0.9352794315344728, "percentage": 93.53, "elapsed_time": "0:55:05", "remaining_time": "0:03:48", "throughput": 13913.35, "total_tokens": 45985856} +{"current_steps": 14615, "total_steps": 15621, "loss": 0.3398, "lr": 2.5214365298267148e-08, "epoch": 0.9355995134754497, "percentage": 93.56, "elapsed_time": "0:55:05", "remaining_time": "0:03:47", "throughput": 13915.3, "total_tokens": 46000256} +{"current_steps": 14620, "total_steps": 15621, "loss": 0.32, "lr": 2.4965640535448917e-08, "epoch": 0.9359195954164266, "percentage": 93.59, "elapsed_time": "0:55:06", "remaining_time": "0:03:46", "throughput": 13917.41, "total_tokens": 46015616} +{"current_steps": 14625, "total_steps": 15621, "loss": 0.3741, "lr": 2.471813311447657e-08, "epoch": 0.9362396773574035, "percentage": 93.62, "elapsed_time": "0:55:06", "remaining_time": "0:03:45", "throughput": 13919.52, "total_tokens": 46031040} +{"current_steps": 14630, "total_steps": 15621, "loss": 0.3304, "lr": 2.4471843344365915e-08, "epoch": 0.9365597592983804, "percentage": 93.66, "elapsed_time": "0:55:07", "remaining_time": "0:03:44", "throughput": 13921.6, "total_tokens": 46046016} +{"current_steps": 14635, "total_steps": 15621, "loss": 0.2715, "lr": 2.42267715326131e-08, "epoch": 0.9368798412393573, "percentage": 93.69, "elapsed_time": "0:55:08", "remaining_time": "0:03:42", "throughput": 13923.93, "total_tokens": 46062528} +{"current_steps": 14640, "total_steps": 15621, "loss": 0.3426, "lr": 2.3982917985192697e-08, "epoch": 0.9371999231803342, "percentage": 93.72, "elapsed_time": "0:55:08", "remaining_time": "0:03:41", "throughput": 13926.11, "total_tokens": 46078144} +{"current_steps": 14645, "total_steps": 15621, "loss": 0.3748, "lr": 2.3740283006558838e-08, "epoch": 0.9375200051213111, "percentage": 93.75, "elapsed_time": "0:55:09", "remaining_time": "0:03:40", "throughput": 13928.82, "total_tokens": 46096896} +{"current_steps": 14650, "total_steps": 15621, "loss": 0.3715, "lr": 2.349886689964431e-08, "epoch": 0.9378400870622879, "percentage": 93.78, "elapsed_time": "0:55:10", "remaining_time": "0:03:39", "throughput": 13930.88, "total_tokens": 46111808} +{"current_steps": 14655, "total_steps": 15621, "loss": 0.2804, "lr": 2.32586699658599e-08, "epoch": 0.9381601690032648, "percentage": 93.82, "elapsed_time": "0:55:10", "remaining_time": "0:03:38", "throughput": 13933.14, "total_tokens": 46127936} +{"current_steps": 14660, "total_steps": 15621, "loss": 0.3522, "lr": 2.3019692505094056e-08, "epoch": 0.9384802509442417, "percentage": 93.85, "elapsed_time": "0:55:11", "remaining_time": "0:03:37", "throughput": 13935.17, "total_tokens": 46142848} +{"current_steps": 14665, "total_steps": 15621, "loss": 0.5364, "lr": 2.2781934815713223e-08, "epoch": 0.9388003328852186, "percentage": 93.88, "elapsed_time": "0:55:11", "remaining_time": "0:03:35", "throughput": 13937.36, "total_tokens": 46158848} +{"current_steps": 14670, "total_steps": 15621, "loss": 0.3566, "lr": 2.254539719456061e-08, "epoch": 0.9391204148261955, "percentage": 93.91, "elapsed_time": "0:55:12", "remaining_time": "0:03:34", "throughput": 13939.58, "total_tokens": 46174912} +{"current_steps": 14675, "total_steps": 15621, "loss": 0.2587, "lr": 2.231007993695633e-08, "epoch": 0.9394404967671725, "percentage": 93.94, "elapsed_time": "0:55:13", "remaining_time": "0:03:33", "throughput": 13941.51, "total_tokens": 46189248} +{"current_steps": 14680, "total_steps": 15621, "loss": 0.314, "lr": 2.2075983336696357e-08, "epoch": 0.9397605787081493, "percentage": 93.98, "elapsed_time": "0:55:13", "remaining_time": "0:03:32", "throughput": 13943.65, "total_tokens": 46204928} +{"current_steps": 14685, "total_steps": 15621, "loss": 0.3916, "lr": 2.1843107686053353e-08, "epoch": 0.9400806606491262, "percentage": 94.01, "elapsed_time": "0:55:14", "remaining_time": "0:03:31", "throughput": 13945.71, "total_tokens": 46220160} +{"current_steps": 14690, "total_steps": 15621, "loss": 0.4249, "lr": 2.1611453275775405e-08, "epoch": 0.9404007425901031, "percentage": 94.04, "elapsed_time": "0:55:14", "remaining_time": "0:03:30", "throughput": 13947.86, "total_tokens": 46235584} +{"current_steps": 14695, "total_steps": 15621, "loss": 0.2691, "lr": 2.138102039508538e-08, "epoch": 0.94072082453108, "percentage": 94.07, "elapsed_time": "0:55:15", "remaining_time": "0:03:28", "throughput": 13950.12, "total_tokens": 46251904} +{"current_steps": 14700, "total_steps": 15621, "loss": 0.3948, "lr": 2.1151809331681703e-08, "epoch": 0.9410409064720568, "percentage": 94.1, "elapsed_time": "0:55:16", "remaining_time": "0:03:27", "throughput": 13952.33, "total_tokens": 46268032} +{"current_steps": 14705, "total_steps": 15621, "loss": 0.3362, "lr": 2.092382037173701e-08, "epoch": 0.9413609884130337, "percentage": 94.14, "elapsed_time": "0:55:16", "remaining_time": "0:03:26", "throughput": 13954.43, "total_tokens": 46283392} +{"current_steps": 14710, "total_steps": 15621, "loss": 0.2966, "lr": 2.0697053799898277e-08, "epoch": 0.9416810703540106, "percentage": 94.17, "elapsed_time": "0:55:17", "remaining_time": "0:03:25", "throughput": 13956.52, "total_tokens": 46298752} +{"current_steps": 14715, "total_steps": 15621, "loss": 0.3392, "lr": 2.0471509899286144e-08, "epoch": 0.9420011522949875, "percentage": 94.2, "elapsed_time": "0:55:17", "remaining_time": "0:03:24", "throughput": 13958.71, "total_tokens": 46314624} +{"current_steps": 14720, "total_steps": 15621, "loss": 0.3403, "lr": 2.0247188951494797e-08, "epoch": 0.9423212342359644, "percentage": 94.23, "elapsed_time": "0:55:18", "remaining_time": "0:03:23", "throughput": 13961.11, "total_tokens": 46331712} +{"current_steps": 14725, "total_steps": 15621, "loss": 0.5398, "lr": 2.0024091236591655e-08, "epoch": 0.9426413161769412, "percentage": 94.26, "elapsed_time": "0:55:19", "remaining_time": "0:03:21", "throughput": 13963.2, "total_tokens": 46347200} +{"current_steps": 14730, "total_steps": 15621, "loss": 0.3166, "lr": 1.98022170331168e-08, "epoch": 0.9429613981179182, "percentage": 94.3, "elapsed_time": "0:55:19", "remaining_time": "0:03:20", "throughput": 13965.42, "total_tokens": 46363008} +{"current_steps": 14735, "total_steps": 15621, "loss": 0.3797, "lr": 1.9581566618082744e-08, "epoch": 0.9432814800588951, "percentage": 94.33, "elapsed_time": "0:55:20", "remaining_time": "0:03:19", "throughput": 13967.6, "total_tokens": 46378816} +{"current_steps": 14740, "total_steps": 15621, "loss": 0.3915, "lr": 1.9362140266974025e-08, "epoch": 0.943601561999872, "percentage": 94.36, "elapsed_time": "0:55:21", "remaining_time": "0:03:18", "throughput": 13969.84, "total_tokens": 46395200} +{"current_steps": 14745, "total_steps": 15621, "loss": 0.3198, "lr": 1.9143938253747383e-08, "epoch": 0.9439216439408489, "percentage": 94.39, "elapsed_time": "0:55:21", "remaining_time": "0:03:17", "throughput": 13972.16, "total_tokens": 46411840} +{"current_steps": 14750, "total_steps": 15621, "loss": 0.4515, "lr": 1.892696085083023e-08, "epoch": 0.9442417258818258, "percentage": 94.42, "elapsed_time": "0:55:22", "remaining_time": "0:03:16", "throughput": 13974.32, "total_tokens": 46427776} +{"current_steps": 14755, "total_steps": 15621, "loss": 0.3118, "lr": 1.8711208329121542e-08, "epoch": 0.9445618078228026, "percentage": 94.46, "elapsed_time": "0:55:23", "remaining_time": "0:03:15", "throughput": 13976.7, "total_tokens": 46444736} +{"current_steps": 14760, "total_steps": 15621, "loss": 0.3325, "lr": 1.849668095799084e-08, "epoch": 0.9448818897637795, "percentage": 94.49, "elapsed_time": "0:55:23", "remaining_time": "0:03:13", "throughput": 13978.9, "total_tokens": 46460672} +{"current_steps": 14765, "total_steps": 15621, "loss": 0.3344, "lr": 1.8283379005278098e-08, "epoch": 0.9452019717047564, "percentage": 94.52, "elapsed_time": "0:55:24", "remaining_time": "0:03:12", "throughput": 13981.09, "total_tokens": 46476736} +{"current_steps": 14770, "total_steps": 15621, "loss": 0.3231, "lr": 1.807130273729329e-08, "epoch": 0.9455220536457333, "percentage": 94.55, "elapsed_time": "0:55:24", "remaining_time": "0:03:11", "throughput": 13983.26, "total_tokens": 46492416} +{"current_steps": 14775, "total_steps": 15621, "loss": 0.3349, "lr": 1.7860452418816173e-08, "epoch": 0.9458421355867102, "percentage": 94.58, "elapsed_time": "0:55:25", "remaining_time": "0:03:10", "throughput": 13985.26, "total_tokens": 46507264} +{"current_steps": 14780, "total_steps": 15621, "loss": 0.3288, "lr": 1.7650828313095834e-08, "epoch": 0.946162217527687, "percentage": 94.62, "elapsed_time": "0:55:26", "remaining_time": "0:03:09", "throughput": 13987.65, "total_tokens": 46524224} +{"current_steps": 14785, "total_steps": 15621, "loss": 0.3101, "lr": 1.7442430681850362e-08, "epoch": 0.946482299468664, "percentage": 94.65, "elapsed_time": "0:55:26", "remaining_time": "0:03:08", "throughput": 13989.71, "total_tokens": 46539456} +{"current_steps": 14790, "total_steps": 15621, "loss": 0.4302, "lr": 1.723525978526652e-08, "epoch": 0.9468023814096409, "percentage": 94.68, "elapsed_time": "0:55:27", "remaining_time": "0:03:06", "throughput": 13991.84, "total_tokens": 46555136} +{"current_steps": 14795, "total_steps": 15621, "loss": 0.3501, "lr": 1.702931588199996e-08, "epoch": 0.9471224633506178, "percentage": 94.71, "elapsed_time": "0:55:27", "remaining_time": "0:03:05", "throughput": 13993.93, "total_tokens": 46570432} +{"current_steps": 14800, "total_steps": 15621, "loss": 0.3115, "lr": 1.6824599229173897e-08, "epoch": 0.9474425452915947, "percentage": 94.74, "elapsed_time": "0:55:28", "remaining_time": "0:03:04", "throughput": 13996.11, "total_tokens": 46586304} +{"current_steps": 14805, "total_steps": 15621, "loss": 0.2909, "lr": 1.662111008237932e-08, "epoch": 0.9477626272325715, "percentage": 94.78, "elapsed_time": "0:55:29", "remaining_time": "0:03:03", "throughput": 13998.33, "total_tokens": 46602432} +{"current_steps": 14810, "total_steps": 15621, "loss": 0.3218, "lr": 1.6418848695675003e-08, "epoch": 0.9480827091735484, "percentage": 94.81, "elapsed_time": "0:55:29", "remaining_time": "0:03:02", "throughput": 14000.34, "total_tokens": 46617472} +{"current_steps": 14815, "total_steps": 15621, "loss": 0.372, "lr": 1.6217815321586614e-08, "epoch": 0.9484027911145253, "percentage": 94.84, "elapsed_time": "0:55:30", "remaining_time": "0:03:01", "throughput": 14002.43, "total_tokens": 46632896} +{"current_steps": 14820, "total_steps": 15621, "loss": 0.355, "lr": 1.6018010211106602e-08, "epoch": 0.9487228730555022, "percentage": 94.87, "elapsed_time": "0:55:30", "remaining_time": "0:03:00", "throughput": 14004.71, "total_tokens": 46649408} +{"current_steps": 14825, "total_steps": 15621, "loss": 0.2816, "lr": 1.58194336136942e-08, "epoch": 0.9490429549964791, "percentage": 94.9, "elapsed_time": "0:55:31", "remaining_time": "0:02:58", "throughput": 14006.89, "total_tokens": 46665344} +{"current_steps": 14830, "total_steps": 15621, "loss": 0.4274, "lr": 1.5622085777274417e-08, "epoch": 0.9493630369374559, "percentage": 94.94, "elapsed_time": "0:55:32", "remaining_time": "0:02:57", "throughput": 14008.92, "total_tokens": 46680704} +{"current_steps": 14835, "total_steps": 15621, "loss": 0.3333, "lr": 1.542596694823839e-08, "epoch": 0.9496831188784329, "percentage": 94.97, "elapsed_time": "0:55:32", "remaining_time": "0:02:56", "throughput": 14010.97, "total_tokens": 46695936} +{"current_steps": 14840, "total_steps": 15621, "loss": 0.4259, "lr": 1.5231077371442914e-08, "epoch": 0.9500032008194098, "percentage": 95.0, "elapsed_time": "0:55:33", "remaining_time": "0:02:55", "throughput": 14013.11, "total_tokens": 46711680} +{"current_steps": 14845, "total_steps": 15621, "loss": 0.2888, "lr": 1.5037417290209685e-08, "epoch": 0.9503232827603867, "percentage": 95.03, "elapsed_time": "0:55:34", "remaining_time": "0:02:54", "throughput": 14015.18, "total_tokens": 46727040} +{"current_steps": 14850, "total_steps": 15621, "loss": 0.393, "lr": 1.4844986946325743e-08, "epoch": 0.9506433647013636, "percentage": 95.06, "elapsed_time": "0:55:34", "remaining_time": "0:02:53", "throughput": 14017.3, "total_tokens": 46742720} +{"current_steps": 14855, "total_steps": 15621, "loss": 0.2502, "lr": 1.4653786580042681e-08, "epoch": 0.9509634466423404, "percentage": 95.1, "elapsed_time": "0:55:35", "remaining_time": "0:02:51", "throughput": 14019.39, "total_tokens": 46758336} +{"current_steps": 14858, "total_steps": 15621, "eval_loss": 0.3537425398826599, "epoch": 0.9511554958069266, "percentage": 95.12, "elapsed_time": "0:56:24", "remaining_time": "0:02:53", "throughput": 13817.16, "total_tokens": 46767552} +{"current_steps": 14860, "total_steps": 15621, "loss": 0.3108, "lr": 1.4463816430076215e-08, "epoch": 0.9512835285833173, "percentage": 95.13, "elapsed_time": "0:56:46", "remaining_time": "0:02:54", "throughput": 13729.94, "total_tokens": 46773312} +{"current_steps": 14865, "total_steps": 15621, "loss": 0.3685, "lr": 1.4275076733606395e-08, "epoch": 0.9516036105242942, "percentage": 95.16, "elapsed_time": "0:56:47", "remaining_time": "0:02:53", "throughput": 13731.89, "total_tokens": 46787968} +{"current_steps": 14870, "total_steps": 15621, "loss": 0.2913, "lr": 1.4087567726277061e-08, "epoch": 0.9519236924652711, "percentage": 95.19, "elapsed_time": "0:56:47", "remaining_time": "0:02:52", "throughput": 13734.04, "total_tokens": 46803712} +{"current_steps": 14875, "total_steps": 15621, "loss": 0.2789, "lr": 1.390128964219528e-08, "epoch": 0.952243774406248, "percentage": 95.22, "elapsed_time": "0:56:48", "remaining_time": "0:02:50", "throughput": 13736.36, "total_tokens": 46820288} +{"current_steps": 14880, "total_steps": 15621, "loss": 0.3819, "lr": 1.3716242713931348e-08, "epoch": 0.9525638563472248, "percentage": 95.26, "elapsed_time": "0:56:49", "remaining_time": "0:02:49", "throughput": 13738.47, "total_tokens": 46835904} +{"current_steps": 14885, "total_steps": 15621, "loss": 0.3714, "lr": 1.3532427172518789e-08, "epoch": 0.9528839382882017, "percentage": 95.29, "elapsed_time": "0:56:49", "remaining_time": "0:02:48", "throughput": 13740.52, "total_tokens": 46851136} +{"current_steps": 14890, "total_steps": 15621, "loss": 0.3343, "lr": 1.3349843247453252e-08, "epoch": 0.9532040202291787, "percentage": 95.32, "elapsed_time": "0:56:50", "remaining_time": "0:02:47", "throughput": 13742.77, "total_tokens": 46867456} +{"current_steps": 14895, "total_steps": 15621, "loss": 0.2772, "lr": 1.3168491166692941e-08, "epoch": 0.9535241021701556, "percentage": 95.35, "elapsed_time": "0:56:50", "remaining_time": "0:02:46", "throughput": 13744.83, "total_tokens": 46882816} +{"current_steps": 14900, "total_steps": 15621, "loss": 0.4506, "lr": 1.2988371156658073e-08, "epoch": 0.9538441841111325, "percentage": 95.38, "elapsed_time": "0:56:51", "remaining_time": "0:02:45", "throughput": 13746.99, "total_tokens": 46898624} +{"current_steps": 14905, "total_steps": 15621, "loss": 0.282, "lr": 1.2809483442230763e-08, "epoch": 0.9541642660521094, "percentage": 95.42, "elapsed_time": "0:56:52", "remaining_time": "0:02:43", "throughput": 13749.12, "total_tokens": 46914304} +{"current_steps": 14910, "total_steps": 15621, "loss": 0.3705, "lr": 1.2631828246754128e-08, "epoch": 0.9544843479930862, "percentage": 95.45, "elapsed_time": "0:56:52", "remaining_time": "0:02:42", "throughput": 13751.32, "total_tokens": 46930368} +{"current_steps": 14915, "total_steps": 15621, "loss": 0.364, "lr": 1.2455405792032969e-08, "epoch": 0.9548044299340631, "percentage": 95.48, "elapsed_time": "0:56:53", "remaining_time": "0:02:41", "throughput": 13753.41, "total_tokens": 46945792} +{"current_steps": 14920, "total_steps": 15621, "loss": 0.342, "lr": 1.2280216298332646e-08, "epoch": 0.95512451187504, "percentage": 95.51, "elapsed_time": "0:56:54", "remaining_time": "0:02:40", "throughput": 13755.63, "total_tokens": 46962048} +{"current_steps": 14925, "total_steps": 15621, "loss": 0.4603, "lr": 1.2106259984379642e-08, "epoch": 0.9554445938160169, "percentage": 95.54, "elapsed_time": "0:56:54", "remaining_time": "0:02:39", "throughput": 13757.61, "total_tokens": 46976768} +{"current_steps": 14930, "total_steps": 15621, "loss": 0.4141, "lr": 1.1933537067359889e-08, "epoch": 0.9557646757569938, "percentage": 95.58, "elapsed_time": "0:56:55", "remaining_time": "0:02:38", "throughput": 13759.58, "total_tokens": 46991424} +{"current_steps": 14935, "total_steps": 15621, "loss": 0.3607, "lr": 1.1762047762920446e-08, "epoch": 0.9560847576979706, "percentage": 95.61, "elapsed_time": "0:56:55", "remaining_time": "0:02:36", "throughput": 13761.63, "total_tokens": 47006656} +{"current_steps": 14940, "total_steps": 15621, "loss": 0.3576, "lr": 1.1591792285167602e-08, "epoch": 0.9564048396389476, "percentage": 95.64, "elapsed_time": "0:56:56", "remaining_time": "0:02:35", "throughput": 13763.66, "total_tokens": 47021824} +{"current_steps": 14945, "total_steps": 15621, "loss": 0.3907, "lr": 1.1422770846667206e-08, "epoch": 0.9567249215799245, "percentage": 95.67, "elapsed_time": "0:56:56", "remaining_time": "0:02:34", "throughput": 13765.81, "total_tokens": 47037440} +{"current_steps": 14950, "total_steps": 15621, "loss": 0.307, "lr": 1.1254983658444572e-08, "epoch": 0.9570450035209014, "percentage": 95.7, "elapsed_time": "0:56:57", "remaining_time": "0:02:33", "throughput": 13768.01, "total_tokens": 47053760} +{"current_steps": 14955, "total_steps": 15621, "loss": 0.3148, "lr": 1.1088430929984017e-08, "epoch": 0.9573650854618783, "percentage": 95.74, "elapsed_time": "0:56:58", "remaining_time": "0:02:32", "throughput": 13770.03, "total_tokens": 47068928} +{"current_steps": 14960, "total_steps": 15621, "loss": 0.383, "lr": 1.0923112869228645e-08, "epoch": 0.9576851674028551, "percentage": 95.77, "elapsed_time": "0:56:58", "remaining_time": "0:02:31", "throughput": 13772.18, "total_tokens": 47084672} +{"current_steps": 14965, "total_steps": 15621, "loss": 0.3613, "lr": 1.0759029682579801e-08, "epoch": 0.958005249343832, "percentage": 95.8, "elapsed_time": "0:56:59", "remaining_time": "0:02:29", "throughput": 13774.52, "total_tokens": 47101632} +{"current_steps": 14970, "total_steps": 15621, "loss": 0.306, "lr": 1.0596181574897389e-08, "epoch": 0.9583253312848089, "percentage": 95.83, "elapsed_time": "0:57:00", "remaining_time": "0:02:28", "throughput": 13776.49, "total_tokens": 47116480} +{"current_steps": 14975, "total_steps": 15621, "loss": 0.3155, "lr": 1.0434568749499107e-08, "epoch": 0.9586454132257858, "percentage": 95.86, "elapsed_time": "0:57:00", "remaining_time": "0:02:27", "throughput": 13778.77, "total_tokens": 47132992} +{"current_steps": 14980, "total_steps": 15621, "loss": 0.3061, "lr": 1.027419140816066e-08, "epoch": 0.9589654951667627, "percentage": 95.9, "elapsed_time": "0:57:01", "remaining_time": "0:02:26", "throughput": 13780.97, "total_tokens": 47149056} +{"current_steps": 14985, "total_steps": 15621, "loss": 0.2984, "lr": 1.0115049751114768e-08, "epoch": 0.9592855771077395, "percentage": 95.93, "elapsed_time": "0:57:01", "remaining_time": "0:02:25", "throughput": 13783.12, "total_tokens": 47164864} +{"current_steps": 14990, "total_steps": 15621, "loss": 0.3481, "lr": 9.957143977051941e-09, "epoch": 0.9596056590487164, "percentage": 95.96, "elapsed_time": "0:57:02", "remaining_time": "0:02:24", "throughput": 13785.2, "total_tokens": 47180544} +{"current_steps": 14995, "total_steps": 15621, "loss": 0.3836, "lr": 9.800474283119142e-09, "epoch": 0.9599257409896934, "percentage": 95.99, "elapsed_time": "0:57:03", "remaining_time": "0:02:22", "throughput": 13787.39, "total_tokens": 47196608} +{"current_steps": 15000, "total_steps": 15621, "loss": 0.3701, "lr": 9.645040864920462e-09, "epoch": 0.9602458229306703, "percentage": 96.02, "elapsed_time": "0:57:03", "remaining_time": "0:02:21", "throughput": 13789.69, "total_tokens": 47213504} +{"current_steps": 15005, "total_steps": 15621, "loss": 0.4056, "lr": 9.490843916516334e-09, "epoch": 0.9605659048716472, "percentage": 96.06, "elapsed_time": "0:57:04", "remaining_time": "0:02:20", "throughput": 13791.68, "total_tokens": 47228288} +{"current_steps": 15010, "total_steps": 15621, "loss": 0.4448, "lr": 9.337883630423316e-09, "epoch": 0.960885986812624, "percentage": 96.09, "elapsed_time": "0:57:05", "remaining_time": "0:02:19", "throughput": 13793.75, "total_tokens": 47243712} +{"current_steps": 15015, "total_steps": 15621, "loss": 0.4909, "lr": 9.186160197614423e-09, "epoch": 0.9612060687536009, "percentage": 96.12, "elapsed_time": "0:57:05", "remaining_time": "0:02:18", "throughput": 13795.96, "total_tokens": 47259904} +{"current_steps": 15020, "total_steps": 15621, "loss": 0.4837, "lr": 9.035673807517795e-09, "epoch": 0.9615261506945778, "percentage": 96.15, "elapsed_time": "0:57:06", "remaining_time": "0:02:17", "throughput": 13797.96, "total_tokens": 47275072} +{"current_steps": 15025, "total_steps": 15621, "loss": 0.27, "lr": 8.886424648017698e-09, "epoch": 0.9618462326355547, "percentage": 96.18, "elapsed_time": "0:57:06", "remaining_time": "0:02:15", "throughput": 13800.06, "total_tokens": 47290688} +{"current_steps": 15030, "total_steps": 15621, "loss": 0.3408, "lr": 8.738412905453408e-09, "epoch": 0.9621663145765316, "percentage": 96.22, "elapsed_time": "0:57:07", "remaining_time": "0:02:14", "throughput": 13802.19, "total_tokens": 47306496} +{"current_steps": 15035, "total_steps": 15621, "loss": 0.3575, "lr": 8.591638764619324e-09, "epoch": 0.9624863965175084, "percentage": 96.25, "elapsed_time": "0:57:08", "remaining_time": "0:02:13", "throughput": 13804.16, "total_tokens": 47321280} +{"current_steps": 15040, "total_steps": 15621, "loss": 0.3623, "lr": 8.446102408764643e-09, "epoch": 0.9628064784584853, "percentage": 96.28, "elapsed_time": "0:57:08", "remaining_time": "0:02:12", "throughput": 13806.38, "total_tokens": 47337536} +{"current_steps": 15045, "total_steps": 15621, "loss": 0.273, "lr": 8.301804019593129e-09, "epoch": 0.9631265603994623, "percentage": 96.31, "elapsed_time": "0:57:09", "remaining_time": "0:02:11", "throughput": 13808.43, "total_tokens": 47353024} +{"current_steps": 15050, "total_steps": 15621, "loss": 0.3535, "lr": 8.158743777263333e-09, "epoch": 0.9634466423404392, "percentage": 96.34, "elapsed_time": "0:57:09", "remaining_time": "0:02:10", "throughput": 13810.58, "total_tokens": 47369088} +{"current_steps": 15055, "total_steps": 15621, "loss": 0.3678, "lr": 8.016921860387272e-09, "epoch": 0.9637667242814161, "percentage": 96.38, "elapsed_time": "0:57:10", "remaining_time": "0:02:08", "throughput": 13812.61, "total_tokens": 47384320} +{"current_steps": 15060, "total_steps": 15621, "loss": 0.3908, "lr": 7.876338446031416e-09, "epoch": 0.964086806222393, "percentage": 96.41, "elapsed_time": "0:57:11", "remaining_time": "0:02:07", "throughput": 13814.85, "total_tokens": 47400896} +{"current_steps": 15065, "total_steps": 15621, "loss": 0.3169, "lr": 7.736993709716033e-09, "epoch": 0.9644068881633698, "percentage": 96.44, "elapsed_time": "0:57:11", "remaining_time": "0:02:06", "throughput": 13817.01, "total_tokens": 47416896} +{"current_steps": 15070, "total_steps": 15621, "loss": 0.4783, "lr": 7.59888782541418e-09, "epoch": 0.9647269701043467, "percentage": 96.47, "elapsed_time": "0:57:12", "remaining_time": "0:02:05", "throughput": 13819.08, "total_tokens": 47432320} +{"current_steps": 15075, "total_steps": 15621, "loss": 0.2656, "lr": 7.462020965553151e-09, "epoch": 0.9650470520453236, "percentage": 96.5, "elapsed_time": "0:57:12", "remaining_time": "0:02:04", "throughput": 13821.27, "total_tokens": 47448320} +{"current_steps": 15080, "total_steps": 15621, "loss": 0.49, "lr": 7.32639330101259e-09, "epoch": 0.9653671339863005, "percentage": 96.54, "elapsed_time": "0:57:13", "remaining_time": "0:02:03", "throughput": 13823.27, "total_tokens": 47463488} +{"current_steps": 15085, "total_steps": 15621, "loss": 0.3886, "lr": 7.1920050011252675e-09, "epoch": 0.9656872159272774, "percentage": 96.57, "elapsed_time": "0:57:14", "remaining_time": "0:02:02", "throughput": 13825.37, "total_tokens": 47479104} +{"current_steps": 15090, "total_steps": 15621, "loss": 0.391, "lr": 7.058856233676525e-09, "epoch": 0.9660072978682542, "percentage": 96.6, "elapsed_time": "0:57:14", "remaining_time": "0:02:00", "throughput": 13827.78, "total_tokens": 47496448} +{"current_steps": 15095, "total_steps": 15621, "loss": 0.3733, "lr": 6.926947164904162e-09, "epoch": 0.9663273798092311, "percentage": 96.63, "elapsed_time": "0:57:15", "remaining_time": "0:01:59", "throughput": 13829.85, "total_tokens": 47511936} +{"current_steps": 15100, "total_steps": 15621, "loss": 0.3984, "lr": 6.796277959498331e-09, "epoch": 0.9666474617502081, "percentage": 96.66, "elapsed_time": "0:57:16", "remaining_time": "0:01:58", "throughput": 13832.07, "total_tokens": 47528320} +{"current_steps": 15105, "total_steps": 15621, "loss": 0.2793, "lr": 6.666848780600864e-09, "epoch": 0.966967543691185, "percentage": 96.7, "elapsed_time": "0:57:16", "remaining_time": "0:01:57", "throughput": 13834.07, "total_tokens": 47543296} +{"current_steps": 15110, "total_steps": 15621, "loss": 0.2751, "lr": 6.538659789805834e-09, "epoch": 0.9672876256321619, "percentage": 96.73, "elapsed_time": "0:57:17", "remaining_time": "0:01:56", "throughput": 13836.11, "total_tokens": 47558656} +{"current_steps": 15115, "total_steps": 15621, "loss": 0.3498, "lr": 6.411711147158438e-09, "epoch": 0.9676077075731387, "percentage": 96.76, "elapsed_time": "0:57:17", "remaining_time": "0:01:55", "throughput": 13838.28, "total_tokens": 47574720} +{"current_steps": 15120, "total_steps": 15621, "loss": 0.3107, "lr": 6.286003011155783e-09, "epoch": 0.9679277895141156, "percentage": 96.79, "elapsed_time": "0:57:18", "remaining_time": "0:01:53", "throughput": 13840.36, "total_tokens": 47590272} +{"current_steps": 15125, "total_steps": 15621, "loss": 0.4098, "lr": 6.161535538745877e-09, "epoch": 0.9682478714550925, "percentage": 96.82, "elapsed_time": "0:57:19", "remaining_time": "0:01:52", "throughput": 13842.38, "total_tokens": 47605696} +{"current_steps": 15130, "total_steps": 15621, "loss": 0.3975, "lr": 6.0383088853277475e-09, "epoch": 0.9685679533960694, "percentage": 96.86, "elapsed_time": "0:57:19", "remaining_time": "0:01:51", "throughput": 13844.58, "total_tokens": 47621760} +{"current_steps": 15135, "total_steps": 15621, "loss": 0.3081, "lr": 5.916323204751439e-09, "epoch": 0.9688880353370463, "percentage": 96.89, "elapsed_time": "0:57:20", "remaining_time": "0:01:50", "throughput": 13847.01, "total_tokens": 47639296} +{"current_steps": 15140, "total_steps": 15621, "loss": 0.2648, "lr": 5.795578649317345e-09, "epoch": 0.9692081172780231, "percentage": 96.92, "elapsed_time": "0:57:21", "remaining_time": "0:01:49", "throughput": 13849.02, "total_tokens": 47654656} +{"current_steps": 15145, "total_steps": 15621, "loss": 0.3157, "lr": 5.676075369776656e-09, "epoch": 0.969528199219, "percentage": 96.95, "elapsed_time": "0:57:21", "remaining_time": "0:01:48", "throughput": 13851.25, "total_tokens": 47671168} +{"current_steps": 15150, "total_steps": 15621, "loss": 0.3348, "lr": 5.557813515330468e-09, "epoch": 0.9698482811599769, "percentage": 96.98, "elapsed_time": "0:57:22", "remaining_time": "0:01:47", "throughput": 13853.26, "total_tokens": 47686400} +{"current_steps": 15155, "total_steps": 15621, "loss": 0.3439, "lr": 5.440793233630115e-09, "epoch": 0.9701683631009539, "percentage": 97.02, "elapsed_time": "0:57:22", "remaining_time": "0:01:45", "throughput": 13855.28, "total_tokens": 47701760} +{"current_steps": 15160, "total_steps": 15621, "loss": 0.3063, "lr": 5.325014670776951e-09, "epoch": 0.9704884450419308, "percentage": 97.05, "elapsed_time": "0:57:23", "remaining_time": "0:01:44", "throughput": 13857.31, "total_tokens": 47717248} +{"current_steps": 15165, "total_steps": 15621, "loss": 0.3599, "lr": 5.21047797132157e-09, "epoch": 0.9708085269829076, "percentage": 97.08, "elapsed_time": "0:57:24", "remaining_time": "0:01:43", "throughput": 13859.65, "total_tokens": 47734336} +{"current_steps": 15170, "total_steps": 15621, "loss": 0.3417, "lr": 5.097183278264694e-09, "epoch": 0.9711286089238845, "percentage": 97.11, "elapsed_time": "0:57:24", "remaining_time": "0:01:42", "throughput": 13861.8, "total_tokens": 47750464} +{"current_steps": 15175, "total_steps": 15621, "loss": 0.4364, "lr": 4.985130733055954e-09, "epoch": 0.9714486908648614, "percentage": 97.14, "elapsed_time": "0:57:25", "remaining_time": "0:01:41", "throughput": 13863.83, "total_tokens": 47765824} +{"current_steps": 15180, "total_steps": 15621, "loss": 0.3893, "lr": 4.874320475594107e-09, "epoch": 0.9717687728058383, "percentage": 97.18, "elapsed_time": "0:57:25", "remaining_time": "0:01:40", "throughput": 13865.95, "total_tokens": 47781760} +{"current_steps": 15185, "total_steps": 15621, "loss": 0.2832, "lr": 4.764752644227377e-09, "epoch": 0.9720888547468152, "percentage": 97.21, "elapsed_time": "0:57:26", "remaining_time": "0:01:38", "throughput": 13868.01, "total_tokens": 47797312} +{"current_steps": 15190, "total_steps": 15621, "loss": 0.3392, "lr": 4.656427375752336e-09, "epoch": 0.972408936687792, "percentage": 97.24, "elapsed_time": "0:57:27", "remaining_time": "0:01:37", "throughput": 13870.22, "total_tokens": 47813440} +{"current_steps": 15195, "total_steps": 15621, "loss": 0.34, "lr": 4.549344805414246e-09, "epoch": 0.9727290186287689, "percentage": 97.27, "elapsed_time": "0:57:27", "remaining_time": "0:01:36", "throughput": 13872.36, "total_tokens": 47829440} +{"current_steps": 15200, "total_steps": 15621, "loss": 0.4139, "lr": 4.443505066907049e-09, "epoch": 0.9730491005697458, "percentage": 97.3, "elapsed_time": "0:57:28", "remaining_time": "0:01:35", "throughput": 13874.35, "total_tokens": 47844608} +{"current_steps": 15205, "total_steps": 15621, "loss": 0.2823, "lr": 4.338908292372934e-09, "epoch": 0.9733691825107228, "percentage": 97.34, "elapsed_time": "0:57:29", "remaining_time": "0:01:34", "throughput": 13876.43, "total_tokens": 47860160} +{"current_steps": 15210, "total_steps": 15621, "loss": 0.3864, "lr": 4.235554612402214e-09, "epoch": 0.9736892644516997, "percentage": 97.37, "elapsed_time": "0:57:29", "remaining_time": "0:01:33", "throughput": 13878.49, "total_tokens": 47875648} +{"current_steps": 15215, "total_steps": 15621, "loss": 0.381, "lr": 4.133444156033006e-09, "epoch": 0.9740093463926766, "percentage": 97.4, "elapsed_time": "0:57:30", "remaining_time": "0:01:32", "throughput": 13880.83, "total_tokens": 47892736} +{"current_steps": 15220, "total_steps": 15621, "loss": 0.3145, "lr": 4.032577050751551e-09, "epoch": 0.9743294283336534, "percentage": 97.43, "elapsed_time": "0:57:30", "remaining_time": "0:01:30", "throughput": 13882.98, "total_tokens": 47908992} +{"current_steps": 15225, "total_steps": 15621, "loss": 0.3428, "lr": 3.932953422491669e-09, "epoch": 0.9746495102746303, "percentage": 97.46, "elapsed_time": "0:57:31", "remaining_time": "0:01:29", "throughput": 13885.1, "total_tokens": 47924736} +{"current_steps": 15230, "total_steps": 15621, "loss": 0.284, "lr": 3.8345733956345326e-09, "epoch": 0.9749695922156072, "percentage": 97.5, "elapsed_time": "0:57:32", "remaining_time": "0:01:28", "throughput": 13887.27, "total_tokens": 47941056} +{"current_steps": 15235, "total_steps": 15621, "loss": 0.3619, "lr": 3.737437093008777e-09, "epoch": 0.9752896741565841, "percentage": 97.53, "elapsed_time": "0:57:32", "remaining_time": "0:01:27", "throughput": 13889.52, "total_tokens": 47957824} +{"current_steps": 15240, "total_steps": 15621, "loss": 0.4107, "lr": 3.641544635890281e-09, "epoch": 0.975609756097561, "percentage": 97.56, "elapsed_time": "0:57:33", "remaining_time": "0:01:26", "throughput": 13891.5, "total_tokens": 47973056} +{"current_steps": 15245, "total_steps": 15621, "loss": 0.3896, "lr": 3.546896144001832e-09, "epoch": 0.9759298380385378, "percentage": 97.59, "elapsed_time": "0:57:34", "remaining_time": "0:01:25", "throughput": 13893.62, "total_tokens": 47988928} +{"current_steps": 15250, "total_steps": 15621, "loss": 0.3926, "lr": 3.4534917355132364e-09, "epoch": 0.9762499199795147, "percentage": 97.62, "elapsed_time": "0:57:34", "remaining_time": "0:01:24", "throughput": 13895.58, "total_tokens": 48004032} +{"current_steps": 15255, "total_steps": 15621, "loss": 0.4376, "lr": 3.361331527040878e-09, "epoch": 0.9765700019204916, "percentage": 97.66, "elapsed_time": "0:57:35", "remaining_time": "0:01:22", "throughput": 13897.82, "total_tokens": 48020800} +{"current_steps": 15260, "total_steps": 15621, "loss": 0.3935, "lr": 3.270415633647938e-09, "epoch": 0.9768900838614686, "percentage": 97.69, "elapsed_time": "0:57:35", "remaining_time": "0:01:21", "throughput": 13899.99, "total_tokens": 48036800} +{"current_steps": 15265, "total_steps": 15621, "loss": 0.2847, "lr": 3.180744168843952e-09, "epoch": 0.9772101658024455, "percentage": 97.72, "elapsed_time": "0:57:36", "remaining_time": "0:01:20", "throughput": 13901.85, "total_tokens": 48051264} +{"current_steps": 15270, "total_steps": 15621, "loss": 0.2318, "lr": 3.0923172445849187e-09, "epoch": 0.9775302477434223, "percentage": 97.75, "elapsed_time": "0:57:37", "remaining_time": "0:01:19", "throughput": 13903.81, "total_tokens": 48066176} +{"current_steps": 15275, "total_steps": 15621, "loss": 0.3178, "lr": 3.0051349712727493e-09, "epoch": 0.9778503296843992, "percentage": 97.79, "elapsed_time": "0:57:37", "remaining_time": "0:01:18", "throughput": 13905.9, "total_tokens": 48081984} +{"current_steps": 15280, "total_steps": 15621, "loss": 0.4072, "lr": 2.9191974577555954e-09, "epoch": 0.9781704116253761, "percentage": 97.82, "elapsed_time": "0:57:38", "remaining_time": "0:01:17", "throughput": 13907.84, "total_tokens": 48096896} +{"current_steps": 15285, "total_steps": 15621, "loss": 0.2334, "lr": 2.8345048113274096e-09, "epoch": 0.978490493566353, "percentage": 97.85, "elapsed_time": "0:57:38", "remaining_time": "0:01:16", "throughput": 13909.84, "total_tokens": 48112128} +{"current_steps": 15290, "total_steps": 15621, "loss": 0.3388, "lr": 2.751057137727941e-09, "epoch": 0.9788105755073299, "percentage": 97.88, "elapsed_time": "0:57:39", "remaining_time": "0:01:14", "throughput": 13911.89, "total_tokens": 48127616} +{"current_steps": 15295, "total_steps": 15621, "loss": 0.384, "lr": 2.66885454114274e-09, "epoch": 0.9791306574483067, "percentage": 97.91, "elapsed_time": "0:57:40", "remaining_time": "0:01:13", "throughput": 13913.78, "total_tokens": 48142144} +{"current_steps": 15300, "total_steps": 15621, "loss": 0.3776, "lr": 2.5878971242025983e-09, "epoch": 0.9794507393892836, "percentage": 97.95, "elapsed_time": "0:57:40", "remaining_time": "0:01:12", "throughput": 13915.92, "total_tokens": 48158272} +{"current_steps": 15305, "total_steps": 15621, "loss": 0.3239, "lr": 2.5081849879837746e-09, "epoch": 0.9797708213302605, "percentage": 97.98, "elapsed_time": "0:57:41", "remaining_time": "0:01:11", "throughput": 13917.84, "total_tokens": 48173120} +{"current_steps": 15310, "total_steps": 15621, "loss": 0.3428, "lr": 2.429718232007771e-09, "epoch": 0.9800909032712375, "percentage": 98.01, "elapsed_time": "0:57:41", "remaining_time": "0:01:10", "throughput": 13919.87, "total_tokens": 48188672} +{"current_steps": 15315, "total_steps": 15621, "loss": 0.2688, "lr": 2.3524969542414453e-09, "epoch": 0.9804109852122144, "percentage": 98.04, "elapsed_time": "0:57:42", "remaining_time": "0:01:09", "throughput": 13921.96, "total_tokens": 48204480} +{"current_steps": 15320, "total_steps": 15621, "loss": 0.3525, "lr": 2.2765212510963418e-09, "epoch": 0.9807310671531912, "percentage": 98.07, "elapsed_time": "0:57:43", "remaining_time": "0:01:08", "throughput": 13923.95, "total_tokens": 48219584} +{"current_steps": 15325, "total_steps": 15621, "loss": 0.2847, "lr": 2.2017912174289164e-09, "epoch": 0.9810511490941681, "percentage": 98.11, "elapsed_time": "0:57:43", "remaining_time": "0:01:06", "throughput": 13926.15, "total_tokens": 48235904} +{"current_steps": 15330, "total_steps": 15621, "loss": 0.4052, "lr": 2.128306946540648e-09, "epoch": 0.981371231035145, "percentage": 98.14, "elapsed_time": "0:57:44", "remaining_time": "0:01:05", "throughput": 13928.48, "total_tokens": 48252992} +{"current_steps": 15335, "total_steps": 15621, "loss": 0.3316, "lr": 2.0560685301774792e-09, "epoch": 0.9816913129761219, "percentage": 98.17, "elapsed_time": "0:57:44", "remaining_time": "0:01:04", "throughput": 13930.4, "total_tokens": 48267840} +{"current_steps": 15340, "total_steps": 15621, "loss": 0.3781, "lr": 1.985076058529933e-09, "epoch": 0.9820113949170988, "percentage": 98.2, "elapsed_time": "0:57:45", "remaining_time": "0:01:03", "throughput": 13932.32, "total_tokens": 48282688} +{"current_steps": 15345, "total_steps": 15621, "loss": 0.4768, "lr": 1.9153296202328863e-09, "epoch": 0.9823314768580756, "percentage": 98.23, "elapsed_time": "0:57:46", "remaining_time": "0:01:02", "throughput": 13934.71, "total_tokens": 48300096} +{"current_steps": 15350, "total_steps": 15621, "loss": 0.3929, "lr": 1.8468293023656823e-09, "epoch": 0.9826515587990525, "percentage": 98.27, "elapsed_time": "0:57:46", "remaining_time": "0:01:01", "throughput": 13936.67, "total_tokens": 48315136} +{"current_steps": 15355, "total_steps": 15621, "loss": 0.4052, "lr": 1.7795751904515766e-09, "epoch": 0.9829716407400294, "percentage": 98.3, "elapsed_time": "0:57:47", "remaining_time": "0:01:00", "throughput": 13938.65, "total_tokens": 48330240} +{"current_steps": 15360, "total_steps": 15621, "loss": 0.318, "lr": 1.7135673684584019e-09, "epoch": 0.9832917226810063, "percentage": 98.33, "elapsed_time": "0:57:47", "remaining_time": "0:00:58", "throughput": 13940.59, "total_tokens": 48345280} +{"current_steps": 15365, "total_steps": 15621, "loss": 0.3972, "lr": 1.6488059187974579e-09, "epoch": 0.9836118046219833, "percentage": 98.36, "elapsed_time": "0:57:48", "remaining_time": "0:00:57", "throughput": 13942.79, "total_tokens": 48361792} +{"current_steps": 15370, "total_steps": 15621, "loss": 0.4099, "lr": 1.5852909223242894e-09, "epoch": 0.9839318865629602, "percentage": 98.39, "elapsed_time": "0:57:49", "remaining_time": "0:00:56", "throughput": 13944.83, "total_tokens": 48377408} +{"current_steps": 15375, "total_steps": 15621, "loss": 0.3759, "lr": 1.5230224583380192e-09, "epoch": 0.984251968503937, "percentage": 98.43, "elapsed_time": "0:57:49", "remaining_time": "0:00:55", "throughput": 13946.86, "total_tokens": 48392896} +{"current_steps": 15380, "total_steps": 15621, "loss": 0.4663, "lr": 1.4620006045816813e-09, "epoch": 0.9845720504449139, "percentage": 98.46, "elapsed_time": "0:57:50", "remaining_time": "0:00:54", "throughput": 13948.75, "total_tokens": 48407552} +{"current_steps": 15385, "total_steps": 15621, "loss": 0.2785, "lr": 1.4022254372417774e-09, "epoch": 0.9848921323858908, "percentage": 98.49, "elapsed_time": "0:57:51", "remaining_time": "0:00:53", "throughput": 13951.0, "total_tokens": 48424320} +{"current_steps": 15390, "total_steps": 15621, "loss": 0.5093, "lr": 1.3436970309481655e-09, "epoch": 0.9852122143268677, "percentage": 98.52, "elapsed_time": "0:57:51", "remaining_time": "0:00:52", "throughput": 13953.4, "total_tokens": 48441984} +{"current_steps": 15395, "total_steps": 15621, "loss": 0.3442, "lr": 1.2864154587742815e-09, "epoch": 0.9855322962678446, "percentage": 98.55, "elapsed_time": "0:57:52", "remaining_time": "0:00:50", "throughput": 13955.31, "total_tokens": 48456832} +{"current_steps": 15400, "total_steps": 15621, "loss": 0.3608, "lr": 1.2303807922370292e-09, "epoch": 0.9858523782088214, "percentage": 98.59, "elapsed_time": "0:57:52", "remaining_time": "0:00:49", "throughput": 13957.36, "total_tokens": 48472512} +{"current_steps": 15405, "total_steps": 15621, "loss": 0.3122, "lr": 1.1755931012961128e-09, "epoch": 0.9861724601497983, "percentage": 98.62, "elapsed_time": "0:57:53", "remaining_time": "0:00:48", "throughput": 13959.51, "total_tokens": 48488832} +{"current_steps": 15410, "total_steps": 15621, "loss": 0.3491, "lr": 1.122052454354705e-09, "epoch": 0.9864925420907752, "percentage": 98.65, "elapsed_time": "0:57:54", "remaining_time": "0:00:47", "throughput": 13961.45, "total_tokens": 48503936} +{"current_steps": 15415, "total_steps": 15621, "loss": 0.4398, "lr": 1.0697589182590005e-09, "epoch": 0.9868126240317522, "percentage": 98.68, "elapsed_time": "0:57:54", "remaining_time": "0:00:46", "throughput": 13963.43, "total_tokens": 48519040} +{"current_steps": 15420, "total_steps": 15621, "loss": 0.5967, "lr": 1.018712558297996e-09, "epoch": 0.9871327059727291, "percentage": 98.71, "elapsed_time": "0:57:55", "remaining_time": "0:00:45", "throughput": 13965.55, "total_tokens": 48535040} +{"current_steps": 15425, "total_steps": 15621, "loss": 0.4383, "lr": 9.689134382037113e-10, "epoch": 0.9874527879137059, "percentage": 98.75, "elapsed_time": "0:57:55", "remaining_time": "0:00:44", "throughput": 13967.78, "total_tokens": 48551808} +{"current_steps": 15430, "total_steps": 15621, "loss": 0.3967, "lr": 9.203616201508557e-10, "epoch": 0.9877728698546828, "percentage": 98.78, "elapsed_time": "0:57:56", "remaining_time": "0:00:43", "throughput": 13969.71, "total_tokens": 48566592} +{"current_steps": 15435, "total_steps": 15621, "loss": 0.3159, "lr": 8.730571647570517e-10, "epoch": 0.9880929517956597, "percentage": 98.81, "elapsed_time": "0:57:57", "remaining_time": "0:00:41", "throughput": 13971.85, "total_tokens": 48582720} +{"current_steps": 15440, "total_steps": 15621, "loss": 0.4878, "lr": 8.270001310825003e-10, "epoch": 0.9884130337366366, "percentage": 98.84, "elapsed_time": "0:57:57", "remaining_time": "0:00:40", "throughput": 13974.02, "total_tokens": 48599104} +{"current_steps": 15445, "total_steps": 15621, "loss": 0.3118, "lr": 7.821905766297599e-10, "epoch": 0.9887331156776135, "percentage": 98.87, "elapsed_time": "0:57:58", "remaining_time": "0:00:39", "throughput": 13976.13, "total_tokens": 48615040} +{"current_steps": 15450, "total_steps": 15621, "loss": 0.3926, "lr": 7.386285573441897e-10, "epoch": 0.9890531976185903, "percentage": 98.91, "elapsed_time": "0:57:59", "remaining_time": "0:00:38", "throughput": 13978.22, "total_tokens": 48630976} +{"current_steps": 15455, "total_steps": 15621, "loss": 0.2862, "lr": 6.963141276136175e-10, "epoch": 0.9893732795595672, "percentage": 98.94, "elapsed_time": "0:57:59", "remaining_time": "0:00:37", "throughput": 13980.21, "total_tokens": 48646080} +{"current_steps": 15460, "total_steps": 15621, "loss": 0.2525, "lr": 6.552473402678949e-10, "epoch": 0.9896933615005441, "percentage": 98.97, "elapsed_time": "0:58:00", "remaining_time": "0:00:36", "throughput": 13982.4, "total_tokens": 48662528} +{"current_steps": 15465, "total_steps": 15621, "loss": 0.3301, "lr": 6.154282465794524e-10, "epoch": 0.990013443441521, "percentage": 99.0, "elapsed_time": "0:58:00", "remaining_time": "0:00:35", "throughput": 13984.77, "total_tokens": 48680000} +{"current_steps": 15470, "total_steps": 15621, "loss": 0.424, "lr": 5.768568962629672e-10, "epoch": 0.990333525382498, "percentage": 99.03, "elapsed_time": "0:58:01", "remaining_time": "0:00:33", "throughput": 13986.94, "total_tokens": 48696256} +{"current_steps": 15475, "total_steps": 15621, "loss": 0.3065, "lr": 5.395333374751398e-10, "epoch": 0.9906536073234748, "percentage": 99.07, "elapsed_time": "0:58:02", "remaining_time": "0:00:32", "throughput": 13988.87, "total_tokens": 48711168} +{"current_steps": 15480, "total_steps": 15621, "loss": 0.5309, "lr": 5.034576168149174e-10, "epoch": 0.9909736892644517, "percentage": 99.1, "elapsed_time": "0:58:02", "remaining_time": "0:00:31", "throughput": 13990.93, "total_tokens": 48726848} +{"current_steps": 15485, "total_steps": 15621, "loss": 0.4868, "lr": 4.686297793231597e-10, "epoch": 0.9912937712054286, "percentage": 99.13, "elapsed_time": "0:58:03", "remaining_time": "0:00:30", "throughput": 13993.12, "total_tokens": 48743232} +{"current_steps": 15490, "total_steps": 15621, "loss": 0.456, "lr": 4.350498684829729e-10, "epoch": 0.9916138531464055, "percentage": 99.16, "elapsed_time": "0:58:03", "remaining_time": "0:00:29", "throughput": 13995.04, "total_tokens": 48758080} +{"current_steps": 15495, "total_steps": 15621, "loss": 0.3105, "lr": 4.0271792621926483e-10, "epoch": 0.9919339350873824, "percentage": 99.19, "elapsed_time": "0:58:04", "remaining_time": "0:00:28", "throughput": 13996.99, "total_tokens": 48773120} +{"current_steps": 15500, "total_steps": 15621, "loss": 0.3815, "lr": 3.716339928987455e-10, "epoch": 0.9922540170283592, "percentage": 99.23, "elapsed_time": "0:58:05", "remaining_time": "0:00:27", "throughput": 13999.07, "total_tokens": 48789056} +{"current_steps": 15505, "total_steps": 15621, "loss": 0.4142, "lr": 3.41798107330149e-10, "epoch": 0.9925740989693361, "percentage": 99.26, "elapsed_time": "0:58:05", "remaining_time": "0:00:26", "throughput": 14001.03, "total_tokens": 48804288} +{"current_steps": 15510, "total_steps": 15621, "loss": 0.3715, "lr": 3.1321030676390027e-10, "epoch": 0.992894180910313, "percentage": 99.29, "elapsed_time": "0:58:06", "remaining_time": "0:00:24", "throughput": 14002.89, "total_tokens": 48818816} +{"current_steps": 15515, "total_steps": 15621, "loss": 0.2872, "lr": 2.8587062689222617e-10, "epoch": 0.9932142628512899, "percentage": 99.32, "elapsed_time": "0:58:06", "remaining_time": "0:00:23", "throughput": 14005.13, "total_tokens": 48835520} +{"current_steps": 15520, "total_steps": 15621, "loss": 0.3221, "lr": 2.5977910184904473e-10, "epoch": 0.9935343447922668, "percentage": 99.35, "elapsed_time": "0:58:07", "remaining_time": "0:00:22", "throughput": 14007.21, "total_tokens": 48851328} +{"current_steps": 15525, "total_steps": 15621, "loss": 0.3354, "lr": 2.3493576420985373e-10, "epoch": 0.9938544267332438, "percentage": 99.39, "elapsed_time": "0:58:08", "remaining_time": "0:00:21", "throughput": 14009.12, "total_tokens": 48866304} +{"current_steps": 15530, "total_steps": 15621, "loss": 0.3174, "lr": 2.11340644991842e-10, "epoch": 0.9941745086742206, "percentage": 99.42, "elapsed_time": "0:58:08", "remaining_time": "0:00:20", "throughput": 14011.3, "total_tokens": 48882752} +{"current_steps": 15535, "total_steps": 15621, "loss": 0.3041, "lr": 1.8899377365388936e-10, "epoch": 0.9944945906151975, "percentage": 99.45, "elapsed_time": "0:58:09", "remaining_time": "0:00:19", "throughput": 14013.31, "total_tokens": 48898304} +{"current_steps": 15540, "total_steps": 15621, "loss": 0.4202, "lr": 1.6789517809634447e-10, "epoch": 0.9948146725561744, "percentage": 99.48, "elapsed_time": "0:58:10", "remaining_time": "0:00:18", "throughput": 14015.37, "total_tokens": 48914048} +{"current_steps": 15545, "total_steps": 15621, "loss": 0.3127, "lr": 1.480448846609139e-10, "epoch": 0.9951347544971513, "percentage": 99.51, "elapsed_time": "0:58:10", "remaining_time": "0:00:17", "throughput": 14017.47, "total_tokens": 48930176} +{"current_steps": 15550, "total_steps": 15621, "loss": 0.3505, "lr": 1.294429181311063e-10, "epoch": 0.9954548364381282, "percentage": 99.55, "elapsed_time": "0:58:11", "remaining_time": "0:00:15", "throughput": 14019.5, "total_tokens": 48945920} +{"current_steps": 15555, "total_steps": 15621, "loss": 0.4079, "lr": 1.1208930173145503e-10, "epoch": 0.995774918379105, "percentage": 99.58, "elapsed_time": "0:58:11", "remaining_time": "0:00:14", "throughput": 14021.43, "total_tokens": 48960832} +{"current_steps": 15560, "total_steps": 15621, "loss": 0.3213, "lr": 9.598405712840651e-11, "epoch": 0.9960950003200819, "percentage": 99.61, "elapsed_time": "0:58:12", "remaining_time": "0:00:13", "throughput": 14023.56, "total_tokens": 48977280} +{"current_steps": 15565, "total_steps": 15621, "loss": 0.347, "lr": 8.1127204429432e-11, "epoch": 0.9964150822610588, "percentage": 99.64, "elapsed_time": "0:58:13", "remaining_time": "0:00:12", "throughput": 14025.47, "total_tokens": 48992512} +{"current_steps": 15570, "total_steps": 15621, "loss": 0.3524, "lr": 6.751876218336061e-11, "epoch": 0.9967351642020357, "percentage": 99.67, "elapsed_time": "0:58:13", "remaining_time": "0:00:11", "throughput": 14027.5, "total_tokens": 49008128} +{"current_steps": 15575, "total_steps": 15621, "loss": 0.3376, "lr": 5.515874738071247e-11, "epoch": 0.9970552461430127, "percentage": 99.71, "elapsed_time": "0:58:14", "remaining_time": "0:00:10", "throughput": 14029.66, "total_tokens": 49024512} +{"current_steps": 15580, "total_steps": 15621, "loss": 0.308, "lr": 4.404717545303249e-11, "epoch": 0.9973753280839895, "percentage": 99.74, "elapsed_time": "0:58:14", "remaining_time": "0:00:09", "throughput": 14031.67, "total_tokens": 49040128} +{"current_steps": 15585, "total_steps": 15621, "loss": 0.3099, "lr": 3.418406027322352e-11, "epoch": 0.9976954100249664, "percentage": 99.77, "elapsed_time": "0:58:15", "remaining_time": "0:00:08", "throughput": 14033.63, "total_tokens": 49055360} +{"current_steps": 15590, "total_steps": 15621, "loss": 0.3518, "lr": 2.5569414155546254e-11, "epoch": 0.9980154919659433, "percentage": 99.8, "elapsed_time": "0:58:16", "remaining_time": "0:00:06", "throughput": 14035.71, "total_tokens": 49071360} +{"current_steps": 15595, "total_steps": 15621, "loss": 0.2734, "lr": 1.8203247855397287e-11, "epoch": 0.9983355739069202, "percentage": 99.83, "elapsed_time": "0:58:16", "remaining_time": "0:00:05", "throughput": 14037.58, "total_tokens": 49086144} +{"current_steps": 15600, "total_steps": 15621, "loss": 0.395, "lr": 1.2085570569642101e-11, "epoch": 0.9986556558478971, "percentage": 99.87, "elapsed_time": "0:58:17", "remaining_time": "0:00:04", "throughput": 14039.52, "total_tokens": 49101312} +{"current_steps": 15605, "total_steps": 15621, "loss": 0.3097, "lr": 7.216389936171019e-12, "epoch": 0.9989757377888739, "percentage": 99.9, "elapsed_time": "0:58:17", "remaining_time": "0:00:03", "throughput": 14041.49, "total_tokens": 49116672} +{"current_steps": 15610, "total_steps": 15621, "loss": 0.1772, "lr": 3.5957120342322567e-12, "epoch": 0.9992958197298508, "percentage": 99.93, "elapsed_time": "0:58:18", "remaining_time": "0:00:02", "throughput": 14043.52, "total_tokens": 49132288} +{"current_steps": 15615, "total_steps": 15621, "loss": 0.3934, "lr": 1.2235413842098807e-12, "epoch": 0.9996159016708277, "percentage": 99.96, "elapsed_time": "0:58:19", "remaining_time": "0:00:01", "throughput": 14045.5, "total_tokens": 49148096} +{"current_steps": 15620, "total_steps": 15621, "loss": 0.2515, "lr": 9.98809480678986e-14, "epoch": 0.9999359836118046, "percentage": 99.99, "elapsed_time": "0:58:19", "remaining_time": "0:00:00", "throughput": 14047.56, "total_tokens": 49163840} +{"current_steps": 15621, "total_steps": 15621, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:59:06", "remaining_time": "0:00:00", "throughput": 13864.36, "total_tokens": 49166912} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..4bad594 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,25207 @@ +{ + "best_global_step": 14858, + "best_metric": 0.3537425398826599, + "best_model_checkpoint": "saves_bts_preliminary/freeze/llama-3.2-1b-instruct/train_record_42_1779354540/checkpoint-14858", + "epoch": 1.0, + "eval_steps": 782, + "global_step": 15621, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0003200819409768901, + "grad_norm": 655.673828125, + "learning_rate": 5.118362124120281e-09, + "loss": 2.1603, + "num_input_tokens_seen": 15360, + "step": 5 + }, + { + "epoch": 0.0006401638819537802, + "grad_norm": 461.5277099609375, + "learning_rate": 1.1516314779270634e-08, + "loss": 2.344, + "num_input_tokens_seen": 31104, + "step": 10 + }, + { + "epoch": 0.0009602458229306702, + "grad_norm": 540.4111938476562, + "learning_rate": 1.7914267434420987e-08, + "loss": 2.115, + "num_input_tokens_seen": 46208, + "step": 15 + }, + { + "epoch": 0.0012803277639075604, + "grad_norm": 371.52410888671875, + "learning_rate": 2.431222008957134e-08, + "loss": 2.741, + "num_input_tokens_seen": 62464, + "step": 20 + }, + { + "epoch": 0.0016004097048844504, + "grad_norm": 420.4732666015625, + "learning_rate": 3.071017274472169e-08, + "loss": 2.0952, + "num_input_tokens_seen": 79104, + "step": 25 + }, + { + "epoch": 0.0019204916458613404, + "grad_norm": 360.0107421875, + "learning_rate": 3.710812539987204e-08, + "loss": 2.1934, + "num_input_tokens_seen": 94912, + "step": 30 + }, + { + "epoch": 0.0022405735868382304, + "grad_norm": 533.6338500976562, + "learning_rate": 4.350607805502239e-08, + "loss": 2.3371, + "num_input_tokens_seen": 110784, + "step": 35 + }, + { + "epoch": 0.002560655527815121, + "grad_norm": 316.48663330078125, + "learning_rate": 4.990403071017274e-08, + "loss": 2.1424, + "num_input_tokens_seen": 125696, + "step": 40 + }, + { + "epoch": 0.002880737468792011, + "grad_norm": 393.286865234375, + "learning_rate": 5.6301983365323095e-08, + "loss": 2.0945, + "num_input_tokens_seen": 140672, + "step": 45 + }, + { + "epoch": 0.003200819409768901, + "grad_norm": 386.58819580078125, + "learning_rate": 6.269993602047345e-08, + "loss": 2.0027, + "num_input_tokens_seen": 155456, + "step": 50 + }, + { + "epoch": 0.003520901350745791, + "grad_norm": 368.06884765625, + "learning_rate": 6.90978886756238e-08, + "loss": 1.915, + "num_input_tokens_seen": 170816, + "step": 55 + }, + { + "epoch": 0.003840983291722681, + "grad_norm": 332.34002685546875, + "learning_rate": 7.549584133077414e-08, + "loss": 2.0244, + "num_input_tokens_seen": 185088, + "step": 60 + }, + { + "epoch": 0.004161065232699571, + "grad_norm": 362.953125, + "learning_rate": 8.18937939859245e-08, + "loss": 1.6385, + "num_input_tokens_seen": 200384, + "step": 65 + }, + { + "epoch": 0.004481147173676461, + "grad_norm": 266.420166015625, + "learning_rate": 8.829174664107485e-08, + "loss": 1.6591, + "num_input_tokens_seen": 215744, + "step": 70 + }, + { + "epoch": 0.004801229114653352, + "grad_norm": 168.38560485839844, + "learning_rate": 9.468969929622521e-08, + "loss": 1.6555, + "num_input_tokens_seen": 230400, + "step": 75 + }, + { + "epoch": 0.005121311055630242, + "grad_norm": 282.5287780761719, + "learning_rate": 1.0108765195137556e-07, + "loss": 1.3232, + "num_input_tokens_seen": 246592, + "step": 80 + }, + { + "epoch": 0.005441392996607132, + "grad_norm": 106.96839141845703, + "learning_rate": 1.074856046065259e-07, + "loss": 1.1532, + "num_input_tokens_seen": 262272, + "step": 85 + }, + { + "epoch": 0.005761474937584022, + "grad_norm": 119.50403594970703, + "learning_rate": 1.1388355726167625e-07, + "loss": 1.0452, + "num_input_tokens_seen": 277760, + "step": 90 + }, + { + "epoch": 0.006081556878560912, + "grad_norm": 165.79542541503906, + "learning_rate": 1.202815099168266e-07, + "loss": 1.2493, + "num_input_tokens_seen": 292992, + "step": 95 + }, + { + "epoch": 0.006401638819537802, + "grad_norm": 155.2497100830078, + "learning_rate": 1.2667946257197694e-07, + "loss": 1.1191, + "num_input_tokens_seen": 307840, + "step": 100 + }, + { + "epoch": 0.006721720760514692, + "grad_norm": 112.60347747802734, + "learning_rate": 1.3307741522712732e-07, + "loss": 1.0359, + "num_input_tokens_seen": 323008, + "step": 105 + }, + { + "epoch": 0.007041802701491582, + "grad_norm": 88.95298767089844, + "learning_rate": 1.3947536788227767e-07, + "loss": 1.0546, + "num_input_tokens_seen": 339456, + "step": 110 + }, + { + "epoch": 0.007361884642468472, + "grad_norm": 87.05043029785156, + "learning_rate": 1.45873320537428e-07, + "loss": 1.1286, + "num_input_tokens_seen": 354816, + "step": 115 + }, + { + "epoch": 0.007681966583445362, + "grad_norm": 77.32754516601562, + "learning_rate": 1.5227127319257838e-07, + "loss": 0.8243, + "num_input_tokens_seen": 369472, + "step": 120 + }, + { + "epoch": 0.008002048524422252, + "grad_norm": 69.6989974975586, + "learning_rate": 1.586692258477287e-07, + "loss": 0.9582, + "num_input_tokens_seen": 384768, + "step": 125 + }, + { + "epoch": 0.008322130465399142, + "grad_norm": 96.46429443359375, + "learning_rate": 1.6506717850287908e-07, + "loss": 1.0307, + "num_input_tokens_seen": 400192, + "step": 130 + }, + { + "epoch": 0.008642212406376032, + "grad_norm": 118.02337646484375, + "learning_rate": 1.7146513115802943e-07, + "loss": 0.8953, + "num_input_tokens_seen": 416640, + "step": 135 + }, + { + "epoch": 0.008962294347352922, + "grad_norm": 65.88743591308594, + "learning_rate": 1.7786308381317976e-07, + "loss": 0.8263, + "num_input_tokens_seen": 432640, + "step": 140 + }, + { + "epoch": 0.009282376288329812, + "grad_norm": 77.22103881835938, + "learning_rate": 1.8426103646833014e-07, + "loss": 0.8971, + "num_input_tokens_seen": 448640, + "step": 145 + }, + { + "epoch": 0.009602458229306703, + "grad_norm": 88.69629669189453, + "learning_rate": 1.9065898912348046e-07, + "loss": 0.9544, + "num_input_tokens_seen": 464448, + "step": 150 + }, + { + "epoch": 0.009922540170283593, + "grad_norm": 86.22632598876953, + "learning_rate": 1.9705694177863084e-07, + "loss": 0.8598, + "num_input_tokens_seen": 479488, + "step": 155 + }, + { + "epoch": 0.010242622111260483, + "grad_norm": 55.39344787597656, + "learning_rate": 2.034548944337812e-07, + "loss": 0.7343, + "num_input_tokens_seen": 495296, + "step": 160 + }, + { + "epoch": 0.010562704052237373, + "grad_norm": 87.78097534179688, + "learning_rate": 2.0985284708893152e-07, + "loss": 0.7845, + "num_input_tokens_seen": 510144, + "step": 165 + }, + { + "epoch": 0.010882785993214263, + "grad_norm": 80.47422790527344, + "learning_rate": 2.162507997440819e-07, + "loss": 0.8491, + "num_input_tokens_seen": 524928, + "step": 170 + }, + { + "epoch": 0.011202867934191153, + "grad_norm": 45.75130081176758, + "learning_rate": 2.2264875239923222e-07, + "loss": 0.7122, + "num_input_tokens_seen": 541504, + "step": 175 + }, + { + "epoch": 0.011522949875168043, + "grad_norm": 81.46015167236328, + "learning_rate": 2.290467050543826e-07, + "loss": 0.7354, + "num_input_tokens_seen": 556096, + "step": 180 + }, + { + "epoch": 0.011843031816144933, + "grad_norm": 77.93597412109375, + "learning_rate": 2.3544465770953295e-07, + "loss": 0.734, + "num_input_tokens_seen": 572736, + "step": 185 + }, + { + "epoch": 0.012163113757121823, + "grad_norm": 73.0274658203125, + "learning_rate": 2.418426103646833e-07, + "loss": 0.8565, + "num_input_tokens_seen": 588352, + "step": 190 + }, + { + "epoch": 0.012483195698098713, + "grad_norm": 56.91474533081055, + "learning_rate": 2.4824056301983363e-07, + "loss": 0.9816, + "num_input_tokens_seen": 603520, + "step": 195 + }, + { + "epoch": 0.012803277639075603, + "grad_norm": 66.9703369140625, + "learning_rate": 2.54638515674984e-07, + "loss": 0.8158, + "num_input_tokens_seen": 619392, + "step": 200 + }, + { + "epoch": 0.013123359580052493, + "grad_norm": 59.1487922668457, + "learning_rate": 2.6103646833013433e-07, + "loss": 0.8032, + "num_input_tokens_seen": 635456, + "step": 205 + }, + { + "epoch": 0.013443441521029383, + "grad_norm": 121.4522705078125, + "learning_rate": 2.6743442098528466e-07, + "loss": 0.8716, + "num_input_tokens_seen": 650880, + "step": 210 + }, + { + "epoch": 0.013763523462006273, + "grad_norm": 50.31541442871094, + "learning_rate": 2.7383237364043504e-07, + "loss": 0.8278, + "num_input_tokens_seen": 666688, + "step": 215 + }, + { + "epoch": 0.014083605402983163, + "grad_norm": 70.05236053466797, + "learning_rate": 2.802303262955854e-07, + "loss": 0.7898, + "num_input_tokens_seen": 682112, + "step": 220 + }, + { + "epoch": 0.014403687343960053, + "grad_norm": 64.9844741821289, + "learning_rate": 2.866282789507358e-07, + "loss": 0.8381, + "num_input_tokens_seen": 697728, + "step": 225 + }, + { + "epoch": 0.014723769284936943, + "grad_norm": 53.501747131347656, + "learning_rate": 2.9302623160588607e-07, + "loss": 0.6829, + "num_input_tokens_seen": 712704, + "step": 230 + }, + { + "epoch": 0.015043851225913833, + "grad_norm": 91.16888427734375, + "learning_rate": 2.9942418426103644e-07, + "loss": 0.9619, + "num_input_tokens_seen": 729408, + "step": 235 + }, + { + "epoch": 0.015363933166890723, + "grad_norm": 89.74860382080078, + "learning_rate": 3.058221369161868e-07, + "loss": 0.7854, + "num_input_tokens_seen": 745344, + "step": 240 + }, + { + "epoch": 0.015684015107867613, + "grad_norm": 56.365665435791016, + "learning_rate": 3.1222008957133715e-07, + "loss": 0.6965, + "num_input_tokens_seen": 762688, + "step": 245 + }, + { + "epoch": 0.016004097048844503, + "grad_norm": 62.77731704711914, + "learning_rate": 3.186180422264875e-07, + "loss": 0.7105, + "num_input_tokens_seen": 779392, + "step": 250 + }, + { + "epoch": 0.016324178989821393, + "grad_norm": 80.97101593017578, + "learning_rate": 3.2501599488163785e-07, + "loss": 0.7964, + "num_input_tokens_seen": 794112, + "step": 255 + }, + { + "epoch": 0.016644260930798283, + "grad_norm": 50.28890609741211, + "learning_rate": 3.314139475367882e-07, + "loss": 0.8427, + "num_input_tokens_seen": 810112, + "step": 260 + }, + { + "epoch": 0.016964342871775173, + "grad_norm": 79.30187225341797, + "learning_rate": 3.3781190019193855e-07, + "loss": 0.8614, + "num_input_tokens_seen": 825472, + "step": 265 + }, + { + "epoch": 0.017284424812752063, + "grad_norm": 69.35704803466797, + "learning_rate": 3.4420985284708893e-07, + "loss": 0.9819, + "num_input_tokens_seen": 840128, + "step": 270 + }, + { + "epoch": 0.017604506753728953, + "grad_norm": 70.34232330322266, + "learning_rate": 3.5060780550223926e-07, + "loss": 0.7825, + "num_input_tokens_seen": 855104, + "step": 275 + }, + { + "epoch": 0.017924588694705843, + "grad_norm": 67.7530517578125, + "learning_rate": 3.570057581573896e-07, + "loss": 0.8069, + "num_input_tokens_seen": 870848, + "step": 280 + }, + { + "epoch": 0.018244670635682733, + "grad_norm": 46.21129608154297, + "learning_rate": 3.6340371081253996e-07, + "loss": 0.7403, + "num_input_tokens_seen": 885760, + "step": 285 + }, + { + "epoch": 0.018564752576659623, + "grad_norm": 44.078643798828125, + "learning_rate": 3.6980166346769034e-07, + "loss": 0.7078, + "num_input_tokens_seen": 900928, + "step": 290 + }, + { + "epoch": 0.018884834517636517, + "grad_norm": 54.419532775878906, + "learning_rate": 3.7619961612284067e-07, + "loss": 0.793, + "num_input_tokens_seen": 915968, + "step": 295 + }, + { + "epoch": 0.019204916458613407, + "grad_norm": 107.00920867919922, + "learning_rate": 3.8259756877799104e-07, + "loss": 0.9919, + "num_input_tokens_seen": 933056, + "step": 300 + }, + { + "epoch": 0.019524998399590297, + "grad_norm": 84.30803680419922, + "learning_rate": 3.889955214331414e-07, + "loss": 0.7373, + "num_input_tokens_seen": 948416, + "step": 305 + }, + { + "epoch": 0.019845080340567187, + "grad_norm": 65.89620971679688, + "learning_rate": 3.953934740882917e-07, + "loss": 0.7694, + "num_input_tokens_seen": 962880, + "step": 310 + }, + { + "epoch": 0.020165162281544077, + "grad_norm": 58.68693923950195, + "learning_rate": 4.0179142674344207e-07, + "loss": 0.8088, + "num_input_tokens_seen": 979904, + "step": 315 + }, + { + "epoch": 0.020485244222520967, + "grad_norm": 64.4815902709961, + "learning_rate": 4.0818937939859245e-07, + "loss": 0.8251, + "num_input_tokens_seen": 995136, + "step": 320 + }, + { + "epoch": 0.020805326163497857, + "grad_norm": 59.8892707824707, + "learning_rate": 4.145873320537428e-07, + "loss": 0.7695, + "num_input_tokens_seen": 1011008, + "step": 325 + }, + { + "epoch": 0.021125408104474747, + "grad_norm": 61.05699157714844, + "learning_rate": 4.2098528470889315e-07, + "loss": 0.8335, + "num_input_tokens_seen": 1025792, + "step": 330 + }, + { + "epoch": 0.021445490045451637, + "grad_norm": 54.53645324707031, + "learning_rate": 4.273832373640435e-07, + "loss": 0.6901, + "num_input_tokens_seen": 1042944, + "step": 335 + }, + { + "epoch": 0.021765571986428527, + "grad_norm": 69.49205017089844, + "learning_rate": 4.3378119001919386e-07, + "loss": 0.8267, + "num_input_tokens_seen": 1058688, + "step": 340 + }, + { + "epoch": 0.022085653927405417, + "grad_norm": 52.010841369628906, + "learning_rate": 4.401791426743442e-07, + "loss": 0.7233, + "num_input_tokens_seen": 1074560, + "step": 345 + }, + { + "epoch": 0.022405735868382307, + "grad_norm": 76.15229034423828, + "learning_rate": 4.4657709532949456e-07, + "loss": 0.6991, + "num_input_tokens_seen": 1089728, + "step": 350 + }, + { + "epoch": 0.022725817809359197, + "grad_norm": 93.6746597290039, + "learning_rate": 4.5297504798464494e-07, + "loss": 0.9114, + "num_input_tokens_seen": 1105024, + "step": 355 + }, + { + "epoch": 0.023045899750336087, + "grad_norm": 51.18860626220703, + "learning_rate": 4.593730006397952e-07, + "loss": 0.7824, + "num_input_tokens_seen": 1121088, + "step": 360 + }, + { + "epoch": 0.023365981691312977, + "grad_norm": 51.50726318359375, + "learning_rate": 4.657709532949456e-07, + "loss": 0.7048, + "num_input_tokens_seen": 1136896, + "step": 365 + }, + { + "epoch": 0.023686063632289867, + "grad_norm": 61.41284942626953, + "learning_rate": 4.7216890595009597e-07, + "loss": 0.7082, + "num_input_tokens_seen": 1153280, + "step": 370 + }, + { + "epoch": 0.024006145573266757, + "grad_norm": 56.15473175048828, + "learning_rate": 4.785668586052463e-07, + "loss": 0.8338, + "num_input_tokens_seen": 1169536, + "step": 375 + }, + { + "epoch": 0.024326227514243647, + "grad_norm": 87.2235107421875, + "learning_rate": 4.849648112603967e-07, + "loss": 0.7577, + "num_input_tokens_seen": 1185088, + "step": 380 + }, + { + "epoch": 0.024646309455220537, + "grad_norm": 35.1290397644043, + "learning_rate": 4.91362763915547e-07, + "loss": 0.6664, + "num_input_tokens_seen": 1200832, + "step": 385 + }, + { + "epoch": 0.024966391396197427, + "grad_norm": 50.34434509277344, + "learning_rate": 4.977607165706974e-07, + "loss": 0.6605, + "num_input_tokens_seen": 1216320, + "step": 390 + }, + { + "epoch": 0.025286473337174317, + "grad_norm": 61.464664459228516, + "learning_rate": 5.041586692258478e-07, + "loss": 0.7361, + "num_input_tokens_seen": 1232832, + "step": 395 + }, + { + "epoch": 0.025606555278151207, + "grad_norm": 49.942779541015625, + "learning_rate": 5.10556621880998e-07, + "loss": 0.7037, + "num_input_tokens_seen": 1248384, + "step": 400 + }, + { + "epoch": 0.025926637219128097, + "grad_norm": 35.994441986083984, + "learning_rate": 5.169545745361484e-07, + "loss": 0.6727, + "num_input_tokens_seen": 1263936, + "step": 405 + }, + { + "epoch": 0.026246719160104987, + "grad_norm": 65.4025650024414, + "learning_rate": 5.233525271912988e-07, + "loss": 1.118, + "num_input_tokens_seen": 1294208, + "step": 410 + }, + { + "epoch": 0.026566801101081877, + "grad_norm": 63.024566650390625, + "learning_rate": 5.297504798464492e-07, + "loss": 0.7921, + "num_input_tokens_seen": 1309120, + "step": 415 + }, + { + "epoch": 0.026886883042058767, + "grad_norm": 56.8184814453125, + "learning_rate": 5.361484325015994e-07, + "loss": 0.8592, + "num_input_tokens_seen": 1324224, + "step": 420 + }, + { + "epoch": 0.027206964983035656, + "grad_norm": 74.34542846679688, + "learning_rate": 5.425463851567498e-07, + "loss": 0.6829, + "num_input_tokens_seen": 1341056, + "step": 425 + }, + { + "epoch": 0.027527046924012546, + "grad_norm": 52.57733154296875, + "learning_rate": 5.489443378119002e-07, + "loss": 0.7533, + "num_input_tokens_seen": 1356544, + "step": 430 + }, + { + "epoch": 0.027847128864989436, + "grad_norm": 62.9859733581543, + "learning_rate": 5.553422904670505e-07, + "loss": 0.6696, + "num_input_tokens_seen": 1371840, + "step": 435 + }, + { + "epoch": 0.028167210805966326, + "grad_norm": 55.78180694580078, + "learning_rate": 5.61740243122201e-07, + "loss": 0.6825, + "num_input_tokens_seen": 1386816, + "step": 440 + }, + { + "epoch": 0.028487292746943216, + "grad_norm": 64.4063720703125, + "learning_rate": 5.681381957773512e-07, + "loss": 0.7438, + "num_input_tokens_seen": 1401792, + "step": 445 + }, + { + "epoch": 0.028807374687920106, + "grad_norm": 66.20137023925781, + "learning_rate": 5.745361484325015e-07, + "loss": 0.6214, + "num_input_tokens_seen": 1416896, + "step": 450 + }, + { + "epoch": 0.029127456628896996, + "grad_norm": 82.4999008178711, + "learning_rate": 5.80934101087652e-07, + "loss": 0.7517, + "num_input_tokens_seen": 1432704, + "step": 455 + }, + { + "epoch": 0.029447538569873886, + "grad_norm": 85.98738861083984, + "learning_rate": 5.873320537428022e-07, + "loss": 0.7009, + "num_input_tokens_seen": 1448384, + "step": 460 + }, + { + "epoch": 0.029767620510850776, + "grad_norm": 60.4025764465332, + "learning_rate": 5.937300063979526e-07, + "loss": 0.7179, + "num_input_tokens_seen": 1464832, + "step": 465 + }, + { + "epoch": 0.030087702451827666, + "grad_norm": 69.6055908203125, + "learning_rate": 6.00127959053103e-07, + "loss": 0.6785, + "num_input_tokens_seen": 1479424, + "step": 470 + }, + { + "epoch": 0.030407784392804556, + "grad_norm": 40.376953125, + "learning_rate": 6.065259117082533e-07, + "loss": 0.7292, + "num_input_tokens_seen": 1494336, + "step": 475 + }, + { + "epoch": 0.030727866333781446, + "grad_norm": 53.5233154296875, + "learning_rate": 6.129238643634037e-07, + "loss": 0.6741, + "num_input_tokens_seen": 1509184, + "step": 480 + }, + { + "epoch": 0.031047948274758336, + "grad_norm": 36.17082214355469, + "learning_rate": 6.19321817018554e-07, + "loss": 0.8032, + "num_input_tokens_seen": 1525504, + "step": 485 + }, + { + "epoch": 0.031368030215735226, + "grad_norm": 39.83842468261719, + "learning_rate": 6.257197696737044e-07, + "loss": 0.5911, + "num_input_tokens_seen": 1541504, + "step": 490 + }, + { + "epoch": 0.03168811215671212, + "grad_norm": 38.20148849487305, + "learning_rate": 6.321177223288548e-07, + "loss": 0.6188, + "num_input_tokens_seen": 1557184, + "step": 495 + }, + { + "epoch": 0.032008194097689006, + "grad_norm": 58.953765869140625, + "learning_rate": 6.385156749840051e-07, + "loss": 0.7662, + "num_input_tokens_seen": 1573440, + "step": 500 + }, + { + "epoch": 0.0323282760386659, + "grad_norm": 34.08373260498047, + "learning_rate": 6.449136276391554e-07, + "loss": 0.8712, + "num_input_tokens_seen": 1588736, + "step": 505 + }, + { + "epoch": 0.032648357979642786, + "grad_norm": 42.26185989379883, + "learning_rate": 6.513115802943058e-07, + "loss": 0.6979, + "num_input_tokens_seen": 1604352, + "step": 510 + }, + { + "epoch": 0.03296843992061968, + "grad_norm": 39.405391693115234, + "learning_rate": 6.577095329494562e-07, + "loss": 0.6574, + "num_input_tokens_seen": 1618816, + "step": 515 + }, + { + "epoch": 0.033288521861596566, + "grad_norm": 68.6015853881836, + "learning_rate": 6.641074856046065e-07, + "loss": 0.7462, + "num_input_tokens_seen": 1635648, + "step": 520 + }, + { + "epoch": 0.03360860380257346, + "grad_norm": 51.983734130859375, + "learning_rate": 6.705054382597568e-07, + "loss": 0.719, + "num_input_tokens_seen": 1651328, + "step": 525 + }, + { + "epoch": 0.033928685743550346, + "grad_norm": 41.2055549621582, + "learning_rate": 6.769033909149072e-07, + "loss": 0.7345, + "num_input_tokens_seen": 1668928, + "step": 530 + }, + { + "epoch": 0.03424876768452724, + "grad_norm": 49.78337478637695, + "learning_rate": 6.833013435700575e-07, + "loss": 0.6202, + "num_input_tokens_seen": 1685504, + "step": 535 + }, + { + "epoch": 0.034568849625504126, + "grad_norm": 56.454078674316406, + "learning_rate": 6.89699296225208e-07, + "loss": 0.7053, + "num_input_tokens_seen": 1701952, + "step": 540 + }, + { + "epoch": 0.03488893156648102, + "grad_norm": 57.62031173706055, + "learning_rate": 6.960972488803583e-07, + "loss": 0.7308, + "num_input_tokens_seen": 1716992, + "step": 545 + }, + { + "epoch": 0.035209013507457906, + "grad_norm": 26.579238891601562, + "learning_rate": 7.024952015355085e-07, + "loss": 0.5835, + "num_input_tokens_seen": 1732160, + "step": 550 + }, + { + "epoch": 0.0355290954484348, + "grad_norm": 62.191402435302734, + "learning_rate": 7.08893154190659e-07, + "loss": 0.6553, + "num_input_tokens_seen": 1748416, + "step": 555 + }, + { + "epoch": 0.035849177389411686, + "grad_norm": 47.643890380859375, + "learning_rate": 7.152911068458093e-07, + "loss": 0.7096, + "num_input_tokens_seen": 1763776, + "step": 560 + }, + { + "epoch": 0.03616925933038858, + "grad_norm": 53.94837188720703, + "learning_rate": 7.216890595009597e-07, + "loss": 0.6985, + "num_input_tokens_seen": 1780160, + "step": 565 + }, + { + "epoch": 0.036489341271365466, + "grad_norm": 49.82310104370117, + "learning_rate": 7.2808701215611e-07, + "loss": 0.6057, + "num_input_tokens_seen": 1795968, + "step": 570 + }, + { + "epoch": 0.03680942321234236, + "grad_norm": 45.038936614990234, + "learning_rate": 7.344849648112603e-07, + "loss": 0.6327, + "num_input_tokens_seen": 1815424, + "step": 575 + }, + { + "epoch": 0.037129505153319246, + "grad_norm": 86.25282287597656, + "learning_rate": 7.408829174664107e-07, + "loss": 0.8275, + "num_input_tokens_seen": 1831936, + "step": 580 + }, + { + "epoch": 0.03744958709429614, + "grad_norm": 33.02293014526367, + "learning_rate": 7.472808701215611e-07, + "loss": 0.6155, + "num_input_tokens_seen": 1847424, + "step": 585 + }, + { + "epoch": 0.03776966903527303, + "grad_norm": 46.377925872802734, + "learning_rate": 7.536788227767114e-07, + "loss": 0.7381, + "num_input_tokens_seen": 1862400, + "step": 590 + }, + { + "epoch": 0.03808975097624992, + "grad_norm": 83.56999969482422, + "learning_rate": 7.600767754318617e-07, + "loss": 0.7694, + "num_input_tokens_seen": 1876928, + "step": 595 + }, + { + "epoch": 0.03840983291722681, + "grad_norm": 52.600372314453125, + "learning_rate": 7.664747280870121e-07, + "loss": 0.6363, + "num_input_tokens_seen": 1892608, + "step": 600 + }, + { + "epoch": 0.0387299148582037, + "grad_norm": 35.62962341308594, + "learning_rate": 7.728726807421625e-07, + "loss": 0.7292, + "num_input_tokens_seen": 1909696, + "step": 605 + }, + { + "epoch": 0.03904999679918059, + "grad_norm": 38.61429214477539, + "learning_rate": 7.792706333973129e-07, + "loss": 0.7601, + "num_input_tokens_seen": 1924864, + "step": 610 + }, + { + "epoch": 0.03937007874015748, + "grad_norm": 35.40009689331055, + "learning_rate": 7.856685860524632e-07, + "loss": 0.5592, + "num_input_tokens_seen": 1939968, + "step": 615 + }, + { + "epoch": 0.03969016068113437, + "grad_norm": 69.34685516357422, + "learning_rate": 7.920665387076135e-07, + "loss": 0.7152, + "num_input_tokens_seen": 1955136, + "step": 620 + }, + { + "epoch": 0.04001024262211126, + "grad_norm": 42.532005310058594, + "learning_rate": 7.984644913627639e-07, + "loss": 0.7036, + "num_input_tokens_seen": 1970880, + "step": 625 + }, + { + "epoch": 0.04033032456308815, + "grad_norm": 61.369667053222656, + "learning_rate": 8.048624440179143e-07, + "loss": 0.5794, + "num_input_tokens_seen": 1986752, + "step": 630 + }, + { + "epoch": 0.04065040650406504, + "grad_norm": 39.555450439453125, + "learning_rate": 8.112603966730645e-07, + "loss": 0.5994, + "num_input_tokens_seen": 2001856, + "step": 635 + }, + { + "epoch": 0.04097048844504193, + "grad_norm": 48.69257354736328, + "learning_rate": 8.17658349328215e-07, + "loss": 0.6586, + "num_input_tokens_seen": 2019968, + "step": 640 + }, + { + "epoch": 0.04129057038601882, + "grad_norm": 57.668907165527344, + "learning_rate": 8.240563019833653e-07, + "loss": 0.7047, + "num_input_tokens_seen": 2035328, + "step": 645 + }, + { + "epoch": 0.04161065232699571, + "grad_norm": 43.12187576293945, + "learning_rate": 8.304542546385156e-07, + "loss": 0.6282, + "num_input_tokens_seen": 2055168, + "step": 650 + }, + { + "epoch": 0.0419307342679726, + "grad_norm": 43.47643280029297, + "learning_rate": 8.36852207293666e-07, + "loss": 0.7521, + "num_input_tokens_seen": 2071808, + "step": 655 + }, + { + "epoch": 0.04225081620894949, + "grad_norm": 44.587730407714844, + "learning_rate": 8.432501599488163e-07, + "loss": 0.6527, + "num_input_tokens_seen": 2087424, + "step": 660 + }, + { + "epoch": 0.04257089814992638, + "grad_norm": 35.20018768310547, + "learning_rate": 8.496481126039667e-07, + "loss": 0.7682, + "num_input_tokens_seen": 2102592, + "step": 665 + }, + { + "epoch": 0.04289098009090327, + "grad_norm": 44.10483169555664, + "learning_rate": 8.560460652591171e-07, + "loss": 0.6517, + "num_input_tokens_seen": 2119488, + "step": 670 + }, + { + "epoch": 0.04321106203188016, + "grad_norm": 37.69010543823242, + "learning_rate": 8.624440179142674e-07, + "loss": 0.6454, + "num_input_tokens_seen": 2136000, + "step": 675 + }, + { + "epoch": 0.04353114397285705, + "grad_norm": 47.20091247558594, + "learning_rate": 8.688419705694177e-07, + "loss": 0.7404, + "num_input_tokens_seen": 2152448, + "step": 680 + }, + { + "epoch": 0.04385122591383394, + "grad_norm": 44.33426284790039, + "learning_rate": 8.752399232245681e-07, + "loss": 0.6177, + "num_input_tokens_seen": 2168000, + "step": 685 + }, + { + "epoch": 0.04417130785481083, + "grad_norm": 42.24176025390625, + "learning_rate": 8.816378758797185e-07, + "loss": 0.5953, + "num_input_tokens_seen": 2183552, + "step": 690 + }, + { + "epoch": 0.04449138979578772, + "grad_norm": 52.65016174316406, + "learning_rate": 8.880358285348688e-07, + "loss": 0.7135, + "num_input_tokens_seen": 2199488, + "step": 695 + }, + { + "epoch": 0.04481147173676461, + "grad_norm": 36.20340347290039, + "learning_rate": 8.944337811900191e-07, + "loss": 0.6167, + "num_input_tokens_seen": 2215296, + "step": 700 + }, + { + "epoch": 0.0451315536777415, + "grad_norm": 50.2882080078125, + "learning_rate": 9.008317338451695e-07, + "loss": 0.7051, + "num_input_tokens_seen": 2230016, + "step": 705 + }, + { + "epoch": 0.04545163561871839, + "grad_norm": 48.945701599121094, + "learning_rate": 9.072296865003198e-07, + "loss": 0.6629, + "num_input_tokens_seen": 2245056, + "step": 710 + }, + { + "epoch": 0.04577171755969528, + "grad_norm": 35.3903923034668, + "learning_rate": 9.136276391554703e-07, + "loss": 0.6166, + "num_input_tokens_seen": 2261248, + "step": 715 + }, + { + "epoch": 0.04609179950067217, + "grad_norm": 57.04933547973633, + "learning_rate": 9.200255918106205e-07, + "loss": 0.6516, + "num_input_tokens_seen": 2278016, + "step": 720 + }, + { + "epoch": 0.04641188144164906, + "grad_norm": 46.2874755859375, + "learning_rate": 9.264235444657708e-07, + "loss": 0.5696, + "num_input_tokens_seen": 2292800, + "step": 725 + }, + { + "epoch": 0.04673196338262595, + "grad_norm": 34.89374542236328, + "learning_rate": 9.328214971209213e-07, + "loss": 0.6049, + "num_input_tokens_seen": 2308224, + "step": 730 + }, + { + "epoch": 0.04705204532360284, + "grad_norm": 39.93567657470703, + "learning_rate": 9.392194497760716e-07, + "loss": 0.7005, + "num_input_tokens_seen": 2325760, + "step": 735 + }, + { + "epoch": 0.04737212726457973, + "grad_norm": 56.7358512878418, + "learning_rate": 9.456174024312221e-07, + "loss": 0.6971, + "num_input_tokens_seen": 2341632, + "step": 740 + }, + { + "epoch": 0.04769220920555662, + "grad_norm": 38.78962326049805, + "learning_rate": 9.520153550863723e-07, + "loss": 0.7066, + "num_input_tokens_seen": 2357504, + "step": 745 + }, + { + "epoch": 0.04801229114653351, + "grad_norm": 42.24749755859375, + "learning_rate": 9.584133077415226e-07, + "loss": 0.7294, + "num_input_tokens_seen": 2372608, + "step": 750 + }, + { + "epoch": 0.0483323730875104, + "grad_norm": 42.99443817138672, + "learning_rate": 9.64811260396673e-07, + "loss": 0.587, + "num_input_tokens_seen": 2388352, + "step": 755 + }, + { + "epoch": 0.04865245502848729, + "grad_norm": 46.318416595458984, + "learning_rate": 9.712092130518234e-07, + "loss": 0.6934, + "num_input_tokens_seen": 2404480, + "step": 760 + }, + { + "epoch": 0.04897253696946418, + "grad_norm": 32.9005126953125, + "learning_rate": 9.776071657069737e-07, + "loss": 0.518, + "num_input_tokens_seen": 2419648, + "step": 765 + }, + { + "epoch": 0.04929261891044107, + "grad_norm": 45.313751220703125, + "learning_rate": 9.840051183621241e-07, + "loss": 0.7121, + "num_input_tokens_seen": 2435584, + "step": 770 + }, + { + "epoch": 0.04961270085141796, + "grad_norm": 57.734039306640625, + "learning_rate": 9.904030710172743e-07, + "loss": 0.6265, + "num_input_tokens_seen": 2451072, + "step": 775 + }, + { + "epoch": 0.04993278279239485, + "grad_norm": 60.701107025146484, + "learning_rate": 9.968010236724249e-07, + "loss": 0.764, + "num_input_tokens_seen": 2467968, + "step": 780 + }, + { + "epoch": 0.05006081556878561, + "eval_loss": 0.6362079381942749, + "eval_runtime": 49.1703, + "eval_samples_per_second": 282.406, + "eval_steps_per_second": 35.306, + "num_input_tokens_seen": 2474432, + "step": 782 + }, + { + "epoch": 0.05025286473337175, + "grad_norm": 52.689231872558594, + "learning_rate": 1.0031989763275752e-06, + "loss": 0.669, + "num_input_tokens_seen": 2484928, + "step": 785 + }, + { + "epoch": 0.05057294667434863, + "grad_norm": 42.69588851928711, + "learning_rate": 1.0095969289827256e-06, + "loss": 0.6777, + "num_input_tokens_seen": 2501504, + "step": 790 + }, + { + "epoch": 0.050893028615325527, + "grad_norm": 32.48566436767578, + "learning_rate": 1.0159948816378758e-06, + "loss": 0.5188, + "num_input_tokens_seen": 2518848, + "step": 795 + }, + { + "epoch": 0.05121311055630241, + "grad_norm": 33.27299880981445, + "learning_rate": 1.0223928342930262e-06, + "loss": 0.5482, + "num_input_tokens_seen": 2535680, + "step": 800 + }, + { + "epoch": 0.051533192497279307, + "grad_norm": 51.12800979614258, + "learning_rate": 1.0287907869481766e-06, + "loss": 0.676, + "num_input_tokens_seen": 2550976, + "step": 805 + }, + { + "epoch": 0.05185327443825619, + "grad_norm": 39.38006591796875, + "learning_rate": 1.035188739603327e-06, + "loss": 0.5562, + "num_input_tokens_seen": 2566656, + "step": 810 + }, + { + "epoch": 0.052173356379233086, + "grad_norm": 49.9570426940918, + "learning_rate": 1.0415866922584773e-06, + "loss": 0.6315, + "num_input_tokens_seen": 2581568, + "step": 815 + }, + { + "epoch": 0.05249343832020997, + "grad_norm": 51.84290313720703, + "learning_rate": 1.0479846449136277e-06, + "loss": 0.6426, + "num_input_tokens_seen": 2596608, + "step": 820 + }, + { + "epoch": 0.052813520261186866, + "grad_norm": 42.30448532104492, + "learning_rate": 1.0543825975687779e-06, + "loss": 0.6719, + "num_input_tokens_seen": 2612032, + "step": 825 + }, + { + "epoch": 0.05313360220216375, + "grad_norm": 54.049774169921875, + "learning_rate": 1.0607805502239282e-06, + "loss": 0.7313, + "num_input_tokens_seen": 2627264, + "step": 830 + }, + { + "epoch": 0.053453684143140646, + "grad_norm": 43.845027923583984, + "learning_rate": 1.0671785028790788e-06, + "loss": 0.548, + "num_input_tokens_seen": 2643264, + "step": 835 + }, + { + "epoch": 0.05377376608411753, + "grad_norm": 43.62913131713867, + "learning_rate": 1.073576455534229e-06, + "loss": 0.5474, + "num_input_tokens_seen": 2659264, + "step": 840 + }, + { + "epoch": 0.054093848025094426, + "grad_norm": 37.99971389770508, + "learning_rate": 1.0799744081893794e-06, + "loss": 0.5737, + "num_input_tokens_seen": 2673856, + "step": 845 + }, + { + "epoch": 0.05441392996607131, + "grad_norm": 35.17848587036133, + "learning_rate": 1.0863723608445297e-06, + "loss": 0.4779, + "num_input_tokens_seen": 2688448, + "step": 850 + }, + { + "epoch": 0.054734011907048206, + "grad_norm": 69.50128173828125, + "learning_rate": 1.09277031349968e-06, + "loss": 0.6201, + "num_input_tokens_seen": 2703872, + "step": 855 + }, + { + "epoch": 0.05505409384802509, + "grad_norm": 49.573143005371094, + "learning_rate": 1.0991682661548305e-06, + "loss": 0.6104, + "num_input_tokens_seen": 2719040, + "step": 860 + }, + { + "epoch": 0.055374175789001986, + "grad_norm": 35.63096618652344, + "learning_rate": 1.1055662188099809e-06, + "loss": 0.6205, + "num_input_tokens_seen": 2735168, + "step": 865 + }, + { + "epoch": 0.05569425772997887, + "grad_norm": 38.10055160522461, + "learning_rate": 1.111964171465131e-06, + "loss": 0.5224, + "num_input_tokens_seen": 2750592, + "step": 870 + }, + { + "epoch": 0.056014339670955766, + "grad_norm": 21.403268814086914, + "learning_rate": 1.1183621241202814e-06, + "loss": 0.6572, + "num_input_tokens_seen": 2767232, + "step": 875 + }, + { + "epoch": 0.05633442161193265, + "grad_norm": 55.04920959472656, + "learning_rate": 1.1247600767754318e-06, + "loss": 0.665, + "num_input_tokens_seen": 2784768, + "step": 880 + }, + { + "epoch": 0.056654503552909546, + "grad_norm": 39.130226135253906, + "learning_rate": 1.1311580294305822e-06, + "loss": 0.5809, + "num_input_tokens_seen": 2799872, + "step": 885 + }, + { + "epoch": 0.05697458549388643, + "grad_norm": 51.871341705322266, + "learning_rate": 1.1375559820857326e-06, + "loss": 0.6481, + "num_input_tokens_seen": 2816000, + "step": 890 + }, + { + "epoch": 0.057294667434863326, + "grad_norm": 46.604705810546875, + "learning_rate": 1.143953934740883e-06, + "loss": 0.5859, + "num_input_tokens_seen": 2831744, + "step": 895 + }, + { + "epoch": 0.05761474937584021, + "grad_norm": 56.78334426879883, + "learning_rate": 1.150351887396033e-06, + "loss": 0.6183, + "num_input_tokens_seen": 2847424, + "step": 900 + }, + { + "epoch": 0.057934831316817106, + "grad_norm": 51.35699462890625, + "learning_rate": 1.1567498400511835e-06, + "loss": 0.616, + "num_input_tokens_seen": 2862272, + "step": 905 + }, + { + "epoch": 0.05825491325779399, + "grad_norm": 38.57978820800781, + "learning_rate": 1.163147792706334e-06, + "loss": 0.4927, + "num_input_tokens_seen": 2877120, + "step": 910 + }, + { + "epoch": 0.058574995198770886, + "grad_norm": 41.00065612792969, + "learning_rate": 1.1695457453614842e-06, + "loss": 0.5249, + "num_input_tokens_seen": 2894592, + "step": 915 + }, + { + "epoch": 0.05889507713974777, + "grad_norm": 40.363075256347656, + "learning_rate": 1.1759436980166346e-06, + "loss": 0.6159, + "num_input_tokens_seen": 2909888, + "step": 920 + }, + { + "epoch": 0.059215159080724666, + "grad_norm": 49.1600456237793, + "learning_rate": 1.182341650671785e-06, + "loss": 0.6195, + "num_input_tokens_seen": 2925632, + "step": 925 + }, + { + "epoch": 0.05953524102170155, + "grad_norm": 47.78977966308594, + "learning_rate": 1.1887396033269352e-06, + "loss": 0.6153, + "num_input_tokens_seen": 2941760, + "step": 930 + }, + { + "epoch": 0.059855322962678446, + "grad_norm": 47.449405670166016, + "learning_rate": 1.1951375559820858e-06, + "loss": 0.7076, + "num_input_tokens_seen": 2957376, + "step": 935 + }, + { + "epoch": 0.06017540490365533, + "grad_norm": 66.98524475097656, + "learning_rate": 1.2015355086372361e-06, + "loss": 0.5704, + "num_input_tokens_seen": 2972800, + "step": 940 + }, + { + "epoch": 0.060495486844632226, + "grad_norm": 48.29072952270508, + "learning_rate": 1.2079334612923863e-06, + "loss": 0.7172, + "num_input_tokens_seen": 2988480, + "step": 945 + }, + { + "epoch": 0.06081556878560911, + "grad_norm": 43.3856086730957, + "learning_rate": 1.2143314139475367e-06, + "loss": 0.6613, + "num_input_tokens_seen": 3004480, + "step": 950 + }, + { + "epoch": 0.061135650726586006, + "grad_norm": 38.56562423706055, + "learning_rate": 1.220729366602687e-06, + "loss": 0.444, + "num_input_tokens_seen": 3020288, + "step": 955 + }, + { + "epoch": 0.06145573266756289, + "grad_norm": 60.62529373168945, + "learning_rate": 1.2271273192578374e-06, + "loss": 0.6011, + "num_input_tokens_seen": 3035968, + "step": 960 + }, + { + "epoch": 0.061775814608539786, + "grad_norm": 61.26271438598633, + "learning_rate": 1.2335252719129878e-06, + "loss": 0.7411, + "num_input_tokens_seen": 3051776, + "step": 965 + }, + { + "epoch": 0.06209589654951667, + "grad_norm": 52.55011749267578, + "learning_rate": 1.2399232245681382e-06, + "loss": 0.5575, + "num_input_tokens_seen": 3066560, + "step": 970 + }, + { + "epoch": 0.062415978490493566, + "grad_norm": 52.49790954589844, + "learning_rate": 1.2463211772232884e-06, + "loss": 0.6357, + "num_input_tokens_seen": 3082496, + "step": 975 + }, + { + "epoch": 0.06273606043147045, + "grad_norm": 43.31839370727539, + "learning_rate": 1.2527191298784387e-06, + "loss": 0.6233, + "num_input_tokens_seen": 3097856, + "step": 980 + }, + { + "epoch": 0.06305614237244735, + "grad_norm": 25.353742599487305, + "learning_rate": 1.2591170825335893e-06, + "loss": 0.5062, + "num_input_tokens_seen": 3113664, + "step": 985 + }, + { + "epoch": 0.06337622431342424, + "grad_norm": 37.9774169921875, + "learning_rate": 1.2655150351887395e-06, + "loss": 0.6242, + "num_input_tokens_seen": 3129792, + "step": 990 + }, + { + "epoch": 0.06369630625440113, + "grad_norm": 30.752185821533203, + "learning_rate": 1.2719129878438899e-06, + "loss": 0.5901, + "num_input_tokens_seen": 3145024, + "step": 995 + }, + { + "epoch": 0.06401638819537801, + "grad_norm": 41.19409942626953, + "learning_rate": 1.2783109404990402e-06, + "loss": 0.7747, + "num_input_tokens_seen": 3161216, + "step": 1000 + }, + { + "epoch": 0.0643364701363549, + "grad_norm": 27.8523006439209, + "learning_rate": 1.2847088931541904e-06, + "loss": 0.4118, + "num_input_tokens_seen": 3176960, + "step": 1005 + }, + { + "epoch": 0.0646565520773318, + "grad_norm": 39.628929138183594, + "learning_rate": 1.291106845809341e-06, + "loss": 0.607, + "num_input_tokens_seen": 3193088, + "step": 1010 + }, + { + "epoch": 0.0649766340183087, + "grad_norm": 62.03862762451172, + "learning_rate": 1.2975047984644914e-06, + "loss": 0.6808, + "num_input_tokens_seen": 3210112, + "step": 1015 + }, + { + "epoch": 0.06529671595928557, + "grad_norm": 41.16059494018555, + "learning_rate": 1.3039027511196418e-06, + "loss": 0.5044, + "num_input_tokens_seen": 3224768, + "step": 1020 + }, + { + "epoch": 0.06561679790026247, + "grad_norm": 45.047080993652344, + "learning_rate": 1.310300703774792e-06, + "loss": 0.6235, + "num_input_tokens_seen": 3240128, + "step": 1025 + }, + { + "epoch": 0.06593687984123936, + "grad_norm": 41.879398345947266, + "learning_rate": 1.3166986564299423e-06, + "loss": 0.5605, + "num_input_tokens_seen": 3256576, + "step": 1030 + }, + { + "epoch": 0.06625696178221625, + "grad_norm": 34.385223388671875, + "learning_rate": 1.3230966090850929e-06, + "loss": 0.5942, + "num_input_tokens_seen": 3272384, + "step": 1035 + }, + { + "epoch": 0.06657704372319313, + "grad_norm": 38.94369125366211, + "learning_rate": 1.329494561740243e-06, + "loss": 0.4108, + "num_input_tokens_seen": 3288512, + "step": 1040 + }, + { + "epoch": 0.06689712566417003, + "grad_norm": 40.253990173339844, + "learning_rate": 1.3358925143953934e-06, + "loss": 0.4897, + "num_input_tokens_seen": 3306304, + "step": 1045 + }, + { + "epoch": 0.06721720760514692, + "grad_norm": 42.53627395629883, + "learning_rate": 1.3422904670505438e-06, + "loss": 0.4785, + "num_input_tokens_seen": 3321344, + "step": 1050 + }, + { + "epoch": 0.06753728954612381, + "grad_norm": 38.27849197387695, + "learning_rate": 1.348688419705694e-06, + "loss": 0.6127, + "num_input_tokens_seen": 3338560, + "step": 1055 + }, + { + "epoch": 0.06785737148710069, + "grad_norm": 26.670169830322266, + "learning_rate": 1.3550863723608446e-06, + "loss": 0.5135, + "num_input_tokens_seen": 3353152, + "step": 1060 + }, + { + "epoch": 0.06817745342807759, + "grad_norm": 46.529396057128906, + "learning_rate": 1.361484325015995e-06, + "loss": 0.5401, + "num_input_tokens_seen": 3369536, + "step": 1065 + }, + { + "epoch": 0.06849753536905448, + "grad_norm": 45.95737075805664, + "learning_rate": 1.3678822776711451e-06, + "loss": 0.6023, + "num_input_tokens_seen": 3384832, + "step": 1070 + }, + { + "epoch": 0.06881761731003137, + "grad_norm": 38.86219787597656, + "learning_rate": 1.3742802303262955e-06, + "loss": 0.4881, + "num_input_tokens_seen": 3399424, + "step": 1075 + }, + { + "epoch": 0.06913769925100825, + "grad_norm": 30.497953414916992, + "learning_rate": 1.3806781829814459e-06, + "loss": 0.6565, + "num_input_tokens_seen": 3416704, + "step": 1080 + }, + { + "epoch": 0.06945778119198515, + "grad_norm": 59.77437210083008, + "learning_rate": 1.3870761356365963e-06, + "loss": 0.5553, + "num_input_tokens_seen": 3431552, + "step": 1085 + }, + { + "epoch": 0.06977786313296204, + "grad_norm": 36.94731521606445, + "learning_rate": 1.3934740882917466e-06, + "loss": 0.6472, + "num_input_tokens_seen": 3447488, + "step": 1090 + }, + { + "epoch": 0.07009794507393893, + "grad_norm": 39.8687744140625, + "learning_rate": 1.399872040946897e-06, + "loss": 0.5137, + "num_input_tokens_seen": 3463424, + "step": 1095 + }, + { + "epoch": 0.07041802701491581, + "grad_norm": 51.21504211425781, + "learning_rate": 1.4062699936020472e-06, + "loss": 0.6527, + "num_input_tokens_seen": 3479680, + "step": 1100 + }, + { + "epoch": 0.0707381089558927, + "grad_norm": 49.46668243408203, + "learning_rate": 1.4126679462571976e-06, + "loss": 0.5117, + "num_input_tokens_seen": 3495552, + "step": 1105 + }, + { + "epoch": 0.0710581908968696, + "grad_norm": 56.50544357299805, + "learning_rate": 1.4190658989123481e-06, + "loss": 0.4748, + "num_input_tokens_seen": 3510976, + "step": 1110 + }, + { + "epoch": 0.0713782728378465, + "grad_norm": 49.386070251464844, + "learning_rate": 1.4254638515674983e-06, + "loss": 0.6499, + "num_input_tokens_seen": 3526016, + "step": 1115 + }, + { + "epoch": 0.07169835477882337, + "grad_norm": 22.4860782623291, + "learning_rate": 1.4318618042226487e-06, + "loss": 0.5645, + "num_input_tokens_seen": 3540544, + "step": 1120 + }, + { + "epoch": 0.07201843671980027, + "grad_norm": 43.12958908081055, + "learning_rate": 1.438259756877799e-06, + "loss": 0.6069, + "num_input_tokens_seen": 3556416, + "step": 1125 + }, + { + "epoch": 0.07233851866077716, + "grad_norm": 43.865108489990234, + "learning_rate": 1.4446577095329492e-06, + "loss": 0.5077, + "num_input_tokens_seen": 3572096, + "step": 1130 + }, + { + "epoch": 0.07265860060175405, + "grad_norm": 41.96502685546875, + "learning_rate": 1.4510556621880998e-06, + "loss": 0.4993, + "num_input_tokens_seen": 3587712, + "step": 1135 + }, + { + "epoch": 0.07297868254273093, + "grad_norm": 30.780799865722656, + "learning_rate": 1.4574536148432502e-06, + "loss": 0.5417, + "num_input_tokens_seen": 3605056, + "step": 1140 + }, + { + "epoch": 0.07329876448370783, + "grad_norm": 42.194156646728516, + "learning_rate": 1.4638515674984004e-06, + "loss": 0.6805, + "num_input_tokens_seen": 3621184, + "step": 1145 + }, + { + "epoch": 0.07361884642468472, + "grad_norm": 25.724376678466797, + "learning_rate": 1.4702495201535507e-06, + "loss": 0.5834, + "num_input_tokens_seen": 3635392, + "step": 1150 + }, + { + "epoch": 0.07393892836566161, + "grad_norm": 32.53746795654297, + "learning_rate": 1.4766474728087011e-06, + "loss": 0.5049, + "num_input_tokens_seen": 3649984, + "step": 1155 + }, + { + "epoch": 0.07425901030663849, + "grad_norm": 34.3016471862793, + "learning_rate": 1.4830454254638515e-06, + "loss": 0.5276, + "num_input_tokens_seen": 3665920, + "step": 1160 + }, + { + "epoch": 0.07457909224761539, + "grad_norm": 32.034515380859375, + "learning_rate": 1.4894433781190019e-06, + "loss": 0.4587, + "num_input_tokens_seen": 3680256, + "step": 1165 + }, + { + "epoch": 0.07489917418859228, + "grad_norm": 49.901329040527344, + "learning_rate": 1.4958413307741523e-06, + "loss": 0.5255, + "num_input_tokens_seen": 3697536, + "step": 1170 + }, + { + "epoch": 0.07521925612956917, + "grad_norm": 35.28968048095703, + "learning_rate": 1.5022392834293024e-06, + "loss": 0.6111, + "num_input_tokens_seen": 3713088, + "step": 1175 + }, + { + "epoch": 0.07553933807054607, + "grad_norm": 56.491756439208984, + "learning_rate": 1.5086372360844528e-06, + "loss": 0.6712, + "num_input_tokens_seen": 3729920, + "step": 1180 + }, + { + "epoch": 0.07585942001152295, + "grad_norm": 45.67325210571289, + "learning_rate": 1.5150351887396034e-06, + "loss": 0.5489, + "num_input_tokens_seen": 3745664, + "step": 1185 + }, + { + "epoch": 0.07617950195249984, + "grad_norm": 35.20317840576172, + "learning_rate": 1.5214331413947536e-06, + "loss": 0.5258, + "num_input_tokens_seen": 3760576, + "step": 1190 + }, + { + "epoch": 0.07649958389347673, + "grad_norm": 29.504152297973633, + "learning_rate": 1.527831094049904e-06, + "loss": 0.5085, + "num_input_tokens_seen": 3776576, + "step": 1195 + }, + { + "epoch": 0.07681966583445363, + "grad_norm": 43.33934783935547, + "learning_rate": 1.5342290467050543e-06, + "loss": 0.5857, + "num_input_tokens_seen": 3792384, + "step": 1200 + }, + { + "epoch": 0.0771397477754305, + "grad_norm": 44.849308013916016, + "learning_rate": 1.5406269993602045e-06, + "loss": 0.6438, + "num_input_tokens_seen": 3806592, + "step": 1205 + }, + { + "epoch": 0.0774598297164074, + "grad_norm": 52.07255935668945, + "learning_rate": 1.547024952015355e-06, + "loss": 0.5775, + "num_input_tokens_seen": 3822080, + "step": 1210 + }, + { + "epoch": 0.07777991165738429, + "grad_norm": 37.863677978515625, + "learning_rate": 1.5534229046705055e-06, + "loss": 0.5269, + "num_input_tokens_seen": 3837120, + "step": 1215 + }, + { + "epoch": 0.07809999359836119, + "grad_norm": 37.92720413208008, + "learning_rate": 1.5598208573256556e-06, + "loss": 0.6994, + "num_input_tokens_seen": 3852864, + "step": 1220 + }, + { + "epoch": 0.07842007553933807, + "grad_norm": 34.05339431762695, + "learning_rate": 1.566218809980806e-06, + "loss": 0.515, + "num_input_tokens_seen": 3869184, + "step": 1225 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 31.917217254638672, + "learning_rate": 1.5726167626359564e-06, + "loss": 0.5388, + "num_input_tokens_seen": 3885248, + "step": 1230 + }, + { + "epoch": 0.07906023942129185, + "grad_norm": 32.81400680541992, + "learning_rate": 1.5790147152911068e-06, + "loss": 0.4306, + "num_input_tokens_seen": 3900416, + "step": 1235 + }, + { + "epoch": 0.07938032136226875, + "grad_norm": 38.30088806152344, + "learning_rate": 1.5854126679462571e-06, + "loss": 0.5503, + "num_input_tokens_seen": 3916096, + "step": 1240 + }, + { + "epoch": 0.07970040330324563, + "grad_norm": 50.25246810913086, + "learning_rate": 1.5918106206014075e-06, + "loss": 0.6993, + "num_input_tokens_seen": 3933312, + "step": 1245 + }, + { + "epoch": 0.08002048524422252, + "grad_norm": 74.49282836914062, + "learning_rate": 1.5982085732565577e-06, + "loss": 0.6197, + "num_input_tokens_seen": 3949440, + "step": 1250 + }, + { + "epoch": 0.08034056718519941, + "grad_norm": 36.928924560546875, + "learning_rate": 1.604606525911708e-06, + "loss": 0.6799, + "num_input_tokens_seen": 3964992, + "step": 1255 + }, + { + "epoch": 0.0806606491261763, + "grad_norm": 56.78390884399414, + "learning_rate": 1.6110044785668586e-06, + "loss": 0.7324, + "num_input_tokens_seen": 3981696, + "step": 1260 + }, + { + "epoch": 0.08098073106715319, + "grad_norm": 38.05080795288086, + "learning_rate": 1.617402431222009e-06, + "loss": 0.6136, + "num_input_tokens_seen": 3997248, + "step": 1265 + }, + { + "epoch": 0.08130081300813008, + "grad_norm": 27.51533317565918, + "learning_rate": 1.6238003838771592e-06, + "loss": 0.6689, + "num_input_tokens_seen": 4011648, + "step": 1270 + }, + { + "epoch": 0.08162089494910697, + "grad_norm": 42.947906494140625, + "learning_rate": 1.6301983365323096e-06, + "loss": 0.5254, + "num_input_tokens_seen": 4028160, + "step": 1275 + }, + { + "epoch": 0.08194097689008387, + "grad_norm": 47.13071060180664, + "learning_rate": 1.63659628918746e-06, + "loss": 0.5398, + "num_input_tokens_seen": 4043584, + "step": 1280 + }, + { + "epoch": 0.08226105883106075, + "grad_norm": 47.630218505859375, + "learning_rate": 1.6429942418426103e-06, + "loss": 0.7076, + "num_input_tokens_seen": 4059456, + "step": 1285 + }, + { + "epoch": 0.08258114077203764, + "grad_norm": 26.62889289855957, + "learning_rate": 1.6493921944977607e-06, + "loss": 0.6103, + "num_input_tokens_seen": 4076096, + "step": 1290 + }, + { + "epoch": 0.08290122271301453, + "grad_norm": 41.755088806152344, + "learning_rate": 1.655790147152911e-06, + "loss": 0.6111, + "num_input_tokens_seen": 4093568, + "step": 1295 + }, + { + "epoch": 0.08322130465399143, + "grad_norm": 36.05648422241211, + "learning_rate": 1.6621880998080612e-06, + "loss": 0.6676, + "num_input_tokens_seen": 4108864, + "step": 1300 + }, + { + "epoch": 0.0835413865949683, + "grad_norm": 37.495201110839844, + "learning_rate": 1.6685860524632116e-06, + "loss": 0.6425, + "num_input_tokens_seen": 4124224, + "step": 1305 + }, + { + "epoch": 0.0838614685359452, + "grad_norm": 40.898502349853516, + "learning_rate": 1.6749840051183622e-06, + "loss": 0.5516, + "num_input_tokens_seen": 4139008, + "step": 1310 + }, + { + "epoch": 0.08418155047692209, + "grad_norm": 24.442567825317383, + "learning_rate": 1.6813819577735124e-06, + "loss": 0.5551, + "num_input_tokens_seen": 4155008, + "step": 1315 + }, + { + "epoch": 0.08450163241789899, + "grad_norm": 26.63324737548828, + "learning_rate": 1.6877799104286628e-06, + "loss": 0.4792, + "num_input_tokens_seen": 4172544, + "step": 1320 + }, + { + "epoch": 0.08482171435887587, + "grad_norm": 48.432395935058594, + "learning_rate": 1.6941778630838131e-06, + "loss": 0.6306, + "num_input_tokens_seen": 4188416, + "step": 1325 + }, + { + "epoch": 0.08514179629985276, + "grad_norm": 19.108352661132812, + "learning_rate": 1.7005758157389633e-06, + "loss": 0.5031, + "num_input_tokens_seen": 4202560, + "step": 1330 + }, + { + "epoch": 0.08546187824082965, + "grad_norm": 35.99553680419922, + "learning_rate": 1.706973768394114e-06, + "loss": 0.5574, + "num_input_tokens_seen": 4219392, + "step": 1335 + }, + { + "epoch": 0.08578196018180655, + "grad_norm": 50.857059478759766, + "learning_rate": 1.7133717210492643e-06, + "loss": 0.4844, + "num_input_tokens_seen": 4235328, + "step": 1340 + }, + { + "epoch": 0.08610204212278343, + "grad_norm": 52.725589752197266, + "learning_rate": 1.7197696737044144e-06, + "loss": 0.5778, + "num_input_tokens_seen": 4250368, + "step": 1345 + }, + { + "epoch": 0.08642212406376032, + "grad_norm": 27.934179306030273, + "learning_rate": 1.7261676263595648e-06, + "loss": 0.4549, + "num_input_tokens_seen": 4265856, + "step": 1350 + }, + { + "epoch": 0.08674220600473721, + "grad_norm": 41.11848068237305, + "learning_rate": 1.7325655790147152e-06, + "loss": 0.6627, + "num_input_tokens_seen": 4281792, + "step": 1355 + }, + { + "epoch": 0.0870622879457141, + "grad_norm": 38.61765670776367, + "learning_rate": 1.7389635316698656e-06, + "loss": 0.5873, + "num_input_tokens_seen": 4297088, + "step": 1360 + }, + { + "epoch": 0.087382369886691, + "grad_norm": 27.60044288635254, + "learning_rate": 1.745361484325016e-06, + "loss": 0.5028, + "num_input_tokens_seen": 4312192, + "step": 1365 + }, + { + "epoch": 0.08770245182766788, + "grad_norm": 27.299213409423828, + "learning_rate": 1.7517594369801663e-06, + "loss": 0.4819, + "num_input_tokens_seen": 4326720, + "step": 1370 + }, + { + "epoch": 0.08802253376864477, + "grad_norm": 58.7935791015625, + "learning_rate": 1.7581573896353165e-06, + "loss": 0.7894, + "num_input_tokens_seen": 4341760, + "step": 1375 + }, + { + "epoch": 0.08834261570962167, + "grad_norm": 36.60477828979492, + "learning_rate": 1.7645553422904669e-06, + "loss": 0.6215, + "num_input_tokens_seen": 4357760, + "step": 1380 + }, + { + "epoch": 0.08866269765059856, + "grad_norm": 36.011505126953125, + "learning_rate": 1.7709532949456175e-06, + "loss": 0.6267, + "num_input_tokens_seen": 4373824, + "step": 1385 + }, + { + "epoch": 0.08898277959157544, + "grad_norm": 32.17240524291992, + "learning_rate": 1.7773512476007676e-06, + "loss": 0.4739, + "num_input_tokens_seen": 4388992, + "step": 1390 + }, + { + "epoch": 0.08930286153255233, + "grad_norm": 29.726274490356445, + "learning_rate": 1.783749200255918e-06, + "loss": 0.5295, + "num_input_tokens_seen": 4404288, + "step": 1395 + }, + { + "epoch": 0.08962294347352923, + "grad_norm": 49.96647644042969, + "learning_rate": 1.7901471529110684e-06, + "loss": 0.5366, + "num_input_tokens_seen": 4419840, + "step": 1400 + }, + { + "epoch": 0.08994302541450612, + "grad_norm": 55.71930694580078, + "learning_rate": 1.7965451055662186e-06, + "loss": 0.5109, + "num_input_tokens_seen": 4435200, + "step": 1405 + }, + { + "epoch": 0.090263107355483, + "grad_norm": 54.367244720458984, + "learning_rate": 1.8029430582213691e-06, + "loss": 0.6082, + "num_input_tokens_seen": 4450368, + "step": 1410 + }, + { + "epoch": 0.09058318929645989, + "grad_norm": 42.54631042480469, + "learning_rate": 1.8093410108765195e-06, + "loss": 0.4889, + "num_input_tokens_seen": 4466048, + "step": 1415 + }, + { + "epoch": 0.09090327123743679, + "grad_norm": 56.23736572265625, + "learning_rate": 1.8157389635316697e-06, + "loss": 0.5985, + "num_input_tokens_seen": 4481920, + "step": 1420 + }, + { + "epoch": 0.09122335317841368, + "grad_norm": 34.284244537353516, + "learning_rate": 1.82213691618682e-06, + "loss": 0.5671, + "num_input_tokens_seen": 4498112, + "step": 1425 + }, + { + "epoch": 0.09154343511939056, + "grad_norm": 32.31144714355469, + "learning_rate": 1.8285348688419704e-06, + "loss": 0.4306, + "num_input_tokens_seen": 4515648, + "step": 1430 + }, + { + "epoch": 0.09186351706036745, + "grad_norm": 34.67725372314453, + "learning_rate": 1.8349328214971208e-06, + "loss": 0.5719, + "num_input_tokens_seen": 4531840, + "step": 1435 + }, + { + "epoch": 0.09218359900134435, + "grad_norm": 48.15701675415039, + "learning_rate": 1.8413307741522712e-06, + "loss": 0.5478, + "num_input_tokens_seen": 4547456, + "step": 1440 + }, + { + "epoch": 0.09250368094232124, + "grad_norm": 40.82353210449219, + "learning_rate": 1.8477287268074216e-06, + "loss": 0.557, + "num_input_tokens_seen": 4563328, + "step": 1445 + }, + { + "epoch": 0.09282376288329812, + "grad_norm": 28.479816436767578, + "learning_rate": 1.8541266794625718e-06, + "loss": 0.5856, + "num_input_tokens_seen": 4579392, + "step": 1450 + }, + { + "epoch": 0.09314384482427501, + "grad_norm": 80.24234008789062, + "learning_rate": 1.8605246321177221e-06, + "loss": 0.6149, + "num_input_tokens_seen": 4595584, + "step": 1455 + }, + { + "epoch": 0.0934639267652519, + "grad_norm": 33.8602294921875, + "learning_rate": 1.8669225847728727e-06, + "loss": 0.5711, + "num_input_tokens_seen": 4610112, + "step": 1460 + }, + { + "epoch": 0.0937840087062288, + "grad_norm": 53.692935943603516, + "learning_rate": 1.8733205374280229e-06, + "loss": 0.6948, + "num_input_tokens_seen": 4626432, + "step": 1465 + }, + { + "epoch": 0.09410409064720568, + "grad_norm": 38.61556625366211, + "learning_rate": 1.8797184900831733e-06, + "loss": 0.5771, + "num_input_tokens_seen": 4641792, + "step": 1470 + }, + { + "epoch": 0.09442417258818257, + "grad_norm": 18.766170501708984, + "learning_rate": 1.8861164427383236e-06, + "loss": 0.4046, + "num_input_tokens_seen": 4656896, + "step": 1475 + }, + { + "epoch": 0.09474425452915947, + "grad_norm": 47.406803131103516, + "learning_rate": 1.8925143953934738e-06, + "loss": 0.605, + "num_input_tokens_seen": 4673472, + "step": 1480 + }, + { + "epoch": 0.09506433647013636, + "grad_norm": 26.30023765563965, + "learning_rate": 1.8989123480486244e-06, + "loss": 0.426, + "num_input_tokens_seen": 4688896, + "step": 1485 + }, + { + "epoch": 0.09538441841111324, + "grad_norm": 43.65274429321289, + "learning_rate": 1.9053103007037748e-06, + "loss": 0.6785, + "num_input_tokens_seen": 4704576, + "step": 1490 + }, + { + "epoch": 0.09570450035209013, + "grad_norm": 38.707481384277344, + "learning_rate": 1.911708253358925e-06, + "loss": 0.6069, + "num_input_tokens_seen": 4719040, + "step": 1495 + }, + { + "epoch": 0.09602458229306703, + "grad_norm": 30.288116455078125, + "learning_rate": 1.9181062060140753e-06, + "loss": 0.4831, + "num_input_tokens_seen": 4733696, + "step": 1500 + }, + { + "epoch": 0.09634466423404392, + "grad_norm": 39.540462493896484, + "learning_rate": 1.9245041586692255e-06, + "loss": 0.6045, + "num_input_tokens_seen": 4748992, + "step": 1505 + }, + { + "epoch": 0.0966647461750208, + "grad_norm": 50.21097183227539, + "learning_rate": 1.930902111324376e-06, + "loss": 0.6876, + "num_input_tokens_seen": 4764992, + "step": 1510 + }, + { + "epoch": 0.09698482811599769, + "grad_norm": 45.92460632324219, + "learning_rate": 1.9373000639795267e-06, + "loss": 0.6773, + "num_input_tokens_seen": 4780352, + "step": 1515 + }, + { + "epoch": 0.09730491005697459, + "grad_norm": 31.217529296875, + "learning_rate": 1.943698016634677e-06, + "loss": 0.5393, + "num_input_tokens_seen": 4796224, + "step": 1520 + }, + { + "epoch": 0.09762499199795148, + "grad_norm": 30.454994201660156, + "learning_rate": 1.950095969289827e-06, + "loss": 0.5401, + "num_input_tokens_seen": 4811840, + "step": 1525 + }, + { + "epoch": 0.09794507393892836, + "grad_norm": 26.992660522460938, + "learning_rate": 1.9564939219449776e-06, + "loss": 0.5811, + "num_input_tokens_seen": 4826432, + "step": 1530 + }, + { + "epoch": 0.09826515587990525, + "grad_norm": 28.93795394897461, + "learning_rate": 1.9628918746001278e-06, + "loss": 0.393, + "num_input_tokens_seen": 4841920, + "step": 1535 + }, + { + "epoch": 0.09858523782088215, + "grad_norm": 30.038558959960938, + "learning_rate": 1.9692898272552783e-06, + "loss": 0.5971, + "num_input_tokens_seen": 4857536, + "step": 1540 + }, + { + "epoch": 0.09890531976185904, + "grad_norm": 56.7470588684082, + "learning_rate": 1.9756877799104285e-06, + "loss": 0.6844, + "num_input_tokens_seen": 4873408, + "step": 1545 + }, + { + "epoch": 0.09922540170283592, + "grad_norm": 43.21520233154297, + "learning_rate": 1.9820857325655787e-06, + "loss": 0.5973, + "num_input_tokens_seen": 4889536, + "step": 1550 + }, + { + "epoch": 0.09954548364381281, + "grad_norm": 59.145320892333984, + "learning_rate": 1.9884836852207293e-06, + "loss": 0.627, + "num_input_tokens_seen": 4904448, + "step": 1555 + }, + { + "epoch": 0.0998655655847897, + "grad_norm": 25.44906997680664, + "learning_rate": 1.99488163787588e-06, + "loss": 0.6569, + "num_input_tokens_seen": 4919616, + "step": 1560 + }, + { + "epoch": 0.10012163113757122, + "eval_loss": 0.5394634008407593, + "eval_runtime": 49.1959, + "eval_samples_per_second": 282.259, + "eval_steps_per_second": 35.288, + "num_input_tokens_seen": 4931328, + "step": 1564 + }, + { + "epoch": 0.1001856475257666, + "grad_norm": 30.809673309326172, + "learning_rate": 1.9999999750297625e-06, + "loss": 0.516, + "num_input_tokens_seen": 4934144, + "step": 1565 + }, + { + "epoch": 0.1005057294667435, + "grad_norm": 31.836828231811523, + "learning_rate": 1.9999991010715873e-06, + "loss": 0.523, + "num_input_tokens_seen": 4950272, + "step": 1570 + }, + { + "epoch": 0.10082581140772037, + "grad_norm": 43.53628921508789, + "learning_rate": 1.999996978602793e-06, + "loss": 0.5346, + "num_input_tokens_seen": 4965056, + "step": 1575 + }, + { + "epoch": 0.10114589334869727, + "grad_norm": 33.710304260253906, + "learning_rate": 1.99999360762603e-06, + "loss": 0.5489, + "num_input_tokens_seen": 4980160, + "step": 1580 + }, + { + "epoch": 0.10146597528967416, + "grad_norm": 36.39333724975586, + "learning_rate": 1.9999889881455065e-06, + "loss": 0.453, + "num_input_tokens_seen": 4996992, + "step": 1585 + }, + { + "epoch": 0.10178605723065105, + "grad_norm": 35.11768341064453, + "learning_rate": 1.9999831201669897e-06, + "loss": 0.5146, + "num_input_tokens_seen": 5012608, + "step": 1590 + }, + { + "epoch": 0.10210613917162793, + "grad_norm": 38.27321243286133, + "learning_rate": 1.9999760036978067e-06, + "loss": 0.4848, + "num_input_tokens_seen": 5027840, + "step": 1595 + }, + { + "epoch": 0.10242622111260483, + "grad_norm": 33.53286361694336, + "learning_rate": 1.9999676387468417e-06, + "loss": 0.5746, + "num_input_tokens_seen": 5042752, + "step": 1600 + }, + { + "epoch": 0.10274630305358172, + "grad_norm": 26.00925636291504, + "learning_rate": 1.999958025324539e-06, + "loss": 0.5487, + "num_input_tokens_seen": 5058624, + "step": 1605 + }, + { + "epoch": 0.10306638499455861, + "grad_norm": 36.0686149597168, + "learning_rate": 1.999947163442901e-06, + "loss": 0.6233, + "num_input_tokens_seen": 5075008, + "step": 1610 + }, + { + "epoch": 0.10338646693553549, + "grad_norm": 48.770294189453125, + "learning_rate": 1.9999350531154884e-06, + "loss": 0.5332, + "num_input_tokens_seen": 5090880, + "step": 1615 + }, + { + "epoch": 0.10370654887651239, + "grad_norm": 51.00628662109375, + "learning_rate": 1.9999216943574223e-06, + "loss": 0.5713, + "num_input_tokens_seen": 5106816, + "step": 1620 + }, + { + "epoch": 0.10402663081748928, + "grad_norm": 22.478723526000977, + "learning_rate": 1.9999070871853796e-06, + "loss": 0.4563, + "num_input_tokens_seen": 5123904, + "step": 1625 + }, + { + "epoch": 0.10434671275846617, + "grad_norm": 40.58604049682617, + "learning_rate": 1.9998912316175986e-06, + "loss": 0.4954, + "num_input_tokens_seen": 5140160, + "step": 1630 + }, + { + "epoch": 0.10466679469944305, + "grad_norm": 46.91875457763672, + "learning_rate": 1.9998741276738752e-06, + "loss": 0.5159, + "num_input_tokens_seen": 5156288, + "step": 1635 + }, + { + "epoch": 0.10498687664041995, + "grad_norm": 38.68816375732422, + "learning_rate": 1.999855775375563e-06, + "loss": 0.5823, + "num_input_tokens_seen": 5171776, + "step": 1640 + }, + { + "epoch": 0.10530695858139684, + "grad_norm": 40.31874465942383, + "learning_rate": 1.999836174745576e-06, + "loss": 0.683, + "num_input_tokens_seen": 5189504, + "step": 1645 + }, + { + "epoch": 0.10562704052237373, + "grad_norm": 45.76553726196289, + "learning_rate": 1.9998153258083853e-06, + "loss": 0.5783, + "num_input_tokens_seen": 5205056, + "step": 1650 + }, + { + "epoch": 0.10594712246335061, + "grad_norm": 37.635047912597656, + "learning_rate": 1.9997932285900214e-06, + "loss": 0.586, + "num_input_tokens_seen": 5222656, + "step": 1655 + }, + { + "epoch": 0.1062672044043275, + "grad_norm": 53.1572380065918, + "learning_rate": 1.9997698831180726e-06, + "loss": 0.6272, + "num_input_tokens_seen": 5238848, + "step": 1660 + }, + { + "epoch": 0.1065872863453044, + "grad_norm": 33.89291000366211, + "learning_rate": 1.999745289421686e-06, + "loss": 0.5203, + "num_input_tokens_seen": 5255296, + "step": 1665 + }, + { + "epoch": 0.10690736828628129, + "grad_norm": 58.435916900634766, + "learning_rate": 1.9997194475315674e-06, + "loss": 0.7716, + "num_input_tokens_seen": 5270336, + "step": 1670 + }, + { + "epoch": 0.10722745022725817, + "grad_norm": 36.215858459472656, + "learning_rate": 1.9996923574799808e-06, + "loss": 0.4842, + "num_input_tokens_seen": 5286720, + "step": 1675 + }, + { + "epoch": 0.10754753216823507, + "grad_norm": 32.55356979370117, + "learning_rate": 1.9996640193007476e-06, + "loss": 0.6428, + "num_input_tokens_seen": 5301632, + "step": 1680 + }, + { + "epoch": 0.10786761410921196, + "grad_norm": 49.88198471069336, + "learning_rate": 1.9996344330292495e-06, + "loss": 0.403, + "num_input_tokens_seen": 5316544, + "step": 1685 + }, + { + "epoch": 0.10818769605018885, + "grad_norm": 34.35507583618164, + "learning_rate": 1.9996035987024245e-06, + "loss": 0.5503, + "num_input_tokens_seen": 5332544, + "step": 1690 + }, + { + "epoch": 0.10850777799116573, + "grad_norm": 35.31010437011719, + "learning_rate": 1.99957151635877e-06, + "loss": 0.5388, + "num_input_tokens_seen": 5348096, + "step": 1695 + }, + { + "epoch": 0.10882785993214263, + "grad_norm": 50.9265022277832, + "learning_rate": 1.999538186038341e-06, + "loss": 0.6275, + "num_input_tokens_seen": 5362368, + "step": 1700 + }, + { + "epoch": 0.10914794187311952, + "grad_norm": 34.14656066894531, + "learning_rate": 1.999503607782751e-06, + "loss": 0.5426, + "num_input_tokens_seen": 5378176, + "step": 1705 + }, + { + "epoch": 0.10946802381409641, + "grad_norm": 42.861480712890625, + "learning_rate": 1.999467781635171e-06, + "loss": 0.5163, + "num_input_tokens_seen": 5394752, + "step": 1710 + }, + { + "epoch": 0.10978810575507329, + "grad_norm": 31.575403213500977, + "learning_rate": 1.9994307076403306e-06, + "loss": 0.6991, + "num_input_tokens_seen": 5412160, + "step": 1715 + }, + { + "epoch": 0.11010818769605019, + "grad_norm": 35.84833908081055, + "learning_rate": 1.999392385844517e-06, + "loss": 0.5245, + "num_input_tokens_seen": 5427840, + "step": 1720 + }, + { + "epoch": 0.11042826963702708, + "grad_norm": 36.32638931274414, + "learning_rate": 1.9993528162955753e-06, + "loss": 0.4035, + "num_input_tokens_seen": 5444224, + "step": 1725 + }, + { + "epoch": 0.11074835157800397, + "grad_norm": 60.70829391479492, + "learning_rate": 1.9993119990429095e-06, + "loss": 0.5767, + "num_input_tokens_seen": 5459648, + "step": 1730 + }, + { + "epoch": 0.11106843351898085, + "grad_norm": 47.9375114440918, + "learning_rate": 1.9992699341374794e-06, + "loss": 0.7821, + "num_input_tokens_seen": 5475008, + "step": 1735 + }, + { + "epoch": 0.11138851545995775, + "grad_norm": 32.798091888427734, + "learning_rate": 1.9992266216318033e-06, + "loss": 0.5285, + "num_input_tokens_seen": 5491456, + "step": 1740 + }, + { + "epoch": 0.11170859740093464, + "grad_norm": 35.342803955078125, + "learning_rate": 1.9991820615799583e-06, + "loss": 0.5674, + "num_input_tokens_seen": 5507520, + "step": 1745 + }, + { + "epoch": 0.11202867934191153, + "grad_norm": 49.72675704956055, + "learning_rate": 1.999136254037578e-06, + "loss": 0.6917, + "num_input_tokens_seen": 5523072, + "step": 1750 + }, + { + "epoch": 0.11234876128288843, + "grad_norm": 37.71804428100586, + "learning_rate": 1.999089199061853e-06, + "loss": 0.5094, + "num_input_tokens_seen": 5538304, + "step": 1755 + }, + { + "epoch": 0.1126688432238653, + "grad_norm": 35.397056579589844, + "learning_rate": 1.9990408967115326e-06, + "loss": 0.4612, + "num_input_tokens_seen": 5553920, + "step": 1760 + }, + { + "epoch": 0.1129889251648422, + "grad_norm": 26.074499130249023, + "learning_rate": 1.998991347046922e-06, + "loss": 0.4599, + "num_input_tokens_seen": 5569344, + "step": 1765 + }, + { + "epoch": 0.11330900710581909, + "grad_norm": 33.73558044433594, + "learning_rate": 1.9989405501298857e-06, + "loss": 0.5104, + "num_input_tokens_seen": 5585856, + "step": 1770 + }, + { + "epoch": 0.11362908904679599, + "grad_norm": 50.979820251464844, + "learning_rate": 1.9988885060238436e-06, + "loss": 0.5755, + "num_input_tokens_seen": 5603840, + "step": 1775 + }, + { + "epoch": 0.11394917098777287, + "grad_norm": 25.762378692626953, + "learning_rate": 1.9988352147937735e-06, + "loss": 0.5167, + "num_input_tokens_seen": 5620352, + "step": 1780 + }, + { + "epoch": 0.11426925292874976, + "grad_norm": 44.76283645629883, + "learning_rate": 1.99878067650621e-06, + "loss": 0.5382, + "num_input_tokens_seen": 5636544, + "step": 1785 + }, + { + "epoch": 0.11458933486972665, + "grad_norm": 38.91508102416992, + "learning_rate": 1.998724891229245e-06, + "loss": 0.5438, + "num_input_tokens_seen": 5652672, + "step": 1790 + }, + { + "epoch": 0.11490941681070355, + "grad_norm": 42.947147369384766, + "learning_rate": 1.998667859032527e-06, + "loss": 0.4956, + "num_input_tokens_seen": 5668224, + "step": 1795 + }, + { + "epoch": 0.11522949875168043, + "grad_norm": 21.492956161499023, + "learning_rate": 1.9986095799872613e-06, + "loss": 0.4506, + "num_input_tokens_seen": 5684480, + "step": 1800 + }, + { + "epoch": 0.11554958069265732, + "grad_norm": 65.54640197753906, + "learning_rate": 1.99855005416621e-06, + "loss": 0.472, + "num_input_tokens_seen": 5700864, + "step": 1805 + }, + { + "epoch": 0.11586966263363421, + "grad_norm": 40.5084114074707, + "learning_rate": 1.998489281643692e-06, + "loss": 0.5965, + "num_input_tokens_seen": 5716224, + "step": 1810 + }, + { + "epoch": 0.1161897445746111, + "grad_norm": 29.857545852661133, + "learning_rate": 1.998427262495582e-06, + "loss": 0.4977, + "num_input_tokens_seen": 5733056, + "step": 1815 + }, + { + "epoch": 0.11650982651558799, + "grad_norm": 36.56293487548828, + "learning_rate": 1.9983639967993124e-06, + "loss": 0.6683, + "num_input_tokens_seen": 5749120, + "step": 1820 + }, + { + "epoch": 0.11682990845656488, + "grad_norm": 30.559627532958984, + "learning_rate": 1.99829948463387e-06, + "loss": 0.7297, + "num_input_tokens_seen": 5763968, + "step": 1825 + }, + { + "epoch": 0.11714999039754177, + "grad_norm": 31.007530212402344, + "learning_rate": 1.9982337260798e-06, + "loss": 0.543, + "num_input_tokens_seen": 5779520, + "step": 1830 + }, + { + "epoch": 0.11747007233851867, + "grad_norm": 36.148040771484375, + "learning_rate": 1.998166721219203e-06, + "loss": 0.5856, + "num_input_tokens_seen": 5798848, + "step": 1835 + }, + { + "epoch": 0.11779015427949555, + "grad_norm": 31.41288185119629, + "learning_rate": 1.9980984701357338e-06, + "loss": 0.5155, + "num_input_tokens_seen": 5813952, + "step": 1840 + }, + { + "epoch": 0.11811023622047244, + "grad_norm": 36.17179489135742, + "learning_rate": 1.998028972914606e-06, + "loss": 0.4362, + "num_input_tokens_seen": 5830016, + "step": 1845 + }, + { + "epoch": 0.11843031816144933, + "grad_norm": 38.044334411621094, + "learning_rate": 1.9979582296425877e-06, + "loss": 0.5893, + "num_input_tokens_seen": 5845312, + "step": 1850 + }, + { + "epoch": 0.11875040010242623, + "grad_norm": 22.015993118286133, + "learning_rate": 1.9978862404080022e-06, + "loss": 0.5851, + "num_input_tokens_seen": 5860672, + "step": 1855 + }, + { + "epoch": 0.1190704820434031, + "grad_norm": 34.7830696105957, + "learning_rate": 1.9978130053007295e-06, + "loss": 0.5376, + "num_input_tokens_seen": 5875776, + "step": 1860 + }, + { + "epoch": 0.11939056398438, + "grad_norm": 37.767024993896484, + "learning_rate": 1.9977385244122034e-06, + "loss": 0.4319, + "num_input_tokens_seen": 5891200, + "step": 1865 + }, + { + "epoch": 0.11971064592535689, + "grad_norm": 35.13771438598633, + "learning_rate": 1.997662797835415e-06, + "loss": 0.4821, + "num_input_tokens_seen": 5907008, + "step": 1870 + }, + { + "epoch": 0.12003072786633379, + "grad_norm": 34.38051986694336, + "learning_rate": 1.9975858256649097e-06, + "loss": 0.4645, + "num_input_tokens_seen": 5923264, + "step": 1875 + }, + { + "epoch": 0.12035080980731067, + "grad_norm": 52.10721206665039, + "learning_rate": 1.997507607996788e-06, + "loss": 0.4911, + "num_input_tokens_seen": 5939648, + "step": 1880 + }, + { + "epoch": 0.12067089174828756, + "grad_norm": 25.434171676635742, + "learning_rate": 1.997428144928706e-06, + "loss": 0.4557, + "num_input_tokens_seen": 5955520, + "step": 1885 + }, + { + "epoch": 0.12099097368926445, + "grad_norm": 41.22515106201172, + "learning_rate": 1.9973474365598736e-06, + "loss": 0.5237, + "num_input_tokens_seen": 5971072, + "step": 1890 + }, + { + "epoch": 0.12131105563024135, + "grad_norm": 40.53886413574219, + "learning_rate": 1.9972654829910568e-06, + "loss": 0.5787, + "num_input_tokens_seen": 5987264, + "step": 1895 + }, + { + "epoch": 0.12163113757121823, + "grad_norm": 53.25776672363281, + "learning_rate": 1.9971822843245748e-06, + "loss": 0.6193, + "num_input_tokens_seen": 6002880, + "step": 1900 + }, + { + "epoch": 0.12195121951219512, + "grad_norm": 40.23493957519531, + "learning_rate": 1.997097840664303e-06, + "loss": 0.5277, + "num_input_tokens_seen": 6019520, + "step": 1905 + }, + { + "epoch": 0.12227130145317201, + "grad_norm": 64.40421295166016, + "learning_rate": 1.99701215211567e-06, + "loss": 0.5641, + "num_input_tokens_seen": 6035904, + "step": 1910 + }, + { + "epoch": 0.1225913833941489, + "grad_norm": 28.99512481689453, + "learning_rate": 1.9969252187856587e-06, + "loss": 0.6009, + "num_input_tokens_seen": 6050816, + "step": 1915 + }, + { + "epoch": 0.12291146533512579, + "grad_norm": 26.199125289916992, + "learning_rate": 1.9968370407828065e-06, + "loss": 0.4204, + "num_input_tokens_seen": 6065920, + "step": 1920 + }, + { + "epoch": 0.12323154727610268, + "grad_norm": 25.35918426513672, + "learning_rate": 1.996747618217205e-06, + "loss": 0.5962, + "num_input_tokens_seen": 6081728, + "step": 1925 + }, + { + "epoch": 0.12355162921707957, + "grad_norm": 27.474023818969727, + "learning_rate": 1.9966569512004987e-06, + "loss": 0.4945, + "num_input_tokens_seen": 6097472, + "step": 1930 + }, + { + "epoch": 0.12387171115805647, + "grad_norm": 32.64793395996094, + "learning_rate": 1.996565039845887e-06, + "loss": 0.5101, + "num_input_tokens_seen": 6113152, + "step": 1935 + }, + { + "epoch": 0.12419179309903335, + "grad_norm": 47.92166519165039, + "learning_rate": 1.996471884268122e-06, + "loss": 0.614, + "num_input_tokens_seen": 6129408, + "step": 1940 + }, + { + "epoch": 0.12451187504001024, + "grad_norm": 20.421428680419922, + "learning_rate": 1.9963774845835097e-06, + "loss": 0.545, + "num_input_tokens_seen": 6144896, + "step": 1945 + }, + { + "epoch": 0.12483195698098713, + "grad_norm": 50.85639572143555, + "learning_rate": 1.996281840909909e-06, + "loss": 0.5868, + "num_input_tokens_seen": 6160256, + "step": 1950 + }, + { + "epoch": 0.12515203892196403, + "grad_norm": 36.815921783447266, + "learning_rate": 1.9961849533667322e-06, + "loss": 0.6354, + "num_input_tokens_seen": 6175104, + "step": 1955 + }, + { + "epoch": 0.1254721208629409, + "grad_norm": 30.276325225830078, + "learning_rate": 1.9960868220749447e-06, + "loss": 0.5185, + "num_input_tokens_seen": 6190272, + "step": 1960 + }, + { + "epoch": 0.1257922028039178, + "grad_norm": 39.995033264160156, + "learning_rate": 1.9959874471570644e-06, + "loss": 0.5855, + "num_input_tokens_seen": 6205952, + "step": 1965 + }, + { + "epoch": 0.1261122847448947, + "grad_norm": 44.66065216064453, + "learning_rate": 1.9958868287371625e-06, + "loss": 0.56, + "num_input_tokens_seen": 6222592, + "step": 1970 + }, + { + "epoch": 0.12643236668587157, + "grad_norm": 37.10478591918945, + "learning_rate": 1.9957849669408617e-06, + "loss": 0.4803, + "num_input_tokens_seen": 6237696, + "step": 1975 + }, + { + "epoch": 0.12675244862684848, + "grad_norm": 53.23179244995117, + "learning_rate": 1.995681861895338e-06, + "loss": 0.4858, + "num_input_tokens_seen": 6254080, + "step": 1980 + }, + { + "epoch": 0.12707253056782536, + "grad_norm": 27.040605545043945, + "learning_rate": 1.9955775137293187e-06, + "loss": 0.5741, + "num_input_tokens_seen": 6270016, + "step": 1985 + }, + { + "epoch": 0.12739261250880227, + "grad_norm": 50.933433532714844, + "learning_rate": 1.9954719225730845e-06, + "loss": 0.6124, + "num_input_tokens_seen": 6285184, + "step": 1990 + }, + { + "epoch": 0.12771269444977915, + "grad_norm": 47.980018615722656, + "learning_rate": 1.9953650885584666e-06, + "loss": 0.4774, + "num_input_tokens_seen": 6300992, + "step": 1995 + }, + { + "epoch": 0.12803277639075603, + "grad_norm": 15.962865829467773, + "learning_rate": 1.995257011818849e-06, + "loss": 0.5445, + "num_input_tokens_seen": 6315392, + "step": 2000 + }, + { + "epoch": 0.12835285833173293, + "grad_norm": 38.00539779663086, + "learning_rate": 1.9951476924891666e-06, + "loss": 0.4739, + "num_input_tokens_seen": 6331136, + "step": 2005 + }, + { + "epoch": 0.1286729402727098, + "grad_norm": 33.02660369873047, + "learning_rate": 1.9950371307059056e-06, + "loss": 0.5553, + "num_input_tokens_seen": 6347584, + "step": 2010 + }, + { + "epoch": 0.1289930222136867, + "grad_norm": 55.436187744140625, + "learning_rate": 1.9949253266071036e-06, + "loss": 0.5728, + "num_input_tokens_seen": 6362560, + "step": 2015 + }, + { + "epoch": 0.1293131041546636, + "grad_norm": 28.438800811767578, + "learning_rate": 1.9948122803323503e-06, + "loss": 0.5075, + "num_input_tokens_seen": 6378304, + "step": 2020 + }, + { + "epoch": 0.12963318609564048, + "grad_norm": 39.819091796875, + "learning_rate": 1.9946979920227844e-06, + "loss": 0.5147, + "num_input_tokens_seen": 6393280, + "step": 2025 + }, + { + "epoch": 0.1299532680366174, + "grad_norm": 61.551517486572266, + "learning_rate": 1.994582461821096e-06, + "loss": 0.5251, + "num_input_tokens_seen": 6409472, + "step": 2030 + }, + { + "epoch": 0.13027334997759427, + "grad_norm": 71.56808471679688, + "learning_rate": 1.9944656898715267e-06, + "loss": 0.7157, + "num_input_tokens_seen": 6424960, + "step": 2035 + }, + { + "epoch": 0.13059343191857115, + "grad_norm": 31.08255386352539, + "learning_rate": 1.994347676319867e-06, + "loss": 0.6057, + "num_input_tokens_seen": 6440000, + "step": 2040 + }, + { + "epoch": 0.13091351385954805, + "grad_norm": 24.44256019592285, + "learning_rate": 1.994228421313459e-06, + "loss": 0.453, + "num_input_tokens_seen": 6457600, + "step": 2045 + }, + { + "epoch": 0.13123359580052493, + "grad_norm": 39.8853759765625, + "learning_rate": 1.994107925001193e-06, + "loss": 0.5143, + "num_input_tokens_seen": 6473088, + "step": 2050 + }, + { + "epoch": 0.1315536777415018, + "grad_norm": 51.965187072753906, + "learning_rate": 1.9939861875335108e-06, + "loss": 0.6013, + "num_input_tokens_seen": 6487680, + "step": 2055 + }, + { + "epoch": 0.13187375968247872, + "grad_norm": 35.12892532348633, + "learning_rate": 1.9938632090624025e-06, + "loss": 0.4831, + "num_input_tokens_seen": 6503296, + "step": 2060 + }, + { + "epoch": 0.1321938416234556, + "grad_norm": 14.63175106048584, + "learning_rate": 1.9937389897414087e-06, + "loss": 0.5363, + "num_input_tokens_seen": 6518912, + "step": 2065 + }, + { + "epoch": 0.1325139235644325, + "grad_norm": 43.0014533996582, + "learning_rate": 1.993613529725618e-06, + "loss": 0.5631, + "num_input_tokens_seen": 6534784, + "step": 2070 + }, + { + "epoch": 0.13283400550540939, + "grad_norm": 51.7596435546875, + "learning_rate": 1.99348682917167e-06, + "loss": 0.5248, + "num_input_tokens_seen": 6550528, + "step": 2075 + }, + { + "epoch": 0.13315408744638627, + "grad_norm": 34.12824630737305, + "learning_rate": 1.99335888823775e-06, + "loss": 0.5344, + "num_input_tokens_seen": 6566144, + "step": 2080 + }, + { + "epoch": 0.13347416938736317, + "grad_norm": 51.184452056884766, + "learning_rate": 1.993229707083595e-06, + "loss": 0.5605, + "num_input_tokens_seen": 6583872, + "step": 2085 + }, + { + "epoch": 0.13379425132834005, + "grad_norm": 19.79715919494629, + "learning_rate": 1.993099285870489e-06, + "loss": 0.4144, + "num_input_tokens_seen": 6602304, + "step": 2090 + }, + { + "epoch": 0.13411433326931693, + "grad_norm": 41.58517074584961, + "learning_rate": 1.992967624761264e-06, + "loss": 0.4607, + "num_input_tokens_seen": 6618112, + "step": 2095 + }, + { + "epoch": 0.13443441521029384, + "grad_norm": 47.04132080078125, + "learning_rate": 1.9928347239203014e-06, + "loss": 0.6174, + "num_input_tokens_seen": 6635584, + "step": 2100 + }, + { + "epoch": 0.13475449715127072, + "grad_norm": 34.34235763549805, + "learning_rate": 1.9927005835135282e-06, + "loss": 0.5339, + "num_input_tokens_seen": 6653568, + "step": 2105 + }, + { + "epoch": 0.13507457909224763, + "grad_norm": 30.708681106567383, + "learning_rate": 1.9925652037084214e-06, + "loss": 0.4604, + "num_input_tokens_seen": 6668864, + "step": 2110 + }, + { + "epoch": 0.1353946610332245, + "grad_norm": 25.8023738861084, + "learning_rate": 1.9924285846740037e-06, + "loss": 0.4852, + "num_input_tokens_seen": 6684416, + "step": 2115 + }, + { + "epoch": 0.13571474297420139, + "grad_norm": 49.56015396118164, + "learning_rate": 1.9922907265808452e-06, + "loss": 0.5927, + "num_input_tokens_seen": 6699392, + "step": 2120 + }, + { + "epoch": 0.1360348249151783, + "grad_norm": 43.24879455566406, + "learning_rate": 1.9921516296010643e-06, + "loss": 0.5477, + "num_input_tokens_seen": 6714560, + "step": 2125 + }, + { + "epoch": 0.13635490685615517, + "grad_norm": 50.094120025634766, + "learning_rate": 1.9920112939083246e-06, + "loss": 0.5584, + "num_input_tokens_seen": 6729920, + "step": 2130 + }, + { + "epoch": 0.13667498879713205, + "grad_norm": 27.34825897216797, + "learning_rate": 1.9918697196778367e-06, + "loss": 0.5555, + "num_input_tokens_seen": 6744768, + "step": 2135 + }, + { + "epoch": 0.13699507073810896, + "grad_norm": 26.479101181030273, + "learning_rate": 1.9917269070863578e-06, + "loss": 0.4607, + "num_input_tokens_seen": 6759680, + "step": 2140 + }, + { + "epoch": 0.13731515267908584, + "grad_norm": 35.83186340332031, + "learning_rate": 1.9915828563121915e-06, + "loss": 0.5094, + "num_input_tokens_seen": 6775168, + "step": 2145 + }, + { + "epoch": 0.13763523462006275, + "grad_norm": 43.06388473510742, + "learning_rate": 1.9914375675351865e-06, + "loss": 0.5364, + "num_input_tokens_seen": 6791296, + "step": 2150 + }, + { + "epoch": 0.13795531656103963, + "grad_norm": 18.07638168334961, + "learning_rate": 1.991291040936738e-06, + "loss": 0.43, + "num_input_tokens_seen": 6808640, + "step": 2155 + }, + { + "epoch": 0.1382753985020165, + "grad_norm": 41.67695236206055, + "learning_rate": 1.9911432766997857e-06, + "loss": 0.6627, + "num_input_tokens_seen": 6824064, + "step": 2160 + }, + { + "epoch": 0.1385954804429934, + "grad_norm": 56.66114807128906, + "learning_rate": 1.990994275008815e-06, + "loss": 0.4426, + "num_input_tokens_seen": 6839872, + "step": 2165 + }, + { + "epoch": 0.1389155623839703, + "grad_norm": 58.23060989379883, + "learning_rate": 1.9908440360498565e-06, + "loss": 0.5081, + "num_input_tokens_seen": 6855744, + "step": 2170 + }, + { + "epoch": 0.1392356443249472, + "grad_norm": 45.5991096496582, + "learning_rate": 1.990692560010485e-06, + "loss": 0.5566, + "num_input_tokens_seen": 6869632, + "step": 2175 + }, + { + "epoch": 0.13955572626592408, + "grad_norm": 27.95288848876953, + "learning_rate": 1.9905398470798206e-06, + "loss": 0.448, + "num_input_tokens_seen": 6885696, + "step": 2180 + }, + { + "epoch": 0.13987580820690096, + "grad_norm": 29.043428421020508, + "learning_rate": 1.990385897448527e-06, + "loss": 0.3634, + "num_input_tokens_seen": 6901504, + "step": 2185 + }, + { + "epoch": 0.14019589014787787, + "grad_norm": 36.92293167114258, + "learning_rate": 1.9902307113088114e-06, + "loss": 0.5822, + "num_input_tokens_seen": 6916480, + "step": 2190 + }, + { + "epoch": 0.14051597208885475, + "grad_norm": 49.32163619995117, + "learning_rate": 1.9900742888544264e-06, + "loss": 0.4818, + "num_input_tokens_seen": 6932416, + "step": 2195 + }, + { + "epoch": 0.14083605402983163, + "grad_norm": 46.43427658081055, + "learning_rate": 1.989916630280667e-06, + "loss": 0.534, + "num_input_tokens_seen": 6948992, + "step": 2200 + }, + { + "epoch": 0.14115613597080853, + "grad_norm": 57.13213348388672, + "learning_rate": 1.989757735784372e-06, + "loss": 0.4636, + "num_input_tokens_seen": 6964416, + "step": 2205 + }, + { + "epoch": 0.1414762179117854, + "grad_norm": 39.992496490478516, + "learning_rate": 1.989597605563923e-06, + "loss": 0.4218, + "num_input_tokens_seen": 6980544, + "step": 2210 + }, + { + "epoch": 0.14179629985276232, + "grad_norm": 29.56856918334961, + "learning_rate": 1.9894362398192437e-06, + "loss": 0.5658, + "num_input_tokens_seen": 6997440, + "step": 2215 + }, + { + "epoch": 0.1421163817937392, + "grad_norm": 22.893774032592773, + "learning_rate": 1.9892736387518023e-06, + "loss": 0.4163, + "num_input_tokens_seen": 7012672, + "step": 2220 + }, + { + "epoch": 0.14243646373471608, + "grad_norm": 58.755828857421875, + "learning_rate": 1.9891098025646075e-06, + "loss": 0.4773, + "num_input_tokens_seen": 7027648, + "step": 2225 + }, + { + "epoch": 0.142756545675693, + "grad_norm": 28.471839904785156, + "learning_rate": 1.9889447314622105e-06, + "loss": 0.5303, + "num_input_tokens_seen": 7043200, + "step": 2230 + }, + { + "epoch": 0.14307662761666987, + "grad_norm": 41.83107376098633, + "learning_rate": 1.9887784256507046e-06, + "loss": 0.7152, + "num_input_tokens_seen": 7058688, + "step": 2235 + }, + { + "epoch": 0.14339670955764675, + "grad_norm": 32.69862365722656, + "learning_rate": 1.988610885337725e-06, + "loss": 0.6679, + "num_input_tokens_seen": 7074048, + "step": 2240 + }, + { + "epoch": 0.14371679149862365, + "grad_norm": 32.3195686340332, + "learning_rate": 1.9884421107324476e-06, + "loss": 0.5261, + "num_input_tokens_seen": 7089792, + "step": 2245 + }, + { + "epoch": 0.14403687343960053, + "grad_norm": 39.98912811279297, + "learning_rate": 1.9882721020455893e-06, + "loss": 0.4755, + "num_input_tokens_seen": 7104640, + "step": 2250 + }, + { + "epoch": 0.14435695538057744, + "grad_norm": 31.75237464904785, + "learning_rate": 1.988100859489408e-06, + "loss": 0.5019, + "num_input_tokens_seen": 7120064, + "step": 2255 + }, + { + "epoch": 0.14467703732155432, + "grad_norm": 34.01973342895508, + "learning_rate": 1.9879283832777017e-06, + "loss": 0.4754, + "num_input_tokens_seen": 7135232, + "step": 2260 + }, + { + "epoch": 0.1449971192625312, + "grad_norm": 48.68187713623047, + "learning_rate": 1.9877546736258096e-06, + "loss": 0.5075, + "num_input_tokens_seen": 7149632, + "step": 2265 + }, + { + "epoch": 0.1453172012035081, + "grad_norm": 38.213932037353516, + "learning_rate": 1.98757973075061e-06, + "loss": 0.4107, + "num_input_tokens_seen": 7164352, + "step": 2270 + }, + { + "epoch": 0.14563728314448499, + "grad_norm": 35.81578063964844, + "learning_rate": 1.987403554870521e-06, + "loss": 0.5188, + "num_input_tokens_seen": 7179776, + "step": 2275 + }, + { + "epoch": 0.14595736508546187, + "grad_norm": 36.673587799072266, + "learning_rate": 1.9872261462055003e-06, + "loss": 0.4212, + "num_input_tokens_seen": 7194240, + "step": 2280 + }, + { + "epoch": 0.14627744702643877, + "grad_norm": 21.095298767089844, + "learning_rate": 1.987047504977045e-06, + "loss": 0.4335, + "num_input_tokens_seen": 7209472, + "step": 2285 + }, + { + "epoch": 0.14659752896741565, + "grad_norm": 50.36029052734375, + "learning_rate": 1.9868676314081902e-06, + "loss": 0.414, + "num_input_tokens_seen": 7225088, + "step": 2290 + }, + { + "epoch": 0.14691761090839256, + "grad_norm": 74.21929931640625, + "learning_rate": 1.9866865257235107e-06, + "loss": 0.6901, + "num_input_tokens_seen": 7240704, + "step": 2295 + }, + { + "epoch": 0.14723769284936944, + "grad_norm": 29.289196014404297, + "learning_rate": 1.9865041881491188e-06, + "loss": 0.4177, + "num_input_tokens_seen": 7256000, + "step": 2300 + }, + { + "epoch": 0.14755777479034632, + "grad_norm": 50.457210540771484, + "learning_rate": 1.9863206189126653e-06, + "loss": 0.6016, + "num_input_tokens_seen": 7270336, + "step": 2305 + }, + { + "epoch": 0.14787785673132323, + "grad_norm": 49.66255187988281, + "learning_rate": 1.9861358182433382e-06, + "loss": 0.5612, + "num_input_tokens_seen": 7285440, + "step": 2310 + }, + { + "epoch": 0.1481979386723001, + "grad_norm": 37.03299331665039, + "learning_rate": 1.9859497863718634e-06, + "loss": 0.4711, + "num_input_tokens_seen": 7301120, + "step": 2315 + }, + { + "epoch": 0.14851802061327699, + "grad_norm": 22.66673469543457, + "learning_rate": 1.985762523530504e-06, + "loss": 0.5204, + "num_input_tokens_seen": 7316416, + "step": 2320 + }, + { + "epoch": 0.1488381025542539, + "grad_norm": 27.409502029418945, + "learning_rate": 1.98557402995306e-06, + "loss": 0.5051, + "num_input_tokens_seen": 7332160, + "step": 2325 + }, + { + "epoch": 0.14915818449523077, + "grad_norm": 40.534942626953125, + "learning_rate": 1.985384305874868e-06, + "loss": 0.7069, + "num_input_tokens_seen": 7347776, + "step": 2330 + }, + { + "epoch": 0.14947826643620768, + "grad_norm": 33.9571647644043, + "learning_rate": 1.9851933515328e-06, + "loss": 0.5467, + "num_input_tokens_seen": 7363200, + "step": 2335 + }, + { + "epoch": 0.14979834837718456, + "grad_norm": 40.87738037109375, + "learning_rate": 1.985001167165265e-06, + "loss": 0.4699, + "num_input_tokens_seen": 7378752, + "step": 2340 + }, + { + "epoch": 0.15011843031816144, + "grad_norm": 36.36539840698242, + "learning_rate": 1.984807753012208e-06, + "loss": 0.5165, + "num_input_tokens_seen": 7393984, + "step": 2345 + }, + { + "epoch": 0.15018244670635683, + "eval_loss": 0.5076366662979126, + "eval_runtime": 49.1845, + "eval_samples_per_second": 282.325, + "eval_steps_per_second": 35.296, + "num_input_tokens_seen": 7397056, + "step": 2346 + }, + { + "epoch": 0.15043851225913835, + "grad_norm": 25.733684539794922, + "learning_rate": 1.9846131093151086e-06, + "loss": 0.5902, + "num_input_tokens_seen": 7408832, + "step": 2350 + }, + { + "epoch": 0.15075859420011523, + "grad_norm": 16.574737548828125, + "learning_rate": 1.9844172363169808e-06, + "loss": 0.4582, + "num_input_tokens_seen": 7423040, + "step": 2355 + }, + { + "epoch": 0.15107867614109213, + "grad_norm": 53.731632232666016, + "learning_rate": 1.9842201342623756e-06, + "loss": 0.5117, + "num_input_tokens_seen": 7438464, + "step": 2360 + }, + { + "epoch": 0.151398758082069, + "grad_norm": 28.75635528564453, + "learning_rate": 1.9840218033973766e-06, + "loss": 0.5205, + "num_input_tokens_seen": 7453824, + "step": 2365 + }, + { + "epoch": 0.1517188400230459, + "grad_norm": 36.89908981323242, + "learning_rate": 1.9838222439696027e-06, + "loss": 0.5717, + "num_input_tokens_seen": 7469312, + "step": 2370 + }, + { + "epoch": 0.1520389219640228, + "grad_norm": 53.630462646484375, + "learning_rate": 1.9836214562282058e-06, + "loss": 0.7065, + "num_input_tokens_seen": 7485120, + "step": 2375 + }, + { + "epoch": 0.15235900390499968, + "grad_norm": 36.9291877746582, + "learning_rate": 1.9834194404238715e-06, + "loss": 0.4971, + "num_input_tokens_seen": 7500416, + "step": 2380 + }, + { + "epoch": 0.15267908584597656, + "grad_norm": 41.09784698486328, + "learning_rate": 1.9832161968088193e-06, + "loss": 0.4125, + "num_input_tokens_seen": 7516672, + "step": 2385 + }, + { + "epoch": 0.15299916778695347, + "grad_norm": 53.901432037353516, + "learning_rate": 1.9830117256368015e-06, + "loss": 0.4764, + "num_input_tokens_seen": 7532800, + "step": 2390 + }, + { + "epoch": 0.15331924972793035, + "grad_norm": 38.6842041015625, + "learning_rate": 1.982806027163102e-06, + "loss": 0.4924, + "num_input_tokens_seen": 7547776, + "step": 2395 + }, + { + "epoch": 0.15363933166890725, + "grad_norm": 28.168846130371094, + "learning_rate": 1.9825991016445386e-06, + "loss": 0.5579, + "num_input_tokens_seen": 7562496, + "step": 2400 + }, + { + "epoch": 0.15395941360988413, + "grad_norm": 41.71428298950195, + "learning_rate": 1.9823909493394594e-06, + "loss": 0.5286, + "num_input_tokens_seen": 7577920, + "step": 2405 + }, + { + "epoch": 0.154279495550861, + "grad_norm": 41.26945114135742, + "learning_rate": 1.9821815705077455e-06, + "loss": 0.5331, + "num_input_tokens_seen": 7593216, + "step": 2410 + }, + { + "epoch": 0.15459957749183792, + "grad_norm": 63.113800048828125, + "learning_rate": 1.9819709654108087e-06, + "loss": 0.5768, + "num_input_tokens_seen": 7608192, + "step": 2415 + }, + { + "epoch": 0.1549196594328148, + "grad_norm": 51.21147537231445, + "learning_rate": 1.981759134311592e-06, + "loss": 0.4652, + "num_input_tokens_seen": 7624448, + "step": 2420 + }, + { + "epoch": 0.15523974137379168, + "grad_norm": 45.952392578125, + "learning_rate": 1.981546077474569e-06, + "loss": 0.4847, + "num_input_tokens_seen": 7640192, + "step": 2425 + }, + { + "epoch": 0.15555982331476859, + "grad_norm": 33.45967483520508, + "learning_rate": 1.981331795165744e-06, + "loss": 0.5143, + "num_input_tokens_seen": 7654848, + "step": 2430 + }, + { + "epoch": 0.15587990525574547, + "grad_norm": 64.05781555175781, + "learning_rate": 1.9811162876526498e-06, + "loss": 0.6067, + "num_input_tokens_seen": 7670848, + "step": 2435 + }, + { + "epoch": 0.15619998719672237, + "grad_norm": 28.034521102905273, + "learning_rate": 1.9808995552043515e-06, + "loss": 0.6387, + "num_input_tokens_seen": 7686016, + "step": 2440 + }, + { + "epoch": 0.15652006913769925, + "grad_norm": 33.880714416503906, + "learning_rate": 1.9806815980914413e-06, + "loss": 0.5478, + "num_input_tokens_seen": 7701760, + "step": 2445 + }, + { + "epoch": 0.15684015107867613, + "grad_norm": 35.8829231262207, + "learning_rate": 1.9804624165860417e-06, + "loss": 0.5624, + "num_input_tokens_seen": 7717760, + "step": 2450 + }, + { + "epoch": 0.15716023301965304, + "grad_norm": 17.44162368774414, + "learning_rate": 1.9802420109618028e-06, + "loss": 0.3852, + "num_input_tokens_seen": 7733376, + "step": 2455 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 15.591707229614258, + "learning_rate": 1.980020381493904e-06, + "loss": 0.4984, + "num_input_tokens_seen": 7750464, + "step": 2460 + }, + { + "epoch": 0.1578003969016068, + "grad_norm": 38.971927642822266, + "learning_rate": 1.979797528459052e-06, + "loss": 0.4942, + "num_input_tokens_seen": 7768576, + "step": 2465 + }, + { + "epoch": 0.1581204788425837, + "grad_norm": 44.95268249511719, + "learning_rate": 1.979573452135482e-06, + "loss": 0.5334, + "num_input_tokens_seen": 7784256, + "step": 2470 + }, + { + "epoch": 0.15844056078356059, + "grad_norm": 33.37703323364258, + "learning_rate": 1.979348152802955e-06, + "loss": 0.3186, + "num_input_tokens_seen": 7799232, + "step": 2475 + }, + { + "epoch": 0.1587606427245375, + "grad_norm": 48.24396896362305, + "learning_rate": 1.979121630742761e-06, + "loss": 0.592, + "num_input_tokens_seen": 7815040, + "step": 2480 + }, + { + "epoch": 0.15908072466551437, + "grad_norm": 18.306211471557617, + "learning_rate": 1.9788938862377146e-06, + "loss": 0.4479, + "num_input_tokens_seen": 7830400, + "step": 2485 + }, + { + "epoch": 0.15940080660649125, + "grad_norm": 30.219003677368164, + "learning_rate": 1.9786649195721577e-06, + "loss": 0.4818, + "num_input_tokens_seen": 7846336, + "step": 2490 + }, + { + "epoch": 0.15972088854746816, + "grad_norm": 42.44570541381836, + "learning_rate": 1.978434731031958e-06, + "loss": 0.6323, + "num_input_tokens_seen": 7862528, + "step": 2495 + }, + { + "epoch": 0.16004097048844504, + "grad_norm": 36.15270233154297, + "learning_rate": 1.9782033209045085e-06, + "loss": 0.4541, + "num_input_tokens_seen": 7880000, + "step": 2500 + }, + { + "epoch": 0.16036105242942192, + "grad_norm": 18.829133987426758, + "learning_rate": 1.977970689478727e-06, + "loss": 0.4053, + "num_input_tokens_seen": 7895296, + "step": 2505 + }, + { + "epoch": 0.16068113437039883, + "grad_norm": 54.07673645019531, + "learning_rate": 1.9777368370450577e-06, + "loss": 0.5884, + "num_input_tokens_seen": 7911104, + "step": 2510 + }, + { + "epoch": 0.1610012163113757, + "grad_norm": 31.81148910522461, + "learning_rate": 1.9775017638954674e-06, + "loss": 0.521, + "num_input_tokens_seen": 7925952, + "step": 2515 + }, + { + "epoch": 0.1613212982523526, + "grad_norm": 31.94769287109375, + "learning_rate": 1.9772654703234476e-06, + "loss": 0.5943, + "num_input_tokens_seen": 7940928, + "step": 2520 + }, + { + "epoch": 0.1616413801933295, + "grad_norm": 43.36374282836914, + "learning_rate": 1.977027956624014e-06, + "loss": 0.5665, + "num_input_tokens_seen": 7955200, + "step": 2525 + }, + { + "epoch": 0.16196146213430637, + "grad_norm": 40.16360855102539, + "learning_rate": 1.9767892230937046e-06, + "loss": 0.5819, + "num_input_tokens_seen": 7970944, + "step": 2530 + }, + { + "epoch": 0.16228154407528328, + "grad_norm": 57.72364044189453, + "learning_rate": 1.976549270030581e-06, + "loss": 0.4311, + "num_input_tokens_seen": 7985856, + "step": 2535 + }, + { + "epoch": 0.16260162601626016, + "grad_norm": 38.951045989990234, + "learning_rate": 1.9763080977342286e-06, + "loss": 0.4678, + "num_input_tokens_seen": 8001088, + "step": 2540 + }, + { + "epoch": 0.16292170795723707, + "grad_norm": 41.949275970458984, + "learning_rate": 1.9760657065057527e-06, + "loss": 0.4965, + "num_input_tokens_seen": 8017856, + "step": 2545 + }, + { + "epoch": 0.16324178989821395, + "grad_norm": 40.579071044921875, + "learning_rate": 1.975822096647782e-06, + "loss": 0.4527, + "num_input_tokens_seen": 8033792, + "step": 2550 + }, + { + "epoch": 0.16356187183919083, + "grad_norm": 38.93642807006836, + "learning_rate": 1.975577268464466e-06, + "loss": 0.4821, + "num_input_tokens_seen": 8048256, + "step": 2555 + }, + { + "epoch": 0.16388195378016773, + "grad_norm": 30.569536209106445, + "learning_rate": 1.9753312222614765e-06, + "loss": 0.5626, + "num_input_tokens_seen": 8063680, + "step": 2560 + }, + { + "epoch": 0.1642020357211446, + "grad_norm": 53.63691329956055, + "learning_rate": 1.9750839583460036e-06, + "loss": 0.4853, + "num_input_tokens_seen": 8079744, + "step": 2565 + }, + { + "epoch": 0.1645221176621215, + "grad_norm": 32.5906982421875, + "learning_rate": 1.9748354770267603e-06, + "loss": 0.502, + "num_input_tokens_seen": 8094656, + "step": 2570 + }, + { + "epoch": 0.1648421996030984, + "grad_norm": 24.61626434326172, + "learning_rate": 1.9745857786139777e-06, + "loss": 0.5116, + "num_input_tokens_seen": 8110528, + "step": 2575 + }, + { + "epoch": 0.16516228154407528, + "grad_norm": 48.395931243896484, + "learning_rate": 1.974334863419408e-06, + "loss": 0.6028, + "num_input_tokens_seen": 8126720, + "step": 2580 + }, + { + "epoch": 0.1654823634850522, + "grad_norm": 34.782806396484375, + "learning_rate": 1.9740827317563212e-06, + "loss": 0.518, + "num_input_tokens_seen": 8141312, + "step": 2585 + }, + { + "epoch": 0.16580244542602907, + "grad_norm": 35.59202575683594, + "learning_rate": 1.973829383939507e-06, + "loss": 0.4889, + "num_input_tokens_seen": 8156736, + "step": 2590 + }, + { + "epoch": 0.16612252736700595, + "grad_norm": 49.05874252319336, + "learning_rate": 1.973574820285273e-06, + "loss": 0.4987, + "num_input_tokens_seen": 8172480, + "step": 2595 + }, + { + "epoch": 0.16644260930798285, + "grad_norm": 39.507137298583984, + "learning_rate": 1.9733190411114443e-06, + "loss": 0.5702, + "num_input_tokens_seen": 8188224, + "step": 2600 + }, + { + "epoch": 0.16676269124895973, + "grad_norm": 36.02799987792969, + "learning_rate": 1.9730620467373654e-06, + "loss": 0.438, + "num_input_tokens_seen": 8204352, + "step": 2605 + }, + { + "epoch": 0.1670827731899366, + "grad_norm": 44.20855712890625, + "learning_rate": 1.9728038374838958e-06, + "loss": 0.5744, + "num_input_tokens_seen": 8219328, + "step": 2610 + }, + { + "epoch": 0.16740285513091352, + "grad_norm": 20.6259822845459, + "learning_rate": 1.972544413673413e-06, + "loss": 0.3913, + "num_input_tokens_seen": 8234560, + "step": 2615 + }, + { + "epoch": 0.1677229370718904, + "grad_norm": 28.986614227294922, + "learning_rate": 1.9722837756298108e-06, + "loss": 0.5779, + "num_input_tokens_seen": 8249344, + "step": 2620 + }, + { + "epoch": 0.1680430190128673, + "grad_norm": 53.51920700073242, + "learning_rate": 1.972021923678499e-06, + "loss": 0.5548, + "num_input_tokens_seen": 8265600, + "step": 2625 + }, + { + "epoch": 0.16836310095384419, + "grad_norm": 27.421762466430664, + "learning_rate": 1.971758858146403e-06, + "loss": 0.4861, + "num_input_tokens_seen": 8280384, + "step": 2630 + }, + { + "epoch": 0.16868318289482107, + "grad_norm": 41.67002868652344, + "learning_rate": 1.9714945793619626e-06, + "loss": 0.4897, + "num_input_tokens_seen": 8295744, + "step": 2635 + }, + { + "epoch": 0.16900326483579797, + "grad_norm": 27.60586929321289, + "learning_rate": 1.971229087655133e-06, + "loss": 0.5052, + "num_input_tokens_seen": 8311680, + "step": 2640 + }, + { + "epoch": 0.16932334677677485, + "grad_norm": 29.15129280090332, + "learning_rate": 1.9709623833573842e-06, + "loss": 0.4678, + "num_input_tokens_seen": 8326592, + "step": 2645 + }, + { + "epoch": 0.16964342871775173, + "grad_norm": 54.205875396728516, + "learning_rate": 1.9706944668016994e-06, + "loss": 0.4588, + "num_input_tokens_seen": 8341632, + "step": 2650 + }, + { + "epoch": 0.16996351065872864, + "grad_norm": 38.538326263427734, + "learning_rate": 1.9704253383225756e-06, + "loss": 0.4627, + "num_input_tokens_seen": 8358400, + "step": 2655 + }, + { + "epoch": 0.17028359259970552, + "grad_norm": 33.1207275390625, + "learning_rate": 1.970154998256023e-06, + "loss": 0.4845, + "num_input_tokens_seen": 8374144, + "step": 2660 + }, + { + "epoch": 0.17060367454068243, + "grad_norm": 35.72023010253906, + "learning_rate": 1.9698834469395644e-06, + "loss": 0.4215, + "num_input_tokens_seen": 8389440, + "step": 2665 + }, + { + "epoch": 0.1709237564816593, + "grad_norm": 33.63475036621094, + "learning_rate": 1.969610684712234e-06, + "loss": 0.5408, + "num_input_tokens_seen": 8404672, + "step": 2670 + }, + { + "epoch": 0.17124383842263619, + "grad_norm": 59.44383239746094, + "learning_rate": 1.9693367119145794e-06, + "loss": 0.5508, + "num_input_tokens_seen": 8420096, + "step": 2675 + }, + { + "epoch": 0.1715639203636131, + "grad_norm": 42.37469482421875, + "learning_rate": 1.969061528888659e-06, + "loss": 0.6684, + "num_input_tokens_seen": 8436288, + "step": 2680 + }, + { + "epoch": 0.17188400230458997, + "grad_norm": 23.906444549560547, + "learning_rate": 1.9687851359780415e-06, + "loss": 0.5401, + "num_input_tokens_seen": 8452672, + "step": 2685 + }, + { + "epoch": 0.17220408424556685, + "grad_norm": 19.489620208740234, + "learning_rate": 1.968507533527807e-06, + "loss": 0.4867, + "num_input_tokens_seen": 8469120, + "step": 2690 + }, + { + "epoch": 0.17252416618654376, + "grad_norm": 46.37827682495117, + "learning_rate": 1.9682287218845455e-06, + "loss": 0.4748, + "num_input_tokens_seen": 8484736, + "step": 2695 + }, + { + "epoch": 0.17284424812752064, + "grad_norm": 38.747093200683594, + "learning_rate": 1.967948701396356e-06, + "loss": 0.7367, + "num_input_tokens_seen": 8500480, + "step": 2700 + }, + { + "epoch": 0.17316433006849755, + "grad_norm": 28.16217803955078, + "learning_rate": 1.9676674724128485e-06, + "loss": 0.3977, + "num_input_tokens_seen": 8514624, + "step": 2705 + }, + { + "epoch": 0.17348441200947443, + "grad_norm": 19.507436752319336, + "learning_rate": 1.9673850352851397e-06, + "loss": 0.4543, + "num_input_tokens_seen": 8529664, + "step": 2710 + }, + { + "epoch": 0.1738044939504513, + "grad_norm": 31.663122177124023, + "learning_rate": 1.967101390365856e-06, + "loss": 0.5825, + "num_input_tokens_seen": 8545280, + "step": 2715 + }, + { + "epoch": 0.1741245758914282, + "grad_norm": 29.334657669067383, + "learning_rate": 1.966816538009131e-06, + "loss": 0.492, + "num_input_tokens_seen": 8560384, + "step": 2720 + }, + { + "epoch": 0.1744446578324051, + "grad_norm": 41.919986724853516, + "learning_rate": 1.966530478570607e-06, + "loss": 0.5425, + "num_input_tokens_seen": 8576960, + "step": 2725 + }, + { + "epoch": 0.174764739773382, + "grad_norm": 31.315555572509766, + "learning_rate": 1.9662432124074325e-06, + "loss": 0.4635, + "num_input_tokens_seen": 8592384, + "step": 2730 + }, + { + "epoch": 0.17508482171435888, + "grad_norm": 29.594783782958984, + "learning_rate": 1.965954739878262e-06, + "loss": 0.4836, + "num_input_tokens_seen": 8609024, + "step": 2735 + }, + { + "epoch": 0.17540490365533576, + "grad_norm": 46.86975860595703, + "learning_rate": 1.965665061343257e-06, + "loss": 0.4283, + "num_input_tokens_seen": 8624768, + "step": 2740 + }, + { + "epoch": 0.17572498559631267, + "grad_norm": 25.347562789916992, + "learning_rate": 1.965374177164085e-06, + "loss": 0.4646, + "num_input_tokens_seen": 8640448, + "step": 2745 + }, + { + "epoch": 0.17604506753728955, + "grad_norm": 27.5438232421875, + "learning_rate": 1.9650820877039182e-06, + "loss": 0.5427, + "num_input_tokens_seen": 8655296, + "step": 2750 + }, + { + "epoch": 0.17636514947826643, + "grad_norm": 69.62262725830078, + "learning_rate": 1.9647887933274334e-06, + "loss": 0.4878, + "num_input_tokens_seen": 8671872, + "step": 2755 + }, + { + "epoch": 0.17668523141924333, + "grad_norm": 21.517606735229492, + "learning_rate": 1.9644942944008124e-06, + "loss": 0.4822, + "num_input_tokens_seen": 8687680, + "step": 2760 + }, + { + "epoch": 0.1770053133602202, + "grad_norm": 57.37998962402344, + "learning_rate": 1.96419859129174e-06, + "loss": 0.5914, + "num_input_tokens_seen": 8702912, + "step": 2765 + }, + { + "epoch": 0.17732539530119712, + "grad_norm": 25.293439865112305, + "learning_rate": 1.963901684369406e-06, + "loss": 0.4702, + "num_input_tokens_seen": 8718144, + "step": 2770 + }, + { + "epoch": 0.177645477242174, + "grad_norm": 36.15742874145508, + "learning_rate": 1.9636035740045013e-06, + "loss": 0.4989, + "num_input_tokens_seen": 8732992, + "step": 2775 + }, + { + "epoch": 0.17796555918315088, + "grad_norm": 26.592554092407227, + "learning_rate": 1.9633042605692207e-06, + "loss": 0.6024, + "num_input_tokens_seen": 8749056, + "step": 2780 + }, + { + "epoch": 0.17828564112412779, + "grad_norm": 22.61241912841797, + "learning_rate": 1.9630037444372597e-06, + "loss": 0.4879, + "num_input_tokens_seen": 8765184, + "step": 2785 + }, + { + "epoch": 0.17860572306510467, + "grad_norm": 43.24379348754883, + "learning_rate": 1.9627020259838177e-06, + "loss": 0.4133, + "num_input_tokens_seen": 8780480, + "step": 2790 + }, + { + "epoch": 0.17892580500608155, + "grad_norm": 33.002906799316406, + "learning_rate": 1.9623991055855925e-06, + "loss": 0.5539, + "num_input_tokens_seen": 8796352, + "step": 2795 + }, + { + "epoch": 0.17924588694705845, + "grad_norm": 27.26972770690918, + "learning_rate": 1.962094983620784e-06, + "loss": 0.443, + "num_input_tokens_seen": 8810688, + "step": 2800 + }, + { + "epoch": 0.17956596888803533, + "grad_norm": 49.42767333984375, + "learning_rate": 1.9617896604690925e-06, + "loss": 0.4279, + "num_input_tokens_seen": 8826304, + "step": 2805 + }, + { + "epoch": 0.17988605082901224, + "grad_norm": 22.84317970275879, + "learning_rate": 1.961483136511717e-06, + "loss": 0.4628, + "num_input_tokens_seen": 8841344, + "step": 2810 + }, + { + "epoch": 0.18020613276998912, + "grad_norm": 47.95643997192383, + "learning_rate": 1.9611754121313567e-06, + "loss": 0.6058, + "num_input_tokens_seen": 8857664, + "step": 2815 + }, + { + "epoch": 0.180526214710966, + "grad_norm": 52.1284294128418, + "learning_rate": 1.960866487712209e-06, + "loss": 0.5762, + "num_input_tokens_seen": 8873408, + "step": 2820 + }, + { + "epoch": 0.1808462966519429, + "grad_norm": 31.013389587402344, + "learning_rate": 1.9605563636399695e-06, + "loss": 0.425, + "num_input_tokens_seen": 8889472, + "step": 2825 + }, + { + "epoch": 0.18116637859291979, + "grad_norm": 60.00368118286133, + "learning_rate": 1.9602450403018315e-06, + "loss": 0.5908, + "num_input_tokens_seen": 8904640, + "step": 2830 + }, + { + "epoch": 0.18148646053389667, + "grad_norm": 35.06608200073242, + "learning_rate": 1.9599325180864864e-06, + "loss": 0.4446, + "num_input_tokens_seen": 8919680, + "step": 2835 + }, + { + "epoch": 0.18180654247487357, + "grad_norm": 31.069002151489258, + "learning_rate": 1.9596187973841216e-06, + "loss": 0.4418, + "num_input_tokens_seen": 8935360, + "step": 2840 + }, + { + "epoch": 0.18212662441585045, + "grad_norm": 26.10578727722168, + "learning_rate": 1.959303878586421e-06, + "loss": 0.4892, + "num_input_tokens_seen": 8951552, + "step": 2845 + }, + { + "epoch": 0.18244670635682736, + "grad_norm": 42.628684997558594, + "learning_rate": 1.9589877620865647e-06, + "loss": 0.5694, + "num_input_tokens_seen": 8968576, + "step": 2850 + }, + { + "epoch": 0.18276678829780424, + "grad_norm": 27.467554092407227, + "learning_rate": 1.9586704482792277e-06, + "loss": 0.4559, + "num_input_tokens_seen": 8983744, + "step": 2855 + }, + { + "epoch": 0.18308687023878112, + "grad_norm": 30.344791412353516, + "learning_rate": 1.95835193756058e-06, + "loss": 0.4376, + "num_input_tokens_seen": 8999040, + "step": 2860 + }, + { + "epoch": 0.18340695217975803, + "grad_norm": 37.68637466430664, + "learning_rate": 1.9580322303282858e-06, + "loss": 0.4186, + "num_input_tokens_seen": 9015872, + "step": 2865 + }, + { + "epoch": 0.1837270341207349, + "grad_norm": 26.828548431396484, + "learning_rate": 1.9577113269815038e-06, + "loss": 0.4001, + "num_input_tokens_seen": 9031744, + "step": 2870 + }, + { + "epoch": 0.18404711606171179, + "grad_norm": 34.85321807861328, + "learning_rate": 1.957389227920885e-06, + "loss": 0.5877, + "num_input_tokens_seen": 9047872, + "step": 2875 + }, + { + "epoch": 0.1843671980026887, + "grad_norm": 33.741172790527344, + "learning_rate": 1.957065933548574e-06, + "loss": 0.5101, + "num_input_tokens_seen": 9062976, + "step": 2880 + }, + { + "epoch": 0.18468727994366557, + "grad_norm": 56.83228302001953, + "learning_rate": 1.956741444268208e-06, + "loss": 0.5899, + "num_input_tokens_seen": 9078208, + "step": 2885 + }, + { + "epoch": 0.18500736188464248, + "grad_norm": 30.513900756835938, + "learning_rate": 1.9564157604849154e-06, + "loss": 0.4744, + "num_input_tokens_seen": 9094720, + "step": 2890 + }, + { + "epoch": 0.18532744382561936, + "grad_norm": 28.41360092163086, + "learning_rate": 1.9560888826053163e-06, + "loss": 0.5274, + "num_input_tokens_seen": 9110336, + "step": 2895 + }, + { + "epoch": 0.18564752576659624, + "grad_norm": 25.244827270507812, + "learning_rate": 1.9557608110375212e-06, + "loss": 0.5573, + "num_input_tokens_seen": 9126912, + "step": 2900 + }, + { + "epoch": 0.18596760770757315, + "grad_norm": 26.246530532836914, + "learning_rate": 1.955431546191132e-06, + "loss": 0.549, + "num_input_tokens_seen": 9142400, + "step": 2905 + }, + { + "epoch": 0.18628768964855003, + "grad_norm": 44.32508087158203, + "learning_rate": 1.95510108847724e-06, + "loss": 0.5161, + "num_input_tokens_seen": 9157184, + "step": 2910 + }, + { + "epoch": 0.1866077715895269, + "grad_norm": 28.210281372070312, + "learning_rate": 1.954769438308424e-06, + "loss": 0.5237, + "num_input_tokens_seen": 9173696, + "step": 2915 + }, + { + "epoch": 0.1869278535305038, + "grad_norm": 36.434974670410156, + "learning_rate": 1.954436596098754e-06, + "loss": 0.4992, + "num_input_tokens_seen": 9190080, + "step": 2920 + }, + { + "epoch": 0.1872479354714807, + "grad_norm": 59.13997268676758, + "learning_rate": 1.9541025622637875e-06, + "loss": 0.5761, + "num_input_tokens_seen": 9204352, + "step": 2925 + }, + { + "epoch": 0.1875680174124576, + "grad_norm": 50.34525680541992, + "learning_rate": 1.95376733722057e-06, + "loss": 0.6098, + "num_input_tokens_seen": 9219200, + "step": 2930 + }, + { + "epoch": 0.18788809935343448, + "grad_norm": 33.083404541015625, + "learning_rate": 1.9534309213876337e-06, + "loss": 0.4702, + "num_input_tokens_seen": 9233600, + "step": 2935 + }, + { + "epoch": 0.18820818129441136, + "grad_norm": 40.38674545288086, + "learning_rate": 1.953093315184997e-06, + "loss": 0.4343, + "num_input_tokens_seen": 9249536, + "step": 2940 + }, + { + "epoch": 0.18852826323538827, + "grad_norm": 39.487579345703125, + "learning_rate": 1.952754519034166e-06, + "loss": 0.6391, + "num_input_tokens_seen": 9264256, + "step": 2945 + }, + { + "epoch": 0.18884834517636515, + "grad_norm": 58.533199310302734, + "learning_rate": 1.9524145333581313e-06, + "loss": 0.4487, + "num_input_tokens_seen": 9279488, + "step": 2950 + }, + { + "epoch": 0.18916842711734205, + "grad_norm": 26.437389373779297, + "learning_rate": 1.952073358581369e-06, + "loss": 0.5122, + "num_input_tokens_seen": 9294336, + "step": 2955 + }, + { + "epoch": 0.18948850905831893, + "grad_norm": 34.934356689453125, + "learning_rate": 1.95173099512984e-06, + "loss": 0.5552, + "num_input_tokens_seen": 9309376, + "step": 2960 + }, + { + "epoch": 0.1898085909992958, + "grad_norm": 22.976945877075195, + "learning_rate": 1.9513874434309894e-06, + "loss": 0.4579, + "num_input_tokens_seen": 9324224, + "step": 2965 + }, + { + "epoch": 0.19012867294027272, + "grad_norm": 27.009410858154297, + "learning_rate": 1.951042703913745e-06, + "loss": 0.4466, + "num_input_tokens_seen": 9339136, + "step": 2970 + }, + { + "epoch": 0.1904487548812496, + "grad_norm": 26.152063369750977, + "learning_rate": 1.950696777008518e-06, + "loss": 0.4491, + "num_input_tokens_seen": 9354688, + "step": 2975 + }, + { + "epoch": 0.19076883682222648, + "grad_norm": 23.096553802490234, + "learning_rate": 1.9503496631472025e-06, + "loss": 0.4917, + "num_input_tokens_seen": 9369664, + "step": 2980 + }, + { + "epoch": 0.19108891876320339, + "grad_norm": 42.896331787109375, + "learning_rate": 1.9500013627631746e-06, + "loss": 0.6324, + "num_input_tokens_seen": 9384768, + "step": 2985 + }, + { + "epoch": 0.19140900070418027, + "grad_norm": 34.10990524291992, + "learning_rate": 1.949651876291291e-06, + "loss": 0.3728, + "num_input_tokens_seen": 9400320, + "step": 2990 + }, + { + "epoch": 0.19172908264515717, + "grad_norm": 56.81764221191406, + "learning_rate": 1.9493012041678894e-06, + "loss": 0.4739, + "num_input_tokens_seen": 9415872, + "step": 2995 + }, + { + "epoch": 0.19204916458613405, + "grad_norm": 31.37006187438965, + "learning_rate": 1.9489493468307883e-06, + "loss": 0.6013, + "num_input_tokens_seen": 9432704, + "step": 3000 + }, + { + "epoch": 0.19236924652711093, + "grad_norm": 52.02330017089844, + "learning_rate": 1.948596304719286e-06, + "loss": 0.5159, + "num_input_tokens_seen": 9448192, + "step": 3005 + }, + { + "epoch": 0.19268932846808784, + "grad_norm": 44.85215759277344, + "learning_rate": 1.9482420782741594e-06, + "loss": 0.4322, + "num_input_tokens_seen": 9464576, + "step": 3010 + }, + { + "epoch": 0.19300941040906472, + "grad_norm": 30.883983612060547, + "learning_rate": 1.9478866679376647e-06, + "loss": 0.5546, + "num_input_tokens_seen": 9479936, + "step": 3015 + }, + { + "epoch": 0.1933294923500416, + "grad_norm": 29.6319637298584, + "learning_rate": 1.9475300741535353e-06, + "loss": 0.5447, + "num_input_tokens_seen": 9497280, + "step": 3020 + }, + { + "epoch": 0.1936495742910185, + "grad_norm": 36.820396423339844, + "learning_rate": 1.9471722973669833e-06, + "loss": 0.4568, + "num_input_tokens_seen": 9514496, + "step": 3025 + }, + { + "epoch": 0.19396965623199539, + "grad_norm": 23.96208953857422, + "learning_rate": 1.946813338024697e-06, + "loss": 0.3932, + "num_input_tokens_seen": 9529536, + "step": 3030 + }, + { + "epoch": 0.1942897381729723, + "grad_norm": 55.99610137939453, + "learning_rate": 1.9464531965748414e-06, + "loss": 0.526, + "num_input_tokens_seen": 9545472, + "step": 3035 + }, + { + "epoch": 0.19460982011394917, + "grad_norm": 39.6732292175293, + "learning_rate": 1.9460918734670573e-06, + "loss": 0.585, + "num_input_tokens_seen": 9560960, + "step": 3040 + }, + { + "epoch": 0.19492990205492605, + "grad_norm": 29.82390022277832, + "learning_rate": 1.945729369152461e-06, + "loss": 0.5221, + "num_input_tokens_seen": 9576320, + "step": 3045 + }, + { + "epoch": 0.19524998399590296, + "grad_norm": 44.162254333496094, + "learning_rate": 1.945365684083643e-06, + "loss": 0.5632, + "num_input_tokens_seen": 9592192, + "step": 3050 + }, + { + "epoch": 0.19557006593687984, + "grad_norm": 52.55691146850586, + "learning_rate": 1.945000818714668e-06, + "loss": 0.6164, + "num_input_tokens_seen": 9608128, + "step": 3055 + }, + { + "epoch": 0.19589014787785672, + "grad_norm": 27.91643714904785, + "learning_rate": 1.944634773501076e-06, + "loss": 0.5338, + "num_input_tokens_seen": 9623872, + "step": 3060 + }, + { + "epoch": 0.19621022981883363, + "grad_norm": 51.04069900512695, + "learning_rate": 1.9442675488998783e-06, + "loss": 0.5496, + "num_input_tokens_seen": 9639488, + "step": 3065 + }, + { + "epoch": 0.1965303117598105, + "grad_norm": 28.205469131469727, + "learning_rate": 1.9438991453695587e-06, + "loss": 0.4913, + "num_input_tokens_seen": 9655680, + "step": 3070 + }, + { + "epoch": 0.1968503937007874, + "grad_norm": 36.26915740966797, + "learning_rate": 1.943529563370073e-06, + "loss": 0.5489, + "num_input_tokens_seen": 9670400, + "step": 3075 + }, + { + "epoch": 0.1971704756417643, + "grad_norm": 21.7237606048584, + "learning_rate": 1.9431588033628495e-06, + "loss": 0.3868, + "num_input_tokens_seen": 9685504, + "step": 3080 + }, + { + "epoch": 0.19749055758274117, + "grad_norm": 44.26191329956055, + "learning_rate": 1.9427868658107862e-06, + "loss": 0.635, + "num_input_tokens_seen": 9701952, + "step": 3085 + }, + { + "epoch": 0.19781063952371808, + "grad_norm": 22.945430755615234, + "learning_rate": 1.942413751178251e-06, + "loss": 0.4485, + "num_input_tokens_seen": 9716928, + "step": 3090 + }, + { + "epoch": 0.19813072146469496, + "grad_norm": 55.33934783935547, + "learning_rate": 1.9420394599310826e-06, + "loss": 0.6516, + "num_input_tokens_seen": 9732096, + "step": 3095 + }, + { + "epoch": 0.19845080340567184, + "grad_norm": 27.51698112487793, + "learning_rate": 1.941663992536588e-06, + "loss": 0.5307, + "num_input_tokens_seen": 9747648, + "step": 3100 + }, + { + "epoch": 0.19877088534664875, + "grad_norm": 14.455513954162598, + "learning_rate": 1.941287349463542e-06, + "loss": 0.4371, + "num_input_tokens_seen": 9763072, + "step": 3105 + }, + { + "epoch": 0.19909096728762563, + "grad_norm": 28.985132217407227, + "learning_rate": 1.940909531182188e-06, + "loss": 0.4726, + "num_input_tokens_seen": 9778176, + "step": 3110 + }, + { + "epoch": 0.19941104922860253, + "grad_norm": 45.77129364013672, + "learning_rate": 1.9405305381642375e-06, + "loss": 0.6129, + "num_input_tokens_seen": 9793536, + "step": 3115 + }, + { + "epoch": 0.1997311311695794, + "grad_norm": 24.09324836730957, + "learning_rate": 1.9401503708828665e-06, + "loss": 0.4986, + "num_input_tokens_seen": 9808192, + "step": 3120 + }, + { + "epoch": 0.2000512131105563, + "grad_norm": 32.09850311279297, + "learning_rate": 1.939769029812719e-06, + "loss": 0.5774, + "num_input_tokens_seen": 9823232, + "step": 3125 + }, + { + "epoch": 0.20024326227514244, + "eval_loss": 0.48840755224227905, + "eval_runtime": 49.2154, + "eval_samples_per_second": 282.148, + "eval_steps_per_second": 35.274, + "num_input_tokens_seen": 9832064, + "step": 3128 + }, + { + "epoch": 0.2003712950515332, + "grad_norm": 42.523658752441406, + "learning_rate": 1.939386515429904e-06, + "loss": 0.5893, + "num_input_tokens_seen": 9839488, + "step": 3130 + }, + { + "epoch": 0.20069137699251008, + "grad_norm": 20.936914443969727, + "learning_rate": 1.9390028282119942e-06, + "loss": 0.421, + "num_input_tokens_seen": 9856192, + "step": 3135 + }, + { + "epoch": 0.201011458933487, + "grad_norm": 37.659271240234375, + "learning_rate": 1.938617968638029e-06, + "loss": 0.5122, + "num_input_tokens_seen": 9871552, + "step": 3140 + }, + { + "epoch": 0.20133154087446387, + "grad_norm": 40.56658172607422, + "learning_rate": 1.938231937188509e-06, + "loss": 0.5077, + "num_input_tokens_seen": 9886016, + "step": 3145 + }, + { + "epoch": 0.20165162281544075, + "grad_norm": 43.369693756103516, + "learning_rate": 1.9378447343453995e-06, + "loss": 0.6156, + "num_input_tokens_seen": 9903552, + "step": 3150 + }, + { + "epoch": 0.20197170475641765, + "grad_norm": 43.882118225097656, + "learning_rate": 1.9374563605921275e-06, + "loss": 0.3458, + "num_input_tokens_seen": 9920320, + "step": 3155 + }, + { + "epoch": 0.20229178669739453, + "grad_norm": 30.69708251953125, + "learning_rate": 1.937066816413582e-06, + "loss": 0.5926, + "num_input_tokens_seen": 9935936, + "step": 3160 + }, + { + "epoch": 0.2026118686383714, + "grad_norm": 30.447908401489258, + "learning_rate": 1.9366761022961146e-06, + "loss": 0.4757, + "num_input_tokens_seen": 9950912, + "step": 3165 + }, + { + "epoch": 0.20293195057934832, + "grad_norm": 40.40016174316406, + "learning_rate": 1.9362842187275354e-06, + "loss": 0.5615, + "num_input_tokens_seen": 9966080, + "step": 3170 + }, + { + "epoch": 0.2032520325203252, + "grad_norm": 29.704164505004883, + "learning_rate": 1.9358911661971155e-06, + "loss": 0.4789, + "num_input_tokens_seen": 9982080, + "step": 3175 + }, + { + "epoch": 0.2035721144613021, + "grad_norm": 28.506755828857422, + "learning_rate": 1.9354969451955864e-06, + "loss": 0.4647, + "num_input_tokens_seen": 9996544, + "step": 3180 + }, + { + "epoch": 0.20389219640227899, + "grad_norm": 27.22804832458496, + "learning_rate": 1.9351015562151375e-06, + "loss": 0.5497, + "num_input_tokens_seen": 10011776, + "step": 3185 + }, + { + "epoch": 0.20421227834325587, + "grad_norm": 25.4746150970459, + "learning_rate": 1.934704999749416e-06, + "loss": 0.4331, + "num_input_tokens_seen": 10027264, + "step": 3190 + }, + { + "epoch": 0.20453236028423277, + "grad_norm": 23.414485931396484, + "learning_rate": 1.9343072762935274e-06, + "loss": 0.4203, + "num_input_tokens_seen": 10042432, + "step": 3195 + }, + { + "epoch": 0.20485244222520965, + "grad_norm": 28.72736167907715, + "learning_rate": 1.933908386344035e-06, + "loss": 0.4135, + "num_input_tokens_seen": 10057792, + "step": 3200 + }, + { + "epoch": 0.20517252416618653, + "grad_norm": 33.854576110839844, + "learning_rate": 1.9335083303989565e-06, + "loss": 0.5222, + "num_input_tokens_seen": 10074752, + "step": 3205 + }, + { + "epoch": 0.20549260610716344, + "grad_norm": 37.276336669921875, + "learning_rate": 1.9331071089577674e-06, + "loss": 0.576, + "num_input_tokens_seen": 10090752, + "step": 3210 + }, + { + "epoch": 0.20581268804814032, + "grad_norm": 51.40751647949219, + "learning_rate": 1.9327047225213963e-06, + "loss": 0.4961, + "num_input_tokens_seen": 10106240, + "step": 3215 + }, + { + "epoch": 0.20613276998911723, + "grad_norm": 35.04685974121094, + "learning_rate": 1.9323011715922283e-06, + "loss": 0.4128, + "num_input_tokens_seen": 10121856, + "step": 3220 + }, + { + "epoch": 0.2064528519300941, + "grad_norm": 67.41058349609375, + "learning_rate": 1.931896456674101e-06, + "loss": 0.4764, + "num_input_tokens_seen": 10137408, + "step": 3225 + }, + { + "epoch": 0.20677293387107099, + "grad_norm": 32.64918899536133, + "learning_rate": 1.931490578272306e-06, + "loss": 0.4548, + "num_input_tokens_seen": 10152640, + "step": 3230 + }, + { + "epoch": 0.2070930158120479, + "grad_norm": 33.72087097167969, + "learning_rate": 1.9310835368935867e-06, + "loss": 0.3538, + "num_input_tokens_seen": 10167936, + "step": 3235 + }, + { + "epoch": 0.20741309775302477, + "grad_norm": 36.13018035888672, + "learning_rate": 1.93067533304614e-06, + "loss": 0.4205, + "num_input_tokens_seen": 10183360, + "step": 3240 + }, + { + "epoch": 0.20773317969400165, + "grad_norm": 29.964752197265625, + "learning_rate": 1.9302659672396128e-06, + "loss": 0.5557, + "num_input_tokens_seen": 10198208, + "step": 3245 + }, + { + "epoch": 0.20805326163497856, + "grad_norm": 27.227624893188477, + "learning_rate": 1.9298554399851025e-06, + "loss": 0.4903, + "num_input_tokens_seen": 10213568, + "step": 3250 + }, + { + "epoch": 0.20837334357595544, + "grad_norm": 37.30453109741211, + "learning_rate": 1.929443751795158e-06, + "loss": 0.4833, + "num_input_tokens_seen": 10230080, + "step": 3255 + }, + { + "epoch": 0.20869342551693235, + "grad_norm": 23.320819854736328, + "learning_rate": 1.929030903183776e-06, + "loss": 0.4759, + "num_input_tokens_seen": 10246912, + "step": 3260 + }, + { + "epoch": 0.20901350745790923, + "grad_norm": 42.66804885864258, + "learning_rate": 1.9286168946664033e-06, + "loss": 0.5368, + "num_input_tokens_seen": 10262464, + "step": 3265 + }, + { + "epoch": 0.2093335893988861, + "grad_norm": 60.002376556396484, + "learning_rate": 1.9282017267599352e-06, + "loss": 0.6679, + "num_input_tokens_seen": 10278016, + "step": 3270 + }, + { + "epoch": 0.209653671339863, + "grad_norm": 42.901100158691406, + "learning_rate": 1.9277853999827125e-06, + "loss": 0.5054, + "num_input_tokens_seen": 10293824, + "step": 3275 + }, + { + "epoch": 0.2099737532808399, + "grad_norm": 44.74653244018555, + "learning_rate": 1.9273679148545244e-06, + "loss": 0.5116, + "num_input_tokens_seen": 10309568, + "step": 3280 + }, + { + "epoch": 0.21029383522181677, + "grad_norm": 33.75946044921875, + "learning_rate": 1.9269492718966062e-06, + "loss": 0.4229, + "num_input_tokens_seen": 10325696, + "step": 3285 + }, + { + "epoch": 0.21061391716279368, + "grad_norm": 30.77555274963379, + "learning_rate": 1.9265294716316384e-06, + "loss": 0.5261, + "num_input_tokens_seen": 10342016, + "step": 3290 + }, + { + "epoch": 0.21093399910377056, + "grad_norm": 29.430330276489258, + "learning_rate": 1.926108514583747e-06, + "loss": 0.4688, + "num_input_tokens_seen": 10357632, + "step": 3295 + }, + { + "epoch": 0.21125408104474747, + "grad_norm": 50.258575439453125, + "learning_rate": 1.925686401278501e-06, + "loss": 0.4801, + "num_input_tokens_seen": 10373056, + "step": 3300 + }, + { + "epoch": 0.21157416298572435, + "grad_norm": 61.20192337036133, + "learning_rate": 1.9252631322429143e-06, + "loss": 0.6373, + "num_input_tokens_seen": 10389248, + "step": 3305 + }, + { + "epoch": 0.21189424492670123, + "grad_norm": 23.071653366088867, + "learning_rate": 1.9248387080054435e-06, + "loss": 0.439, + "num_input_tokens_seen": 10404864, + "step": 3310 + }, + { + "epoch": 0.21221432686767813, + "grad_norm": 18.74202537536621, + "learning_rate": 1.9244131290959864e-06, + "loss": 0.4878, + "num_input_tokens_seen": 10420416, + "step": 3315 + }, + { + "epoch": 0.212534408808655, + "grad_norm": 33.07780075073242, + "learning_rate": 1.9239863960458845e-06, + "loss": 0.4244, + "num_input_tokens_seen": 10435456, + "step": 3320 + }, + { + "epoch": 0.21285449074963192, + "grad_norm": 31.487497329711914, + "learning_rate": 1.923558509387918e-06, + "loss": 0.4881, + "num_input_tokens_seen": 10451584, + "step": 3325 + }, + { + "epoch": 0.2131745726906088, + "grad_norm": 37.91923904418945, + "learning_rate": 1.9231294696563086e-06, + "loss": 0.3745, + "num_input_tokens_seen": 10467584, + "step": 3330 + }, + { + "epoch": 0.21349465463158568, + "grad_norm": 34.82919692993164, + "learning_rate": 1.922699277386718e-06, + "loss": 0.4146, + "num_input_tokens_seen": 10483264, + "step": 3335 + }, + { + "epoch": 0.21381473657256259, + "grad_norm": 34.857810974121094, + "learning_rate": 1.9222679331162454e-06, + "loss": 0.5865, + "num_input_tokens_seen": 10498560, + "step": 3340 + }, + { + "epoch": 0.21413481851353947, + "grad_norm": 37.536800384521484, + "learning_rate": 1.92183543738343e-06, + "loss": 0.4515, + "num_input_tokens_seen": 10514176, + "step": 3345 + }, + { + "epoch": 0.21445490045451635, + "grad_norm": 25.721649169921875, + "learning_rate": 1.9214017907282475e-06, + "loss": 0.4363, + "num_input_tokens_seen": 10529792, + "step": 3350 + }, + { + "epoch": 0.21477498239549325, + "grad_norm": 37.20597457885742, + "learning_rate": 1.9209669936921105e-06, + "loss": 0.4809, + "num_input_tokens_seen": 10545856, + "step": 3355 + }, + { + "epoch": 0.21509506433647013, + "grad_norm": 43.397335052490234, + "learning_rate": 1.920531046817869e-06, + "loss": 0.4092, + "num_input_tokens_seen": 10562368, + "step": 3360 + }, + { + "epoch": 0.21541514627744704, + "grad_norm": 44.76917266845703, + "learning_rate": 1.9200939506498067e-06, + "loss": 0.6238, + "num_input_tokens_seen": 10577280, + "step": 3365 + }, + { + "epoch": 0.21573522821842392, + "grad_norm": 30.43206214904785, + "learning_rate": 1.9196557057336446e-06, + "loss": 0.5817, + "num_input_tokens_seen": 10592384, + "step": 3370 + }, + { + "epoch": 0.2160553101594008, + "grad_norm": 24.50318145751953, + "learning_rate": 1.9192163126165354e-06, + "loss": 0.4498, + "num_input_tokens_seen": 10608704, + "step": 3375 + }, + { + "epoch": 0.2163753921003777, + "grad_norm": 43.877662658691406, + "learning_rate": 1.9187757718470673e-06, + "loss": 0.3997, + "num_input_tokens_seen": 10625280, + "step": 3380 + }, + { + "epoch": 0.21669547404135459, + "grad_norm": 22.60622215270996, + "learning_rate": 1.9183340839752606e-06, + "loss": 0.5339, + "num_input_tokens_seen": 10641152, + "step": 3385 + }, + { + "epoch": 0.21701555598233147, + "grad_norm": 28.090923309326172, + "learning_rate": 1.9178912495525672e-06, + "loss": 0.4193, + "num_input_tokens_seen": 10657472, + "step": 3390 + }, + { + "epoch": 0.21733563792330837, + "grad_norm": 24.062137603759766, + "learning_rate": 1.917447269131872e-06, + "loss": 0.5054, + "num_input_tokens_seen": 10673600, + "step": 3395 + }, + { + "epoch": 0.21765571986428525, + "grad_norm": 35.8740119934082, + "learning_rate": 1.917002143267489e-06, + "loss": 0.5693, + "num_input_tokens_seen": 10689344, + "step": 3400 + }, + { + "epoch": 0.21797580180526216, + "grad_norm": 29.342618942260742, + "learning_rate": 1.9165558725151633e-06, + "loss": 0.4478, + "num_input_tokens_seen": 10704384, + "step": 3405 + }, + { + "epoch": 0.21829588374623904, + "grad_norm": 56.710784912109375, + "learning_rate": 1.9161084574320692e-06, + "loss": 0.5002, + "num_input_tokens_seen": 10720512, + "step": 3410 + }, + { + "epoch": 0.21861596568721592, + "grad_norm": 31.751296997070312, + "learning_rate": 1.91565989857681e-06, + "loss": 0.4727, + "num_input_tokens_seen": 10735744, + "step": 3415 + }, + { + "epoch": 0.21893604762819283, + "grad_norm": 31.050350189208984, + "learning_rate": 1.9152101965094162e-06, + "loss": 0.4573, + "num_input_tokens_seen": 10750848, + "step": 3420 + }, + { + "epoch": 0.2192561295691697, + "grad_norm": 42.99034881591797, + "learning_rate": 1.9147593517913464e-06, + "loss": 0.4878, + "num_input_tokens_seen": 10765632, + "step": 3425 + }, + { + "epoch": 0.21957621151014659, + "grad_norm": 17.069164276123047, + "learning_rate": 1.914307364985485e-06, + "loss": 0.3856, + "num_input_tokens_seen": 10780928, + "step": 3430 + }, + { + "epoch": 0.2198962934511235, + "grad_norm": 24.95672607421875, + "learning_rate": 1.913854236656144e-06, + "loss": 0.4217, + "num_input_tokens_seen": 10796864, + "step": 3435 + }, + { + "epoch": 0.22021637539210037, + "grad_norm": 39.41409683227539, + "learning_rate": 1.9133999673690584e-06, + "loss": 0.4653, + "num_input_tokens_seen": 10812672, + "step": 3440 + }, + { + "epoch": 0.22053645733307728, + "grad_norm": 44.56681442260742, + "learning_rate": 1.9129445576913886e-06, + "loss": 0.4709, + "num_input_tokens_seen": 10828544, + "step": 3445 + }, + { + "epoch": 0.22085653927405416, + "grad_norm": 23.38069725036621, + "learning_rate": 1.91248800819172e-06, + "loss": 0.5335, + "num_input_tokens_seen": 10844288, + "step": 3450 + }, + { + "epoch": 0.22117662121503104, + "grad_norm": 48.04047775268555, + "learning_rate": 1.912030319440059e-06, + "loss": 0.5192, + "num_input_tokens_seen": 10860160, + "step": 3455 + }, + { + "epoch": 0.22149670315600795, + "grad_norm": 36.49208068847656, + "learning_rate": 1.9115714920078354e-06, + "loss": 0.6043, + "num_input_tokens_seen": 10875968, + "step": 3460 + }, + { + "epoch": 0.22181678509698483, + "grad_norm": 25.53341293334961, + "learning_rate": 1.9111115264679017e-06, + "loss": 0.3252, + "num_input_tokens_seen": 10892096, + "step": 3465 + }, + { + "epoch": 0.2221368670379617, + "grad_norm": 45.4945068359375, + "learning_rate": 1.910650423394529e-06, + "loss": 0.4378, + "num_input_tokens_seen": 10908544, + "step": 3470 + }, + { + "epoch": 0.2224569489789386, + "grad_norm": 45.49387741088867, + "learning_rate": 1.910188183363411e-06, + "loss": 0.4817, + "num_input_tokens_seen": 10924544, + "step": 3475 + }, + { + "epoch": 0.2227770309199155, + "grad_norm": 50.44002151489258, + "learning_rate": 1.909724806951659e-06, + "loss": 0.4441, + "num_input_tokens_seen": 10941888, + "step": 3480 + }, + { + "epoch": 0.2230971128608924, + "grad_norm": 50.978633880615234, + "learning_rate": 1.909260294737804e-06, + "loss": 0.4669, + "num_input_tokens_seen": 10958592, + "step": 3485 + }, + { + "epoch": 0.22341719480186928, + "grad_norm": 80.07136535644531, + "learning_rate": 1.9087946473017953e-06, + "loss": 0.555, + "num_input_tokens_seen": 10974208, + "step": 3490 + }, + { + "epoch": 0.22373727674284616, + "grad_norm": 33.776737213134766, + "learning_rate": 1.9083278652249992e-06, + "loss": 0.4304, + "num_input_tokens_seen": 10988928, + "step": 3495 + }, + { + "epoch": 0.22405735868382307, + "grad_norm": 35.86427307128906, + "learning_rate": 1.9078599490901983e-06, + "loss": 0.425, + "num_input_tokens_seen": 11005952, + "step": 3500 + }, + { + "epoch": 0.22437744062479995, + "grad_norm": 51.98170852661133, + "learning_rate": 1.9073908994815914e-06, + "loss": 0.3971, + "num_input_tokens_seen": 11020608, + "step": 3505 + }, + { + "epoch": 0.22469752256577685, + "grad_norm": 46.34355926513672, + "learning_rate": 1.9069207169847928e-06, + "loss": 0.4862, + "num_input_tokens_seen": 11036736, + "step": 3510 + }, + { + "epoch": 0.22501760450675373, + "grad_norm": 33.5971794128418, + "learning_rate": 1.9064494021868302e-06, + "loss": 0.3584, + "num_input_tokens_seen": 11052480, + "step": 3515 + }, + { + "epoch": 0.2253376864477306, + "grad_norm": 39.64836120605469, + "learning_rate": 1.9059769556761464e-06, + "loss": 0.48, + "num_input_tokens_seen": 11068416, + "step": 3520 + }, + { + "epoch": 0.22565776838870752, + "grad_norm": 31.865467071533203, + "learning_rate": 1.9055033780425962e-06, + "loss": 0.4454, + "num_input_tokens_seen": 11086400, + "step": 3525 + }, + { + "epoch": 0.2259778503296844, + "grad_norm": 88.44284057617188, + "learning_rate": 1.9050286698774464e-06, + "loss": 0.562, + "num_input_tokens_seen": 11102848, + "step": 3530 + }, + { + "epoch": 0.22629793227066128, + "grad_norm": 41.320526123046875, + "learning_rate": 1.904552831773376e-06, + "loss": 0.5359, + "num_input_tokens_seen": 11118080, + "step": 3535 + }, + { + "epoch": 0.22661801421163819, + "grad_norm": 24.0659236907959, + "learning_rate": 1.9040758643244748e-06, + "loss": 0.4967, + "num_input_tokens_seen": 11133120, + "step": 3540 + }, + { + "epoch": 0.22693809615261507, + "grad_norm": 31.473848342895508, + "learning_rate": 1.903597768126242e-06, + "loss": 0.4694, + "num_input_tokens_seen": 11150144, + "step": 3545 + }, + { + "epoch": 0.22725817809359197, + "grad_norm": 58.51475143432617, + "learning_rate": 1.9031185437755862e-06, + "loss": 0.4787, + "num_input_tokens_seen": 11165760, + "step": 3550 + }, + { + "epoch": 0.22757826003456885, + "grad_norm": 52.226993560791016, + "learning_rate": 1.9026381918708246e-06, + "loss": 0.4582, + "num_input_tokens_seen": 11180096, + "step": 3555 + }, + { + "epoch": 0.22789834197554573, + "grad_norm": 19.623682022094727, + "learning_rate": 1.9021567130116822e-06, + "loss": 0.3618, + "num_input_tokens_seen": 11195584, + "step": 3560 + }, + { + "epoch": 0.22821842391652264, + "grad_norm": 59.500858306884766, + "learning_rate": 1.9016741077992916e-06, + "loss": 0.3909, + "num_input_tokens_seen": 11210944, + "step": 3565 + }, + { + "epoch": 0.22853850585749952, + "grad_norm": 27.949474334716797, + "learning_rate": 1.90119037683619e-06, + "loss": 0.4052, + "num_input_tokens_seen": 11227392, + "step": 3570 + }, + { + "epoch": 0.2288585877984764, + "grad_norm": 26.94727325439453, + "learning_rate": 1.9007055207263223e-06, + "loss": 0.6492, + "num_input_tokens_seen": 11244416, + "step": 3575 + }, + { + "epoch": 0.2291786697394533, + "grad_norm": 28.7558536529541, + "learning_rate": 1.900219540075036e-06, + "loss": 0.3588, + "num_input_tokens_seen": 11260672, + "step": 3580 + }, + { + "epoch": 0.22949875168043019, + "grad_norm": 55.45866775512695, + "learning_rate": 1.8997324354890845e-06, + "loss": 0.4749, + "num_input_tokens_seen": 11277504, + "step": 3585 + }, + { + "epoch": 0.2298188336214071, + "grad_norm": 110.20696258544922, + "learning_rate": 1.8992442075766233e-06, + "loss": 0.539, + "num_input_tokens_seen": 11293184, + "step": 3590 + }, + { + "epoch": 0.23013891556238397, + "grad_norm": 29.66388702392578, + "learning_rate": 1.8987548569472105e-06, + "loss": 0.3191, + "num_input_tokens_seen": 11308480, + "step": 3595 + }, + { + "epoch": 0.23045899750336085, + "grad_norm": 31.010486602783203, + "learning_rate": 1.8982643842118064e-06, + "loss": 0.396, + "num_input_tokens_seen": 11323840, + "step": 3600 + }, + { + "epoch": 0.23077907944433776, + "grad_norm": 63.96700668334961, + "learning_rate": 1.8977727899827716e-06, + "loss": 0.5821, + "num_input_tokens_seen": 11339456, + "step": 3605 + }, + { + "epoch": 0.23109916138531464, + "grad_norm": 50.296600341796875, + "learning_rate": 1.8972800748738678e-06, + "loss": 0.6554, + "num_input_tokens_seen": 11354880, + "step": 3610 + }, + { + "epoch": 0.23141924332629152, + "grad_norm": 27.36386489868164, + "learning_rate": 1.896786239500255e-06, + "loss": 0.5226, + "num_input_tokens_seen": 11369984, + "step": 3615 + }, + { + "epoch": 0.23173932526726843, + "grad_norm": 51.205718994140625, + "learning_rate": 1.8962912844784928e-06, + "loss": 0.429, + "num_input_tokens_seen": 11384640, + "step": 3620 + }, + { + "epoch": 0.2320594072082453, + "grad_norm": 53.744346618652344, + "learning_rate": 1.8957952104265384e-06, + "loss": 0.4945, + "num_input_tokens_seen": 11401152, + "step": 3625 + }, + { + "epoch": 0.2323794891492222, + "grad_norm": 32.322486877441406, + "learning_rate": 1.8952980179637458e-06, + "loss": 0.4535, + "num_input_tokens_seen": 11416896, + "step": 3630 + }, + { + "epoch": 0.2326995710901991, + "grad_norm": 34.96129608154297, + "learning_rate": 1.8947997077108662e-06, + "loss": 0.4899, + "num_input_tokens_seen": 11432832, + "step": 3635 + }, + { + "epoch": 0.23301965303117597, + "grad_norm": 30.439565658569336, + "learning_rate": 1.894300280290045e-06, + "loss": 0.4807, + "num_input_tokens_seen": 11448320, + "step": 3640 + }, + { + "epoch": 0.23333973497215288, + "grad_norm": 23.5026912689209, + "learning_rate": 1.8937997363248237e-06, + "loss": 0.5674, + "num_input_tokens_seen": 11463488, + "step": 3645 + }, + { + "epoch": 0.23365981691312976, + "grad_norm": 20.100936889648438, + "learning_rate": 1.8932980764401373e-06, + "loss": 0.4527, + "num_input_tokens_seen": 11478592, + "step": 3650 + }, + { + "epoch": 0.23397989885410664, + "grad_norm": 24.669857025146484, + "learning_rate": 1.8927953012623141e-06, + "loss": 0.3564, + "num_input_tokens_seen": 11494720, + "step": 3655 + }, + { + "epoch": 0.23429998079508355, + "grad_norm": 56.09657287597656, + "learning_rate": 1.8922914114190744e-06, + "loss": 0.4846, + "num_input_tokens_seen": 11511232, + "step": 3660 + }, + { + "epoch": 0.23462006273606043, + "grad_norm": 31.37401008605957, + "learning_rate": 1.8917864075395312e-06, + "loss": 0.5093, + "num_input_tokens_seen": 11527040, + "step": 3665 + }, + { + "epoch": 0.23494014467703733, + "grad_norm": 18.777942657470703, + "learning_rate": 1.8912802902541873e-06, + "loss": 0.4461, + "num_input_tokens_seen": 11542528, + "step": 3670 + }, + { + "epoch": 0.2352602266180142, + "grad_norm": 37.04750442504883, + "learning_rate": 1.8907730601949362e-06, + "loss": 0.4974, + "num_input_tokens_seen": 11557696, + "step": 3675 + }, + { + "epoch": 0.2355803085589911, + "grad_norm": 50.14651870727539, + "learning_rate": 1.8902647179950608e-06, + "loss": 0.4648, + "num_input_tokens_seen": 11574848, + "step": 3680 + }, + { + "epoch": 0.235900390499968, + "grad_norm": 52.763484954833984, + "learning_rate": 1.889755264289232e-06, + "loss": 0.5108, + "num_input_tokens_seen": 11589696, + "step": 3685 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 20.895673751831055, + "learning_rate": 1.8892446997135087e-06, + "loss": 0.384, + "num_input_tokens_seen": 11606848, + "step": 3690 + }, + { + "epoch": 0.23654055438192176, + "grad_norm": 31.011825561523438, + "learning_rate": 1.888733024905337e-06, + "loss": 0.6707, + "num_input_tokens_seen": 11623744, + "step": 3695 + }, + { + "epoch": 0.23686063632289867, + "grad_norm": 34.26097106933594, + "learning_rate": 1.888220240503549e-06, + "loss": 0.4755, + "num_input_tokens_seen": 11640256, + "step": 3700 + }, + { + "epoch": 0.23718071826387555, + "grad_norm": 32.54058837890625, + "learning_rate": 1.8877063471483618e-06, + "loss": 0.412, + "num_input_tokens_seen": 11655744, + "step": 3705 + }, + { + "epoch": 0.23750080020485245, + "grad_norm": 17.025754928588867, + "learning_rate": 1.8871913454813772e-06, + "loss": 0.2935, + "num_input_tokens_seen": 11671104, + "step": 3710 + }, + { + "epoch": 0.23782088214582933, + "grad_norm": 29.473085403442383, + "learning_rate": 1.886675236145581e-06, + "loss": 0.3898, + "num_input_tokens_seen": 11686848, + "step": 3715 + }, + { + "epoch": 0.2381409640868062, + "grad_norm": 28.30191421508789, + "learning_rate": 1.8861580197853422e-06, + "loss": 0.5018, + "num_input_tokens_seen": 11701952, + "step": 3720 + }, + { + "epoch": 0.23846104602778312, + "grad_norm": 41.48347473144531, + "learning_rate": 1.8856396970464105e-06, + "loss": 0.4647, + "num_input_tokens_seen": 11718592, + "step": 3725 + }, + { + "epoch": 0.23878112796876, + "grad_norm": 40.44169235229492, + "learning_rate": 1.8851202685759189e-06, + "loss": 0.5143, + "num_input_tokens_seen": 11734208, + "step": 3730 + }, + { + "epoch": 0.2391012099097369, + "grad_norm": 11.559971809387207, + "learning_rate": 1.8845997350223792e-06, + "loss": 0.407, + "num_input_tokens_seen": 11748992, + "step": 3735 + }, + { + "epoch": 0.23942129185071379, + "grad_norm": 28.135868072509766, + "learning_rate": 1.8840780970356842e-06, + "loss": 0.4217, + "num_input_tokens_seen": 11764608, + "step": 3740 + }, + { + "epoch": 0.23974137379169067, + "grad_norm": 29.070838928222656, + "learning_rate": 1.8835553552671048e-06, + "loss": 0.4078, + "num_input_tokens_seen": 11780800, + "step": 3745 + }, + { + "epoch": 0.24006145573266757, + "grad_norm": 30.527294158935547, + "learning_rate": 1.8830315103692902e-06, + "loss": 0.4593, + "num_input_tokens_seen": 11795776, + "step": 3750 + }, + { + "epoch": 0.24038153767364445, + "grad_norm": 34.47731399536133, + "learning_rate": 1.8825065629962669e-06, + "loss": 0.5071, + "num_input_tokens_seen": 11811776, + "step": 3755 + }, + { + "epoch": 0.24070161961462133, + "grad_norm": 32.23590087890625, + "learning_rate": 1.881980513803438e-06, + "loss": 0.4852, + "num_input_tokens_seen": 11828224, + "step": 3760 + }, + { + "epoch": 0.24102170155559824, + "grad_norm": 48.78215026855469, + "learning_rate": 1.881453363447582e-06, + "loss": 0.5035, + "num_input_tokens_seen": 11843904, + "step": 3765 + }, + { + "epoch": 0.24134178349657512, + "grad_norm": 57.377567291259766, + "learning_rate": 1.880925112586852e-06, + "loss": 0.5574, + "num_input_tokens_seen": 11859392, + "step": 3770 + }, + { + "epoch": 0.24166186543755203, + "grad_norm": 48.24585723876953, + "learning_rate": 1.8803957618807762e-06, + "loss": 0.4427, + "num_input_tokens_seen": 11875968, + "step": 3775 + }, + { + "epoch": 0.2419819473785289, + "grad_norm": 72.58015441894531, + "learning_rate": 1.8798653119902548e-06, + "loss": 0.4404, + "num_input_tokens_seen": 11891584, + "step": 3780 + }, + { + "epoch": 0.24230202931950579, + "grad_norm": 26.939559936523438, + "learning_rate": 1.8793337635775603e-06, + "loss": 0.5029, + "num_input_tokens_seen": 11906944, + "step": 3785 + }, + { + "epoch": 0.2426221112604827, + "grad_norm": 44.384925842285156, + "learning_rate": 1.8788011173063376e-06, + "loss": 0.4729, + "num_input_tokens_seen": 11922368, + "step": 3790 + }, + { + "epoch": 0.24294219320145957, + "grad_norm": 45.79201126098633, + "learning_rate": 1.8782673738416018e-06, + "loss": 0.5181, + "num_input_tokens_seen": 11938432, + "step": 3795 + }, + { + "epoch": 0.24326227514243645, + "grad_norm": 43.953582763671875, + "learning_rate": 1.877732533849737e-06, + "loss": 0.5078, + "num_input_tokens_seen": 11956608, + "step": 3800 + }, + { + "epoch": 0.24358235708341336, + "grad_norm": 25.617721557617188, + "learning_rate": 1.8771965979984988e-06, + "loss": 0.4394, + "num_input_tokens_seen": 11972480, + "step": 3805 + }, + { + "epoch": 0.24390243902439024, + "grad_norm": 21.165599822998047, + "learning_rate": 1.8766595669570084e-06, + "loss": 0.3889, + "num_input_tokens_seen": 11987072, + "step": 3810 + }, + { + "epoch": 0.24422252096536715, + "grad_norm": 32.8095703125, + "learning_rate": 1.8761214413957553e-06, + "loss": 0.4361, + "num_input_tokens_seen": 12002112, + "step": 3815 + }, + { + "epoch": 0.24454260290634403, + "grad_norm": 23.940019607543945, + "learning_rate": 1.8755822219865963e-06, + "loss": 0.3493, + "num_input_tokens_seen": 12016960, + "step": 3820 + }, + { + "epoch": 0.2448626848473209, + "grad_norm": 68.5343246459961, + "learning_rate": 1.875041909402752e-06, + "loss": 0.4331, + "num_input_tokens_seen": 12032576, + "step": 3825 + }, + { + "epoch": 0.2451827667882978, + "grad_norm": 25.4498233795166, + "learning_rate": 1.8745005043188102e-06, + "loss": 0.3638, + "num_input_tokens_seen": 12048768, + "step": 3830 + }, + { + "epoch": 0.2455028487292747, + "grad_norm": 37.17061233520508, + "learning_rate": 1.8739580074107208e-06, + "loss": 0.395, + "num_input_tokens_seen": 12065088, + "step": 3835 + }, + { + "epoch": 0.24582293067025157, + "grad_norm": 38.826255798339844, + "learning_rate": 1.873414419355798e-06, + "loss": 0.6844, + "num_input_tokens_seen": 12080704, + "step": 3840 + }, + { + "epoch": 0.24614301261122848, + "grad_norm": 40.032527923583984, + "learning_rate": 1.872869740832717e-06, + "loss": 0.4292, + "num_input_tokens_seen": 12096704, + "step": 3845 + }, + { + "epoch": 0.24646309455220536, + "grad_norm": 36.49966049194336, + "learning_rate": 1.8723239725215165e-06, + "loss": 0.6103, + "num_input_tokens_seen": 12111488, + "step": 3850 + }, + { + "epoch": 0.24678317649318227, + "grad_norm": 22.378215789794922, + "learning_rate": 1.871777115103594e-06, + "loss": 0.4206, + "num_input_tokens_seen": 12128192, + "step": 3855 + }, + { + "epoch": 0.24710325843415915, + "grad_norm": 21.57525634765625, + "learning_rate": 1.8712291692617074e-06, + "loss": 0.4786, + "num_input_tokens_seen": 12143808, + "step": 3860 + }, + { + "epoch": 0.24742334037513602, + "grad_norm": 32.303707122802734, + "learning_rate": 1.8706801356799735e-06, + "loss": 0.4804, + "num_input_tokens_seen": 12159232, + "step": 3865 + }, + { + "epoch": 0.24774342231611293, + "grad_norm": 26.57257843017578, + "learning_rate": 1.8701300150438674e-06, + "loss": 0.4465, + "num_input_tokens_seen": 12175360, + "step": 3870 + }, + { + "epoch": 0.2480635042570898, + "grad_norm": 17.5268611907959, + "learning_rate": 1.869578808040221e-06, + "loss": 0.4191, + "num_input_tokens_seen": 12190272, + "step": 3875 + }, + { + "epoch": 0.2483835861980667, + "grad_norm": 48.708431243896484, + "learning_rate": 1.869026515357223e-06, + "loss": 0.5149, + "num_input_tokens_seen": 12208448, + "step": 3880 + }, + { + "epoch": 0.2487036681390436, + "grad_norm": 38.29990005493164, + "learning_rate": 1.8684731376844169e-06, + "loss": 0.6372, + "num_input_tokens_seen": 12225984, + "step": 3885 + }, + { + "epoch": 0.24902375008002048, + "grad_norm": 33.091251373291016, + "learning_rate": 1.8679186757127014e-06, + "loss": 0.4965, + "num_input_tokens_seen": 12241408, + "step": 3890 + }, + { + "epoch": 0.24934383202099739, + "grad_norm": 30.313892364501953, + "learning_rate": 1.8673631301343288e-06, + "loss": 0.4381, + "num_input_tokens_seen": 12256064, + "step": 3895 + }, + { + "epoch": 0.24966391396197427, + "grad_norm": 26.932268142700195, + "learning_rate": 1.8668065016429044e-06, + "loss": 0.4388, + "num_input_tokens_seen": 12272832, + "step": 3900 + }, + { + "epoch": 0.24998399590295114, + "grad_norm": 22.444902420043945, + "learning_rate": 1.866248790933385e-06, + "loss": 0.5257, + "num_input_tokens_seen": 12289024, + "step": 3905 + }, + { + "epoch": 0.25030407784392805, + "grad_norm": 27.67203140258789, + "learning_rate": 1.8656899987020795e-06, + "loss": 0.4226, + "num_input_tokens_seen": 12304064, + "step": 3910 + }, + { + "epoch": 0.25030407784392805, + "eval_loss": 0.4644124507904053, + "eval_runtime": 49.2047, + "eval_samples_per_second": 282.209, + "eval_steps_per_second": 35.281, + "num_input_tokens_seen": 12304064, + "step": 3910 + }, + { + "epoch": 0.25062415978490493, + "grad_norm": 31.06105613708496, + "learning_rate": 1.865130125646646e-06, + "loss": 0.4605, + "num_input_tokens_seen": 12320256, + "step": 3915 + }, + { + "epoch": 0.2509442417258818, + "grad_norm": 21.309823989868164, + "learning_rate": 1.8645691724660933e-06, + "loss": 0.4394, + "num_input_tokens_seen": 12335360, + "step": 3920 + }, + { + "epoch": 0.2512643236668587, + "grad_norm": 24.060503005981445, + "learning_rate": 1.8640071398607774e-06, + "loss": 0.4616, + "num_input_tokens_seen": 12351488, + "step": 3925 + }, + { + "epoch": 0.2515844056078356, + "grad_norm": 58.631771087646484, + "learning_rate": 1.8634440285324024e-06, + "loss": 0.6203, + "num_input_tokens_seen": 12365952, + "step": 3930 + }, + { + "epoch": 0.2519044875488125, + "grad_norm": 54.601966857910156, + "learning_rate": 1.8628798391840205e-06, + "loss": 0.469, + "num_input_tokens_seen": 12381376, + "step": 3935 + }, + { + "epoch": 0.2522245694897894, + "grad_norm": 57.81584548950195, + "learning_rate": 1.8623145725200277e-06, + "loss": 0.4588, + "num_input_tokens_seen": 12396160, + "step": 3940 + }, + { + "epoch": 0.25254465143076626, + "grad_norm": 27.153488159179688, + "learning_rate": 1.8617482292461664e-06, + "loss": 0.4468, + "num_input_tokens_seen": 12410944, + "step": 3945 + }, + { + "epoch": 0.25286473337174314, + "grad_norm": 25.399364471435547, + "learning_rate": 1.861180810069523e-06, + "loss": 0.4172, + "num_input_tokens_seen": 12426304, + "step": 3950 + }, + { + "epoch": 0.2531848153127201, + "grad_norm": 41.58170700073242, + "learning_rate": 1.8606123156985268e-06, + "loss": 0.4599, + "num_input_tokens_seen": 12442432, + "step": 3955 + }, + { + "epoch": 0.25350489725369696, + "grad_norm": 19.8244686126709, + "learning_rate": 1.8600427468429496e-06, + "loss": 0.4617, + "num_input_tokens_seen": 12458368, + "step": 3960 + }, + { + "epoch": 0.25382497919467384, + "grad_norm": 30.747608184814453, + "learning_rate": 1.8594721042139052e-06, + "loss": 0.4302, + "num_input_tokens_seen": 12474368, + "step": 3965 + }, + { + "epoch": 0.2541450611356507, + "grad_norm": 18.357315063476562, + "learning_rate": 1.858900388523847e-06, + "loss": 0.4147, + "num_input_tokens_seen": 12490176, + "step": 3970 + }, + { + "epoch": 0.2544651430766276, + "grad_norm": 25.5488224029541, + "learning_rate": 1.8583276004865694e-06, + "loss": 0.4639, + "num_input_tokens_seen": 12507840, + "step": 3975 + }, + { + "epoch": 0.25478522501760453, + "grad_norm": 38.78436279296875, + "learning_rate": 1.8577537408172046e-06, + "loss": 0.3452, + "num_input_tokens_seen": 12523520, + "step": 3980 + }, + { + "epoch": 0.2551053069585814, + "grad_norm": 32.23760986328125, + "learning_rate": 1.8571788102322234e-06, + "loss": 0.5365, + "num_input_tokens_seen": 12540736, + "step": 3985 + }, + { + "epoch": 0.2554253888995583, + "grad_norm": 34.73612976074219, + "learning_rate": 1.8566028094494332e-06, + "loss": 0.4704, + "num_input_tokens_seen": 12556352, + "step": 3990 + }, + { + "epoch": 0.25574547084053517, + "grad_norm": 21.44598388671875, + "learning_rate": 1.8560257391879778e-06, + "loss": 0.3726, + "num_input_tokens_seen": 12570688, + "step": 3995 + }, + { + "epoch": 0.25606555278151205, + "grad_norm": 16.398038864135742, + "learning_rate": 1.855447600168336e-06, + "loss": 0.4038, + "num_input_tokens_seen": 12585984, + "step": 4000 + }, + { + "epoch": 0.25638563472248893, + "grad_norm": 19.45931053161621, + "learning_rate": 1.8548683931123215e-06, + "loss": 0.4665, + "num_input_tokens_seen": 12601216, + "step": 4005 + }, + { + "epoch": 0.25670571666346587, + "grad_norm": 65.39263916015625, + "learning_rate": 1.8542881187430807e-06, + "loss": 0.4408, + "num_input_tokens_seen": 12618624, + "step": 4010 + }, + { + "epoch": 0.25702579860444275, + "grad_norm": 24.916526794433594, + "learning_rate": 1.8537067777850935e-06, + "loss": 0.5792, + "num_input_tokens_seen": 12635840, + "step": 4015 + }, + { + "epoch": 0.2573458805454196, + "grad_norm": 21.44871711730957, + "learning_rate": 1.8531243709641704e-06, + "loss": 0.3554, + "num_input_tokens_seen": 12651904, + "step": 4020 + }, + { + "epoch": 0.2576659624863965, + "grad_norm": 37.30930709838867, + "learning_rate": 1.8525408990074533e-06, + "loss": 0.4923, + "num_input_tokens_seen": 12666944, + "step": 4025 + }, + { + "epoch": 0.2579860444273734, + "grad_norm": 14.11586856842041, + "learning_rate": 1.851956362643414e-06, + "loss": 0.4155, + "num_input_tokens_seen": 12682688, + "step": 4030 + }, + { + "epoch": 0.2583061263683503, + "grad_norm": 43.13747024536133, + "learning_rate": 1.851370762601853e-06, + "loss": 0.5472, + "num_input_tokens_seen": 12698304, + "step": 4035 + }, + { + "epoch": 0.2586262083093272, + "grad_norm": 41.56428527832031, + "learning_rate": 1.8507840996138983e-06, + "loss": 0.4995, + "num_input_tokens_seen": 12712896, + "step": 4040 + }, + { + "epoch": 0.2589462902503041, + "grad_norm": 61.59485626220703, + "learning_rate": 1.8501963744120062e-06, + "loss": 0.39, + "num_input_tokens_seen": 12727488, + "step": 4045 + }, + { + "epoch": 0.25926637219128096, + "grad_norm": 34.89384078979492, + "learning_rate": 1.849607587729958e-06, + "loss": 0.4037, + "num_input_tokens_seen": 12742720, + "step": 4050 + }, + { + "epoch": 0.25958645413225784, + "grad_norm": 26.042404174804688, + "learning_rate": 1.8490177403028615e-06, + "loss": 0.3918, + "num_input_tokens_seen": 12757760, + "step": 4055 + }, + { + "epoch": 0.2599065360732348, + "grad_norm": 39.44220733642578, + "learning_rate": 1.8484268328671475e-06, + "loss": 0.4879, + "num_input_tokens_seen": 12773312, + "step": 4060 + }, + { + "epoch": 0.26022661801421165, + "grad_norm": 41.2028923034668, + "learning_rate": 1.847834866160571e-06, + "loss": 0.553, + "num_input_tokens_seen": 12790336, + "step": 4065 + }, + { + "epoch": 0.26054669995518853, + "grad_norm": 26.452022552490234, + "learning_rate": 1.847241840922209e-06, + "loss": 0.4995, + "num_input_tokens_seen": 12805632, + "step": 4070 + }, + { + "epoch": 0.2608667818961654, + "grad_norm": 36.87411117553711, + "learning_rate": 1.8466477578924616e-06, + "loss": 0.4861, + "num_input_tokens_seen": 12821184, + "step": 4075 + }, + { + "epoch": 0.2611868638371423, + "grad_norm": 30.8194522857666, + "learning_rate": 1.8460526178130472e-06, + "loss": 0.5037, + "num_input_tokens_seen": 12836544, + "step": 4080 + }, + { + "epoch": 0.26150694577811917, + "grad_norm": 37.22843551635742, + "learning_rate": 1.8454564214270056e-06, + "loss": 0.4307, + "num_input_tokens_seen": 12852032, + "step": 4085 + }, + { + "epoch": 0.2618270277190961, + "grad_norm": 46.01398468017578, + "learning_rate": 1.8448591694786955e-06, + "loss": 0.446, + "num_input_tokens_seen": 12867456, + "step": 4090 + }, + { + "epoch": 0.262147109660073, + "grad_norm": 30.995271682739258, + "learning_rate": 1.8442608627137925e-06, + "loss": 0.3206, + "num_input_tokens_seen": 12885184, + "step": 4095 + }, + { + "epoch": 0.26246719160104987, + "grad_norm": 30.171613693237305, + "learning_rate": 1.8436615018792897e-06, + "loss": 0.3815, + "num_input_tokens_seen": 12900416, + "step": 4100 + }, + { + "epoch": 0.26278727354202674, + "grad_norm": 38.23905563354492, + "learning_rate": 1.8430610877234957e-06, + "loss": 0.5722, + "num_input_tokens_seen": 12915648, + "step": 4105 + }, + { + "epoch": 0.2631073554830036, + "grad_norm": 15.184795379638672, + "learning_rate": 1.8424596209960356e-06, + "loss": 0.4491, + "num_input_tokens_seen": 12930368, + "step": 4110 + }, + { + "epoch": 0.26342743742398056, + "grad_norm": 24.648910522460938, + "learning_rate": 1.8418571024478466e-06, + "loss": 0.5253, + "num_input_tokens_seen": 12945472, + "step": 4115 + }, + { + "epoch": 0.26374751936495744, + "grad_norm": 24.325111389160156, + "learning_rate": 1.8412535328311812e-06, + "loss": 0.4884, + "num_input_tokens_seen": 12961472, + "step": 4120 + }, + { + "epoch": 0.2640676013059343, + "grad_norm": 67.2924575805664, + "learning_rate": 1.8406489128996023e-06, + "loss": 0.5935, + "num_input_tokens_seen": 12975872, + "step": 4125 + }, + { + "epoch": 0.2643876832469112, + "grad_norm": 33.307865142822266, + "learning_rate": 1.8400432434079853e-06, + "loss": 0.5286, + "num_input_tokens_seen": 12992128, + "step": 4130 + }, + { + "epoch": 0.2647077651878881, + "grad_norm": 17.04827308654785, + "learning_rate": 1.8394365251125162e-06, + "loss": 0.4112, + "num_input_tokens_seen": 13021184, + "step": 4135 + }, + { + "epoch": 0.265027847128865, + "grad_norm": 31.74374771118164, + "learning_rate": 1.8388287587706888e-06, + "loss": 0.4385, + "num_input_tokens_seen": 13037568, + "step": 4140 + }, + { + "epoch": 0.2653479290698419, + "grad_norm": 35.290184020996094, + "learning_rate": 1.8382199451413074e-06, + "loss": 0.4655, + "num_input_tokens_seen": 13053440, + "step": 4145 + }, + { + "epoch": 0.26566801101081877, + "grad_norm": 35.621437072753906, + "learning_rate": 1.837610084984483e-06, + "loss": 0.5121, + "num_input_tokens_seen": 13069440, + "step": 4150 + }, + { + "epoch": 0.26598809295179565, + "grad_norm": 59.76009750366211, + "learning_rate": 1.8369991790616327e-06, + "loss": 0.5466, + "num_input_tokens_seen": 13084224, + "step": 4155 + }, + { + "epoch": 0.26630817489277253, + "grad_norm": 38.1486701965332, + "learning_rate": 1.8363872281354795e-06, + "loss": 0.6597, + "num_input_tokens_seen": 13098688, + "step": 4160 + }, + { + "epoch": 0.26662825683374947, + "grad_norm": 33.94224166870117, + "learning_rate": 1.835774232970052e-06, + "loss": 0.4049, + "num_input_tokens_seen": 13114112, + "step": 4165 + }, + { + "epoch": 0.26694833877472635, + "grad_norm": 29.897977828979492, + "learning_rate": 1.8351601943306815e-06, + "loss": 0.4672, + "num_input_tokens_seen": 13130240, + "step": 4170 + }, + { + "epoch": 0.2672684207157032, + "grad_norm": 41.0724983215332, + "learning_rate": 1.8345451129840025e-06, + "loss": 0.3994, + "num_input_tokens_seen": 13145536, + "step": 4175 + }, + { + "epoch": 0.2675885026566801, + "grad_norm": 37.96142578125, + "learning_rate": 1.8339289896979515e-06, + "loss": 0.552, + "num_input_tokens_seen": 13160256, + "step": 4180 + }, + { + "epoch": 0.267908584597657, + "grad_norm": 37.417449951171875, + "learning_rate": 1.8333118252417651e-06, + "loss": 0.5336, + "num_input_tokens_seen": 13177088, + "step": 4185 + }, + { + "epoch": 0.26822866653863386, + "grad_norm": 32.74960708618164, + "learning_rate": 1.832693620385981e-06, + "loss": 0.5098, + "num_input_tokens_seen": 13192768, + "step": 4190 + }, + { + "epoch": 0.2685487484796108, + "grad_norm": 27.491313934326172, + "learning_rate": 1.8320743759024352e-06, + "loss": 0.5183, + "num_input_tokens_seen": 13208192, + "step": 4195 + }, + { + "epoch": 0.2688688304205877, + "grad_norm": 38.285240173339844, + "learning_rate": 1.831454092564261e-06, + "loss": 0.5242, + "num_input_tokens_seen": 13223872, + "step": 4200 + }, + { + "epoch": 0.26918891236156456, + "grad_norm": 20.660884857177734, + "learning_rate": 1.8308327711458899e-06, + "loss": 0.4714, + "num_input_tokens_seen": 13239104, + "step": 4205 + }, + { + "epoch": 0.26950899430254144, + "grad_norm": 36.68329620361328, + "learning_rate": 1.830210412423049e-06, + "loss": 0.3844, + "num_input_tokens_seen": 13254464, + "step": 4210 + }, + { + "epoch": 0.2698290762435183, + "grad_norm": 22.882728576660156, + "learning_rate": 1.8295870171727605e-06, + "loss": 0.3647, + "num_input_tokens_seen": 13269824, + "step": 4215 + }, + { + "epoch": 0.27014915818449525, + "grad_norm": 20.831666946411133, + "learning_rate": 1.8289625861733408e-06, + "loss": 0.4194, + "num_input_tokens_seen": 13288448, + "step": 4220 + }, + { + "epoch": 0.27046924012547213, + "grad_norm": 34.60063171386719, + "learning_rate": 1.8283371202043991e-06, + "loss": 0.5194, + "num_input_tokens_seen": 13304320, + "step": 4225 + }, + { + "epoch": 0.270789322066449, + "grad_norm": 39.810787200927734, + "learning_rate": 1.827710620046837e-06, + "loss": 0.5503, + "num_input_tokens_seen": 13321920, + "step": 4230 + }, + { + "epoch": 0.2711094040074259, + "grad_norm": 52.01685333251953, + "learning_rate": 1.8270830864828474e-06, + "loss": 0.4687, + "num_input_tokens_seen": 13337280, + "step": 4235 + }, + { + "epoch": 0.27142948594840277, + "grad_norm": 15.508134841918945, + "learning_rate": 1.8264545202959133e-06, + "loss": 0.4287, + "num_input_tokens_seen": 13354112, + "step": 4240 + }, + { + "epoch": 0.2717495678893797, + "grad_norm": 32.78725814819336, + "learning_rate": 1.8258249222708067e-06, + "loss": 0.4321, + "num_input_tokens_seen": 13369600, + "step": 4245 + }, + { + "epoch": 0.2720696498303566, + "grad_norm": 23.458738327026367, + "learning_rate": 1.8251942931935886e-06, + "loss": 0.4464, + "num_input_tokens_seen": 13385536, + "step": 4250 + }, + { + "epoch": 0.27238973177133347, + "grad_norm": 31.733396530151367, + "learning_rate": 1.8245626338516069e-06, + "loss": 0.3788, + "num_input_tokens_seen": 13400832, + "step": 4255 + }, + { + "epoch": 0.27270981371231034, + "grad_norm": 35.16189956665039, + "learning_rate": 1.823929945033495e-06, + "loss": 0.3397, + "num_input_tokens_seen": 13416000, + "step": 4260 + }, + { + "epoch": 0.2730298956532872, + "grad_norm": 31.286619186401367, + "learning_rate": 1.8232962275291728e-06, + "loss": 0.5015, + "num_input_tokens_seen": 13431360, + "step": 4265 + }, + { + "epoch": 0.2733499775942641, + "grad_norm": 45.81655502319336, + "learning_rate": 1.822661482129844e-06, + "loss": 0.4342, + "num_input_tokens_seen": 13446976, + "step": 4270 + }, + { + "epoch": 0.27367005953524104, + "grad_norm": 21.677684783935547, + "learning_rate": 1.8220257096279956e-06, + "loss": 0.3796, + "num_input_tokens_seen": 13463040, + "step": 4275 + }, + { + "epoch": 0.2739901414762179, + "grad_norm": 35.41159439086914, + "learning_rate": 1.8213889108173972e-06, + "loss": 0.6798, + "num_input_tokens_seen": 13478656, + "step": 4280 + }, + { + "epoch": 0.2743102234171948, + "grad_norm": 20.70133399963379, + "learning_rate": 1.8207510864930992e-06, + "loss": 0.4843, + "num_input_tokens_seen": 13495296, + "step": 4285 + }, + { + "epoch": 0.2746303053581717, + "grad_norm": 18.472976684570312, + "learning_rate": 1.8201122374514336e-06, + "loss": 0.5024, + "num_input_tokens_seen": 13510912, + "step": 4290 + }, + { + "epoch": 0.27495038729914856, + "grad_norm": 22.679168701171875, + "learning_rate": 1.8194723644900099e-06, + "loss": 0.4465, + "num_input_tokens_seen": 13525952, + "step": 4295 + }, + { + "epoch": 0.2752704692401255, + "grad_norm": 25.11664390563965, + "learning_rate": 1.8188314684077173e-06, + "loss": 0.5334, + "num_input_tokens_seen": 13546752, + "step": 4300 + }, + { + "epoch": 0.2755905511811024, + "grad_norm": 37.698638916015625, + "learning_rate": 1.8181895500047226e-06, + "loss": 0.5659, + "num_input_tokens_seen": 13561728, + "step": 4305 + }, + { + "epoch": 0.27591063312207925, + "grad_norm": 21.342445373535156, + "learning_rate": 1.817546610082468e-06, + "loss": 0.4559, + "num_input_tokens_seen": 13577344, + "step": 4310 + }, + { + "epoch": 0.27623071506305613, + "grad_norm": 25.98567008972168, + "learning_rate": 1.816902649443672e-06, + "loss": 0.4806, + "num_input_tokens_seen": 13592256, + "step": 4315 + }, + { + "epoch": 0.276550797004033, + "grad_norm": 36.9737548828125, + "learning_rate": 1.8162576688923262e-06, + "loss": 0.5351, + "num_input_tokens_seen": 13608832, + "step": 4320 + }, + { + "epoch": 0.27687087894500995, + "grad_norm": 25.08713150024414, + "learning_rate": 1.815611669233697e-06, + "loss": 0.5544, + "num_input_tokens_seen": 13624128, + "step": 4325 + }, + { + "epoch": 0.2771909608859868, + "grad_norm": 25.511003494262695, + "learning_rate": 1.8149646512743222e-06, + "loss": 0.5301, + "num_input_tokens_seen": 13640576, + "step": 4330 + }, + { + "epoch": 0.2775110428269637, + "grad_norm": 22.00773048400879, + "learning_rate": 1.8143166158220118e-06, + "loss": 0.4513, + "num_input_tokens_seen": 13655872, + "step": 4335 + }, + { + "epoch": 0.2778311247679406, + "grad_norm": 41.66020584106445, + "learning_rate": 1.8136675636858454e-06, + "loss": 0.6679, + "num_input_tokens_seen": 13672384, + "step": 4340 + }, + { + "epoch": 0.27815120670891746, + "grad_norm": 20.195674896240234, + "learning_rate": 1.8130174956761723e-06, + "loss": 0.3988, + "num_input_tokens_seen": 13687296, + "step": 4345 + }, + { + "epoch": 0.2784712886498944, + "grad_norm": 25.734270095825195, + "learning_rate": 1.81236641260461e-06, + "loss": 0.5363, + "num_input_tokens_seen": 13702528, + "step": 4350 + }, + { + "epoch": 0.2787913705908713, + "grad_norm": 67.11882019042969, + "learning_rate": 1.811714315284043e-06, + "loss": 0.5002, + "num_input_tokens_seen": 13717568, + "step": 4355 + }, + { + "epoch": 0.27911145253184816, + "grad_norm": 19.78514862060547, + "learning_rate": 1.8110612045286229e-06, + "loss": 0.4016, + "num_input_tokens_seen": 13733568, + "step": 4360 + }, + { + "epoch": 0.27943153447282504, + "grad_norm": 20.73729705810547, + "learning_rate": 1.8104070811537661e-06, + "loss": 0.3744, + "num_input_tokens_seen": 13749312, + "step": 4365 + }, + { + "epoch": 0.2797516164138019, + "grad_norm": 16.582807540893555, + "learning_rate": 1.8097519459761533e-06, + "loss": 0.4299, + "num_input_tokens_seen": 13765952, + "step": 4370 + }, + { + "epoch": 0.2800716983547788, + "grad_norm": 47.0535888671875, + "learning_rate": 1.8090957998137283e-06, + "loss": 0.495, + "num_input_tokens_seen": 13781440, + "step": 4375 + }, + { + "epoch": 0.28039178029575573, + "grad_norm": 53.1851921081543, + "learning_rate": 1.8084386434856978e-06, + "loss": 0.4471, + "num_input_tokens_seen": 13796864, + "step": 4380 + }, + { + "epoch": 0.2807118622367326, + "grad_norm": 26.30471420288086, + "learning_rate": 1.8077804778125283e-06, + "loss": 0.4915, + "num_input_tokens_seen": 13812736, + "step": 4385 + }, + { + "epoch": 0.2810319441777095, + "grad_norm": 60.074981689453125, + "learning_rate": 1.807121303615948e-06, + "loss": 0.4966, + "num_input_tokens_seen": 13828288, + "step": 4390 + }, + { + "epoch": 0.28135202611868637, + "grad_norm": 40.989219665527344, + "learning_rate": 1.8064611217189434e-06, + "loss": 0.4125, + "num_input_tokens_seen": 13845568, + "step": 4395 + }, + { + "epoch": 0.28167210805966325, + "grad_norm": 25.27169418334961, + "learning_rate": 1.8057999329457596e-06, + "loss": 0.398, + "num_input_tokens_seen": 13860608, + "step": 4400 + }, + { + "epoch": 0.2819921900006402, + "grad_norm": 39.82872772216797, + "learning_rate": 1.8051377381218984e-06, + "loss": 0.5663, + "num_input_tokens_seen": 13876608, + "step": 4405 + }, + { + "epoch": 0.28231227194161707, + "grad_norm": 34.87173080444336, + "learning_rate": 1.8044745380741177e-06, + "loss": 0.5656, + "num_input_tokens_seen": 13893632, + "step": 4410 + }, + { + "epoch": 0.28263235388259395, + "grad_norm": 49.1501579284668, + "learning_rate": 1.8038103336304306e-06, + "loss": 0.3896, + "num_input_tokens_seen": 13909312, + "step": 4415 + }, + { + "epoch": 0.2829524358235708, + "grad_norm": 27.521867752075195, + "learning_rate": 1.8031451256201042e-06, + "loss": 0.5699, + "num_input_tokens_seen": 13925824, + "step": 4420 + }, + { + "epoch": 0.2832725177645477, + "grad_norm": 25.578853607177734, + "learning_rate": 1.8024789148736589e-06, + "loss": 0.5385, + "num_input_tokens_seen": 13942336, + "step": 4425 + }, + { + "epoch": 0.28359259970552464, + "grad_norm": 27.650800704956055, + "learning_rate": 1.8018117022228655e-06, + "loss": 0.392, + "num_input_tokens_seen": 13957760, + "step": 4430 + }, + { + "epoch": 0.2839126816465015, + "grad_norm": 49.428855895996094, + "learning_rate": 1.8011434885007479e-06, + "loss": 0.4997, + "num_input_tokens_seen": 13972992, + "step": 4435 + }, + { + "epoch": 0.2842327635874784, + "grad_norm": 30.81421661376953, + "learning_rate": 1.8004742745415787e-06, + "loss": 0.4308, + "num_input_tokens_seen": 13988736, + "step": 4440 + }, + { + "epoch": 0.2845528455284553, + "grad_norm": 23.36966323852539, + "learning_rate": 1.799804061180879e-06, + "loss": 0.5427, + "num_input_tokens_seen": 14003520, + "step": 4445 + }, + { + "epoch": 0.28487292746943216, + "grad_norm": 29.571027755737305, + "learning_rate": 1.799132849255418e-06, + "loss": 0.518, + "num_input_tokens_seen": 14020608, + "step": 4450 + }, + { + "epoch": 0.28519300941040904, + "grad_norm": 34.7742919921875, + "learning_rate": 1.798460639603212e-06, + "loss": 0.4011, + "num_input_tokens_seen": 14035328, + "step": 4455 + }, + { + "epoch": 0.285513091351386, + "grad_norm": 37.04494094848633, + "learning_rate": 1.7977874330635224e-06, + "loss": 0.4805, + "num_input_tokens_seen": 14050816, + "step": 4460 + }, + { + "epoch": 0.28583317329236285, + "grad_norm": 18.75509262084961, + "learning_rate": 1.7971132304768555e-06, + "loss": 0.3289, + "num_input_tokens_seen": 14066880, + "step": 4465 + }, + { + "epoch": 0.28615325523333973, + "grad_norm": 24.66355323791504, + "learning_rate": 1.7964380326849612e-06, + "loss": 0.4937, + "num_input_tokens_seen": 14081728, + "step": 4470 + }, + { + "epoch": 0.2864733371743166, + "grad_norm": 18.791399002075195, + "learning_rate": 1.795761840530832e-06, + "loss": 0.4941, + "num_input_tokens_seen": 14097984, + "step": 4475 + }, + { + "epoch": 0.2867934191152935, + "grad_norm": 27.4366455078125, + "learning_rate": 1.7950846548587015e-06, + "loss": 0.4208, + "num_input_tokens_seen": 14115264, + "step": 4480 + }, + { + "epoch": 0.2871135010562704, + "grad_norm": 17.53047752380371, + "learning_rate": 1.7944064765140445e-06, + "loss": 0.2799, + "num_input_tokens_seen": 14129472, + "step": 4485 + }, + { + "epoch": 0.2874335829972473, + "grad_norm": 34.00762939453125, + "learning_rate": 1.7937273063435735e-06, + "loss": 0.55, + "num_input_tokens_seen": 14144896, + "step": 4490 + }, + { + "epoch": 0.2877536649382242, + "grad_norm": 27.387237548828125, + "learning_rate": 1.7930471451952416e-06, + "loss": 0.3622, + "num_input_tokens_seen": 14159744, + "step": 4495 + }, + { + "epoch": 0.28807374687920106, + "grad_norm": 39.22768020629883, + "learning_rate": 1.7923659939182377e-06, + "loss": 0.4915, + "num_input_tokens_seen": 14176384, + "step": 4500 + }, + { + "epoch": 0.28839382882017794, + "grad_norm": 39.973106384277344, + "learning_rate": 1.7916838533629866e-06, + "loss": 0.5376, + "num_input_tokens_seen": 14192320, + "step": 4505 + }, + { + "epoch": 0.2887139107611549, + "grad_norm": 27.084346771240234, + "learning_rate": 1.7910007243811493e-06, + "loss": 0.397, + "num_input_tokens_seen": 14208192, + "step": 4510 + }, + { + "epoch": 0.28903399270213176, + "grad_norm": 51.122711181640625, + "learning_rate": 1.7903166078256202e-06, + "loss": 0.5486, + "num_input_tokens_seen": 14223104, + "step": 4515 + }, + { + "epoch": 0.28935407464310864, + "grad_norm": 49.78089141845703, + "learning_rate": 1.789631504550527e-06, + "loss": 0.4153, + "num_input_tokens_seen": 14238464, + "step": 4520 + }, + { + "epoch": 0.2896741565840855, + "grad_norm": 32.12791061401367, + "learning_rate": 1.7889454154112288e-06, + "loss": 0.384, + "num_input_tokens_seen": 14254656, + "step": 4525 + }, + { + "epoch": 0.2899942385250624, + "grad_norm": 43.227901458740234, + "learning_rate": 1.7882583412643167e-06, + "loss": 0.3983, + "num_input_tokens_seen": 14268928, + "step": 4530 + }, + { + "epoch": 0.29031432046603933, + "grad_norm": 31.457603454589844, + "learning_rate": 1.78757028296761e-06, + "loss": 0.4326, + "num_input_tokens_seen": 14285952, + "step": 4535 + }, + { + "epoch": 0.2906344024070162, + "grad_norm": 18.678508758544922, + "learning_rate": 1.7868812413801582e-06, + "loss": 0.3522, + "num_input_tokens_seen": 14301760, + "step": 4540 + }, + { + "epoch": 0.2909544843479931, + "grad_norm": 53.38247299194336, + "learning_rate": 1.7861912173622372e-06, + "loss": 0.4976, + "num_input_tokens_seen": 14318208, + "step": 4545 + }, + { + "epoch": 0.29127456628896997, + "grad_norm": 41.86543655395508, + "learning_rate": 1.7855002117753504e-06, + "loss": 0.4597, + "num_input_tokens_seen": 14334144, + "step": 4550 + }, + { + "epoch": 0.29159464822994685, + "grad_norm": 49.806610107421875, + "learning_rate": 1.7848082254822266e-06, + "loss": 0.5283, + "num_input_tokens_seen": 14349120, + "step": 4555 + }, + { + "epoch": 0.29191473017092373, + "grad_norm": 56.75021743774414, + "learning_rate": 1.7841152593468185e-06, + "loss": 0.4868, + "num_input_tokens_seen": 14365376, + "step": 4560 + }, + { + "epoch": 0.29223481211190067, + "grad_norm": 34.16107940673828, + "learning_rate": 1.7834213142343026e-06, + "loss": 0.4582, + "num_input_tokens_seen": 14381568, + "step": 4565 + }, + { + "epoch": 0.29255489405287755, + "grad_norm": 28.742692947387695, + "learning_rate": 1.7827263910110777e-06, + "loss": 0.4626, + "num_input_tokens_seen": 14397312, + "step": 4570 + }, + { + "epoch": 0.2928749759938544, + "grad_norm": 34.53966522216797, + "learning_rate": 1.7820304905447632e-06, + "loss": 0.4372, + "num_input_tokens_seen": 14412928, + "step": 4575 + }, + { + "epoch": 0.2931950579348313, + "grad_norm": 47.14699935913086, + "learning_rate": 1.7813336137041991e-06, + "loss": 0.446, + "num_input_tokens_seen": 14427968, + "step": 4580 + }, + { + "epoch": 0.2935151398758082, + "grad_norm": 37.16606140136719, + "learning_rate": 1.7806357613594447e-06, + "loss": 0.3693, + "num_input_tokens_seen": 14442944, + "step": 4585 + }, + { + "epoch": 0.2938352218167851, + "grad_norm": 19.43882179260254, + "learning_rate": 1.7799369343817764e-06, + "loss": 0.4481, + "num_input_tokens_seen": 14458176, + "step": 4590 + }, + { + "epoch": 0.294155303757762, + "grad_norm": 24.445056915283203, + "learning_rate": 1.7792371336436883e-06, + "loss": 0.3566, + "num_input_tokens_seen": 14473600, + "step": 4595 + }, + { + "epoch": 0.2944753856987389, + "grad_norm": 28.31954574584961, + "learning_rate": 1.7785363600188892e-06, + "loss": 0.6518, + "num_input_tokens_seen": 14488896, + "step": 4600 + }, + { + "epoch": 0.29479546763971576, + "grad_norm": 38.648948669433594, + "learning_rate": 1.7778346143823038e-06, + "loss": 0.5881, + "num_input_tokens_seen": 14502784, + "step": 4605 + }, + { + "epoch": 0.29511554958069264, + "grad_norm": 33.51401138305664, + "learning_rate": 1.7771318976100696e-06, + "loss": 0.4293, + "num_input_tokens_seen": 14520000, + "step": 4610 + }, + { + "epoch": 0.2954356315216696, + "grad_norm": 28.780546188354492, + "learning_rate": 1.7764282105795364e-06, + "loss": 0.3401, + "num_input_tokens_seen": 14536320, + "step": 4615 + }, + { + "epoch": 0.29575571346264645, + "grad_norm": 47.155277252197266, + "learning_rate": 1.7757235541692663e-06, + "loss": 0.4524, + "num_input_tokens_seen": 14551808, + "step": 4620 + }, + { + "epoch": 0.29607579540362333, + "grad_norm": 19.841266632080078, + "learning_rate": 1.7750179292590306e-06, + "loss": 0.3157, + "num_input_tokens_seen": 14566976, + "step": 4625 + }, + { + "epoch": 0.2963958773446002, + "grad_norm": 26.28995132446289, + "learning_rate": 1.7743113367298107e-06, + "loss": 0.3475, + "num_input_tokens_seen": 14583104, + "step": 4630 + }, + { + "epoch": 0.2967159592855771, + "grad_norm": 38.58869552612305, + "learning_rate": 1.7736037774637955e-06, + "loss": 0.4454, + "num_input_tokens_seen": 14598336, + "step": 4635 + }, + { + "epoch": 0.29703604122655397, + "grad_norm": 50.025482177734375, + "learning_rate": 1.772895252344381e-06, + "loss": 0.5142, + "num_input_tokens_seen": 14615232, + "step": 4640 + }, + { + "epoch": 0.2973561231675309, + "grad_norm": 19.640771865844727, + "learning_rate": 1.7721857622561692e-06, + "loss": 0.3932, + "num_input_tokens_seen": 14630848, + "step": 4645 + }, + { + "epoch": 0.2976762051085078, + "grad_norm": 31.551252365112305, + "learning_rate": 1.7714753080849664e-06, + "loss": 0.4601, + "num_input_tokens_seen": 14647040, + "step": 4650 + }, + { + "epoch": 0.29799628704948466, + "grad_norm": 22.483062744140625, + "learning_rate": 1.7707638907177837e-06, + "loss": 0.4116, + "num_input_tokens_seen": 14661888, + "step": 4655 + }, + { + "epoch": 0.29831636899046154, + "grad_norm": 143.85166931152344, + "learning_rate": 1.7700515110428336e-06, + "loss": 0.7093, + "num_input_tokens_seen": 14677696, + "step": 4660 + }, + { + "epoch": 0.2986364509314384, + "grad_norm": 26.837242126464844, + "learning_rate": 1.7693381699495307e-06, + "loss": 0.4799, + "num_input_tokens_seen": 14693184, + "step": 4665 + }, + { + "epoch": 0.29895653287241536, + "grad_norm": 30.247093200683594, + "learning_rate": 1.7686238683284894e-06, + "loss": 0.3643, + "num_input_tokens_seen": 14707904, + "step": 4670 + }, + { + "epoch": 0.29927661481339224, + "grad_norm": 24.62070083618164, + "learning_rate": 1.7679086070715237e-06, + "loss": 0.3608, + "num_input_tokens_seen": 14724096, + "step": 4675 + }, + { + "epoch": 0.2995966967543691, + "grad_norm": 36.82127380371094, + "learning_rate": 1.7671923870716459e-06, + "loss": 0.4544, + "num_input_tokens_seen": 14738752, + "step": 4680 + }, + { + "epoch": 0.299916778695346, + "grad_norm": 41.65424346923828, + "learning_rate": 1.7664752092230652e-06, + "loss": 0.3486, + "num_input_tokens_seen": 14753664, + "step": 4685 + }, + { + "epoch": 0.3002368606363229, + "grad_norm": 34.0866813659668, + "learning_rate": 1.7657570744211863e-06, + "loss": 0.3784, + "num_input_tokens_seen": 14769152, + "step": 4690 + }, + { + "epoch": 0.30036489341271366, + "eval_loss": 0.4629112482070923, + "eval_runtime": 49.1915, + "eval_samples_per_second": 282.284, + "eval_steps_per_second": 35.291, + "num_input_tokens_seen": 14775488, + "step": 4692 + }, + { + "epoch": 0.3005569425772998, + "grad_norm": 48.05270004272461, + "learning_rate": 1.765037983562609e-06, + "loss": 0.5028, + "num_input_tokens_seen": 14784128, + "step": 4695 + }, + { + "epoch": 0.3008770245182767, + "grad_norm": 49.29054641723633, + "learning_rate": 1.7643179375451264e-06, + "loss": 0.4459, + "num_input_tokens_seen": 14799936, + "step": 4700 + }, + { + "epoch": 0.30119710645925357, + "grad_norm": 42.15516662597656, + "learning_rate": 1.7635969372677252e-06, + "loss": 0.6083, + "num_input_tokens_seen": 14814208, + "step": 4705 + }, + { + "epoch": 0.30151718840023045, + "grad_norm": 37.26246643066406, + "learning_rate": 1.7628749836305818e-06, + "loss": 0.483, + "num_input_tokens_seen": 14829504, + "step": 4710 + }, + { + "epoch": 0.30183727034120733, + "grad_norm": 30.036657333374023, + "learning_rate": 1.7621520775350645e-06, + "loss": 0.3949, + "num_input_tokens_seen": 14843968, + "step": 4715 + }, + { + "epoch": 0.30215735228218427, + "grad_norm": 33.79453659057617, + "learning_rate": 1.7614282198837293e-06, + "loss": 0.4567, + "num_input_tokens_seen": 14859840, + "step": 4720 + }, + { + "epoch": 0.30247743422316115, + "grad_norm": 39.85743713378906, + "learning_rate": 1.7607034115803219e-06, + "loss": 0.473, + "num_input_tokens_seen": 14875648, + "step": 4725 + }, + { + "epoch": 0.302797516164138, + "grad_norm": 27.397972106933594, + "learning_rate": 1.7599776535297734e-06, + "loss": 0.4192, + "num_input_tokens_seen": 14890560, + "step": 4730 + }, + { + "epoch": 0.3031175981051149, + "grad_norm": 40.91767501831055, + "learning_rate": 1.7592509466382012e-06, + "loss": 0.4702, + "num_input_tokens_seen": 14906688, + "step": 4735 + }, + { + "epoch": 0.3034376800460918, + "grad_norm": 54.96405029296875, + "learning_rate": 1.7585232918129076e-06, + "loss": 0.5561, + "num_input_tokens_seen": 14922496, + "step": 4740 + }, + { + "epoch": 0.30375776198706866, + "grad_norm": 36.16265869140625, + "learning_rate": 1.757794689962378e-06, + "loss": 0.4601, + "num_input_tokens_seen": 14938880, + "step": 4745 + }, + { + "epoch": 0.3040778439280456, + "grad_norm": 44.08560562133789, + "learning_rate": 1.7570651419962807e-06, + "loss": 0.4968, + "num_input_tokens_seen": 14954112, + "step": 4750 + }, + { + "epoch": 0.3043979258690225, + "grad_norm": 42.19171142578125, + "learning_rate": 1.7563346488254647e-06, + "loss": 0.448, + "num_input_tokens_seen": 14969536, + "step": 4755 + }, + { + "epoch": 0.30471800780999936, + "grad_norm": 35.03725051879883, + "learning_rate": 1.755603211361959e-06, + "loss": 0.3373, + "num_input_tokens_seen": 14985728, + "step": 4760 + }, + { + "epoch": 0.30503808975097624, + "grad_norm": 20.99566078186035, + "learning_rate": 1.7548708305189722e-06, + "loss": 0.452, + "num_input_tokens_seen": 15003904, + "step": 4765 + }, + { + "epoch": 0.3053581716919531, + "grad_norm": 59.016563415527344, + "learning_rate": 1.7541375072108905e-06, + "loss": 0.5662, + "num_input_tokens_seen": 15019328, + "step": 4770 + }, + { + "epoch": 0.30567825363293005, + "grad_norm": 45.97145462036133, + "learning_rate": 1.7534032423532766e-06, + "loss": 0.4597, + "num_input_tokens_seen": 15033856, + "step": 4775 + }, + { + "epoch": 0.30599833557390693, + "grad_norm": 22.04340362548828, + "learning_rate": 1.7526680368628685e-06, + "loss": 0.3603, + "num_input_tokens_seen": 15051200, + "step": 4780 + }, + { + "epoch": 0.3063184175148838, + "grad_norm": 32.850303649902344, + "learning_rate": 1.751931891657579e-06, + "loss": 0.4471, + "num_input_tokens_seen": 15066368, + "step": 4785 + }, + { + "epoch": 0.3066384994558607, + "grad_norm": 21.559911727905273, + "learning_rate": 1.7511948076564943e-06, + "loss": 0.3494, + "num_input_tokens_seen": 15081600, + "step": 4790 + }, + { + "epoch": 0.30695858139683757, + "grad_norm": 30.383432388305664, + "learning_rate": 1.7504567857798722e-06, + "loss": 0.5308, + "num_input_tokens_seen": 15097536, + "step": 4795 + }, + { + "epoch": 0.3072786633378145, + "grad_norm": 37.53936767578125, + "learning_rate": 1.7497178269491417e-06, + "loss": 0.5013, + "num_input_tokens_seen": 15113728, + "step": 4800 + }, + { + "epoch": 0.3075987452787914, + "grad_norm": 24.428794860839844, + "learning_rate": 1.7489779320869014e-06, + "loss": 0.5561, + "num_input_tokens_seen": 15130048, + "step": 4805 + }, + { + "epoch": 0.30791882721976827, + "grad_norm": 22.411056518554688, + "learning_rate": 1.7482371021169193e-06, + "loss": 0.3673, + "num_input_tokens_seen": 15145600, + "step": 4810 + }, + { + "epoch": 0.30823890916074514, + "grad_norm": 44.107322692871094, + "learning_rate": 1.7474953379641297e-06, + "loss": 0.3935, + "num_input_tokens_seen": 15162368, + "step": 4815 + }, + { + "epoch": 0.308558991101722, + "grad_norm": 34.96397018432617, + "learning_rate": 1.746752640554634e-06, + "loss": 0.4323, + "num_input_tokens_seen": 15178368, + "step": 4820 + }, + { + "epoch": 0.3088790730426989, + "grad_norm": 26.387361526489258, + "learning_rate": 1.7460090108156988e-06, + "loss": 0.5467, + "num_input_tokens_seen": 15193408, + "step": 4825 + }, + { + "epoch": 0.30919915498367584, + "grad_norm": 22.992677688598633, + "learning_rate": 1.7452644496757548e-06, + "loss": 0.3081, + "num_input_tokens_seen": 15208640, + "step": 4830 + }, + { + "epoch": 0.3095192369246527, + "grad_norm": 44.50247573852539, + "learning_rate": 1.7445189580643946e-06, + "loss": 0.4533, + "num_input_tokens_seen": 15224192, + "step": 4835 + }, + { + "epoch": 0.3098393188656296, + "grad_norm": 28.59990692138672, + "learning_rate": 1.7437725369123737e-06, + "loss": 0.5119, + "num_input_tokens_seen": 15239616, + "step": 4840 + }, + { + "epoch": 0.3101594008066065, + "grad_norm": 31.960166931152344, + "learning_rate": 1.7430251871516077e-06, + "loss": 0.4595, + "num_input_tokens_seen": 15255680, + "step": 4845 + }, + { + "epoch": 0.31047948274758336, + "grad_norm": 25.40645980834961, + "learning_rate": 1.7422769097151715e-06, + "loss": 0.4886, + "num_input_tokens_seen": 15271232, + "step": 4850 + }, + { + "epoch": 0.3107995646885603, + "grad_norm": 65.88490295410156, + "learning_rate": 1.7415277055372982e-06, + "loss": 0.4938, + "num_input_tokens_seen": 15287040, + "step": 4855 + }, + { + "epoch": 0.31111964662953717, + "grad_norm": 25.532987594604492, + "learning_rate": 1.7407775755533778e-06, + "loss": 0.5025, + "num_input_tokens_seen": 15304256, + "step": 4860 + }, + { + "epoch": 0.31143972857051405, + "grad_norm": 18.785158157348633, + "learning_rate": 1.7400265206999568e-06, + "loss": 0.3567, + "num_input_tokens_seen": 15322112, + "step": 4865 + }, + { + "epoch": 0.31175981051149093, + "grad_norm": 69.29310607910156, + "learning_rate": 1.7392745419147362e-06, + "loss": 0.5436, + "num_input_tokens_seen": 15337216, + "step": 4870 + }, + { + "epoch": 0.3120798924524678, + "grad_norm": 38.31575393676758, + "learning_rate": 1.7385216401365693e-06, + "loss": 0.4521, + "num_input_tokens_seen": 15354048, + "step": 4875 + }, + { + "epoch": 0.31239997439344475, + "grad_norm": 28.862852096557617, + "learning_rate": 1.7377678163054638e-06, + "loss": 0.4933, + "num_input_tokens_seen": 15369344, + "step": 4880 + }, + { + "epoch": 0.3127200563344216, + "grad_norm": 51.59070587158203, + "learning_rate": 1.7370130713625775e-06, + "loss": 0.4949, + "num_input_tokens_seen": 15385920, + "step": 4885 + }, + { + "epoch": 0.3130401382753985, + "grad_norm": 20.555160522460938, + "learning_rate": 1.736257406250218e-06, + "loss": 0.3867, + "num_input_tokens_seen": 15401536, + "step": 4890 + }, + { + "epoch": 0.3133602202163754, + "grad_norm": 28.439088821411133, + "learning_rate": 1.735500821911842e-06, + "loss": 0.4501, + "num_input_tokens_seen": 15417152, + "step": 4895 + }, + { + "epoch": 0.31368030215735226, + "grad_norm": 30.494640350341797, + "learning_rate": 1.7347433192920544e-06, + "loss": 0.4949, + "num_input_tokens_seen": 15431872, + "step": 4900 + }, + { + "epoch": 0.3140003840983292, + "grad_norm": 19.200109481811523, + "learning_rate": 1.7339848993366056e-06, + "loss": 0.4021, + "num_input_tokens_seen": 15447552, + "step": 4905 + }, + { + "epoch": 0.3143204660393061, + "grad_norm": 32.95127868652344, + "learning_rate": 1.7332255629923922e-06, + "loss": 0.4615, + "num_input_tokens_seen": 15464384, + "step": 4910 + }, + { + "epoch": 0.31464054798028296, + "grad_norm": 23.275110244750977, + "learning_rate": 1.732465311207454e-06, + "loss": 0.4968, + "num_input_tokens_seen": 15479808, + "step": 4915 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 47.221412658691406, + "learning_rate": 1.731704144930975e-06, + "loss": 0.4973, + "num_input_tokens_seen": 15496512, + "step": 4920 + }, + { + "epoch": 0.3152807118622367, + "grad_norm": 39.70328903198242, + "learning_rate": 1.7309420651132797e-06, + "loss": 0.4094, + "num_input_tokens_seen": 15512896, + "step": 4925 + }, + { + "epoch": 0.3156007938032136, + "grad_norm": 32.56901931762695, + "learning_rate": 1.7301790727058343e-06, + "loss": 0.3234, + "num_input_tokens_seen": 15528064, + "step": 4930 + }, + { + "epoch": 0.31592087574419053, + "grad_norm": 31.572166442871094, + "learning_rate": 1.7294151686612431e-06, + "loss": 0.3618, + "num_input_tokens_seen": 15543424, + "step": 4935 + }, + { + "epoch": 0.3162409576851674, + "grad_norm": 42.15610122680664, + "learning_rate": 1.7286503539332495e-06, + "loss": 0.5609, + "num_input_tokens_seen": 15560192, + "step": 4940 + }, + { + "epoch": 0.3165610396261443, + "grad_norm": 43.20957946777344, + "learning_rate": 1.7278846294767337e-06, + "loss": 0.3968, + "num_input_tokens_seen": 15576128, + "step": 4945 + }, + { + "epoch": 0.31688112156712117, + "grad_norm": 80.63443756103516, + "learning_rate": 1.7271179962477118e-06, + "loss": 0.7032, + "num_input_tokens_seen": 15592576, + "step": 4950 + }, + { + "epoch": 0.31720120350809805, + "grad_norm": 50.15550994873047, + "learning_rate": 1.7263504552033341e-06, + "loss": 0.4261, + "num_input_tokens_seen": 15607744, + "step": 4955 + }, + { + "epoch": 0.317521285449075, + "grad_norm": 22.618947982788086, + "learning_rate": 1.725582007301885e-06, + "loss": 0.4846, + "num_input_tokens_seen": 15623360, + "step": 4960 + }, + { + "epoch": 0.31784136739005187, + "grad_norm": 33.10743713378906, + "learning_rate": 1.7248126535027806e-06, + "loss": 0.4213, + "num_input_tokens_seen": 15638656, + "step": 4965 + }, + { + "epoch": 0.31816144933102875, + "grad_norm": 41.587379455566406, + "learning_rate": 1.7240423947665678e-06, + "loss": 0.4632, + "num_input_tokens_seen": 15654400, + "step": 4970 + }, + { + "epoch": 0.3184815312720056, + "grad_norm": 27.983142852783203, + "learning_rate": 1.723271232054924e-06, + "loss": 0.3822, + "num_input_tokens_seen": 15670016, + "step": 4975 + }, + { + "epoch": 0.3188016132129825, + "grad_norm": 54.66548538208008, + "learning_rate": 1.722499166330655e-06, + "loss": 0.4977, + "num_input_tokens_seen": 15686208, + "step": 4980 + }, + { + "epoch": 0.31912169515395944, + "grad_norm": 20.663721084594727, + "learning_rate": 1.7217261985576936e-06, + "loss": 0.44, + "num_input_tokens_seen": 15702592, + "step": 4985 + }, + { + "epoch": 0.3194417770949363, + "grad_norm": 73.22879791259766, + "learning_rate": 1.7209523297010992e-06, + "loss": 0.5176, + "num_input_tokens_seen": 15717696, + "step": 4990 + }, + { + "epoch": 0.3197618590359132, + "grad_norm": 36.40870666503906, + "learning_rate": 1.7201775607270564e-06, + "loss": 0.4644, + "num_input_tokens_seen": 15733184, + "step": 4995 + }, + { + "epoch": 0.3200819409768901, + "grad_norm": 30.597986221313477, + "learning_rate": 1.7194018926028733e-06, + "loss": 0.5267, + "num_input_tokens_seen": 15749888, + "step": 5000 + }, + { + "epoch": 0.32040202291786696, + "grad_norm": 35.71719741821289, + "learning_rate": 1.7186253262969803e-06, + "loss": 0.3621, + "num_input_tokens_seen": 15768384, + "step": 5005 + }, + { + "epoch": 0.32072210485884384, + "grad_norm": 24.331857681274414, + "learning_rate": 1.7178478627789299e-06, + "loss": 0.3269, + "num_input_tokens_seen": 15784448, + "step": 5010 + }, + { + "epoch": 0.3210421867998208, + "grad_norm": 25.280595779418945, + "learning_rate": 1.7170695030193944e-06, + "loss": 0.4088, + "num_input_tokens_seen": 15800512, + "step": 5015 + }, + { + "epoch": 0.32136226874079765, + "grad_norm": 29.914012908935547, + "learning_rate": 1.716290247990165e-06, + "loss": 0.4744, + "num_input_tokens_seen": 15815680, + "step": 5020 + }, + { + "epoch": 0.32168235068177453, + "grad_norm": 33.56769561767578, + "learning_rate": 1.715510098664151e-06, + "loss": 0.3939, + "num_input_tokens_seen": 15830528, + "step": 5025 + }, + { + "epoch": 0.3220024326227514, + "grad_norm": 29.231985092163086, + "learning_rate": 1.7147290560153777e-06, + "loss": 0.4933, + "num_input_tokens_seen": 15845568, + "step": 5030 + }, + { + "epoch": 0.3223225145637283, + "grad_norm": 39.174617767333984, + "learning_rate": 1.7139471210189862e-06, + "loss": 0.4531, + "num_input_tokens_seen": 15861632, + "step": 5035 + }, + { + "epoch": 0.3226425965047052, + "grad_norm": 31.1746826171875, + "learning_rate": 1.7131642946512312e-06, + "loss": 0.5187, + "num_input_tokens_seen": 15877632, + "step": 5040 + }, + { + "epoch": 0.3229626784456821, + "grad_norm": 19.761302947998047, + "learning_rate": 1.712380577889481e-06, + "loss": 0.37, + "num_input_tokens_seen": 15893184, + "step": 5045 + }, + { + "epoch": 0.323282760386659, + "grad_norm": 34.54355239868164, + "learning_rate": 1.711595971712215e-06, + "loss": 0.3955, + "num_input_tokens_seen": 15908416, + "step": 5050 + }, + { + "epoch": 0.32360284232763586, + "grad_norm": 25.96015739440918, + "learning_rate": 1.7108104770990234e-06, + "loss": 0.4074, + "num_input_tokens_seen": 15924224, + "step": 5055 + }, + { + "epoch": 0.32392292426861274, + "grad_norm": 22.604724884033203, + "learning_rate": 1.7100240950306052e-06, + "loss": 0.2532, + "num_input_tokens_seen": 15940032, + "step": 5060 + }, + { + "epoch": 0.3242430062095897, + "grad_norm": 38.15263366699219, + "learning_rate": 1.7092368264887677e-06, + "loss": 0.4556, + "num_input_tokens_seen": 15954944, + "step": 5065 + }, + { + "epoch": 0.32456308815056656, + "grad_norm": 57.1259765625, + "learning_rate": 1.7084486724564252e-06, + "loss": 0.4923, + "num_input_tokens_seen": 15970624, + "step": 5070 + }, + { + "epoch": 0.32488317009154344, + "grad_norm": 33.16521072387695, + "learning_rate": 1.707659633917597e-06, + "loss": 0.418, + "num_input_tokens_seen": 15986688, + "step": 5075 + }, + { + "epoch": 0.3252032520325203, + "grad_norm": 35.50617980957031, + "learning_rate": 1.7068697118574064e-06, + "loss": 0.4172, + "num_input_tokens_seen": 16002752, + "step": 5080 + }, + { + "epoch": 0.3255233339734972, + "grad_norm": 23.2056884765625, + "learning_rate": 1.7060789072620816e-06, + "loss": 0.4924, + "num_input_tokens_seen": 16018112, + "step": 5085 + }, + { + "epoch": 0.32584341591447413, + "grad_norm": 23.894432067871094, + "learning_rate": 1.7052872211189509e-06, + "loss": 0.411, + "num_input_tokens_seen": 16033984, + "step": 5090 + }, + { + "epoch": 0.326163497855451, + "grad_norm": 21.645387649536133, + "learning_rate": 1.7044946544164431e-06, + "loss": 0.3263, + "num_input_tokens_seen": 16049536, + "step": 5095 + }, + { + "epoch": 0.3264835797964279, + "grad_norm": 32.932411193847656, + "learning_rate": 1.703701208144088e-06, + "loss": 0.3722, + "num_input_tokens_seen": 16066304, + "step": 5100 + }, + { + "epoch": 0.32680366173740477, + "grad_norm": 42.86146926879883, + "learning_rate": 1.702906883292512e-06, + "loss": 0.4627, + "num_input_tokens_seen": 16081536, + "step": 5105 + }, + { + "epoch": 0.32712374367838165, + "grad_norm": 25.875411987304688, + "learning_rate": 1.7021116808534393e-06, + "loss": 0.5501, + "num_input_tokens_seen": 16096896, + "step": 5110 + }, + { + "epoch": 0.32744382561935853, + "grad_norm": 47.58795166015625, + "learning_rate": 1.7013156018196893e-06, + "loss": 0.4294, + "num_input_tokens_seen": 16112960, + "step": 5115 + }, + { + "epoch": 0.32776390756033547, + "grad_norm": 34.665802001953125, + "learning_rate": 1.7005186471851759e-06, + "loss": 0.4168, + "num_input_tokens_seen": 16129344, + "step": 5120 + }, + { + "epoch": 0.32808398950131235, + "grad_norm": 23.344072341918945, + "learning_rate": 1.6997208179449066e-06, + "loss": 0.5931, + "num_input_tokens_seen": 16147776, + "step": 5125 + }, + { + "epoch": 0.3284040714422892, + "grad_norm": 43.283119201660156, + "learning_rate": 1.6989221150949806e-06, + "loss": 0.3523, + "num_input_tokens_seen": 16162880, + "step": 5130 + }, + { + "epoch": 0.3287241533832661, + "grad_norm": 17.569599151611328, + "learning_rate": 1.6981225396325873e-06, + "loss": 0.2737, + "num_input_tokens_seen": 16179392, + "step": 5135 + }, + { + "epoch": 0.329044235324243, + "grad_norm": 38.69865036010742, + "learning_rate": 1.6973220925560067e-06, + "loss": 0.5036, + "num_input_tokens_seen": 16194560, + "step": 5140 + }, + { + "epoch": 0.3293643172652199, + "grad_norm": 55.1820182800293, + "learning_rate": 1.696520774864606e-06, + "loss": 0.4281, + "num_input_tokens_seen": 16210112, + "step": 5145 + }, + { + "epoch": 0.3296843992061968, + "grad_norm": 68.6947250366211, + "learning_rate": 1.69571858755884e-06, + "loss": 0.4646, + "num_input_tokens_seen": 16225856, + "step": 5150 + }, + { + "epoch": 0.3300044811471737, + "grad_norm": 25.549705505371094, + "learning_rate": 1.6949155316402487e-06, + "loss": 0.4177, + "num_input_tokens_seen": 16241536, + "step": 5155 + }, + { + "epoch": 0.33032456308815056, + "grad_norm": 31.668855667114258, + "learning_rate": 1.6941116081114566e-06, + "loss": 0.3777, + "num_input_tokens_seen": 16256384, + "step": 5160 + }, + { + "epoch": 0.33064464502912744, + "grad_norm": 34.3087158203125, + "learning_rate": 1.6933068179761722e-06, + "loss": 0.3937, + "num_input_tokens_seen": 16271360, + "step": 5165 + }, + { + "epoch": 0.3309647269701044, + "grad_norm": 26.086729049682617, + "learning_rate": 1.6925011622391857e-06, + "loss": 0.4118, + "num_input_tokens_seen": 16286656, + "step": 5170 + }, + { + "epoch": 0.33128480891108125, + "grad_norm": 18.95518684387207, + "learning_rate": 1.6916946419063667e-06, + "loss": 0.4038, + "num_input_tokens_seen": 16302592, + "step": 5175 + }, + { + "epoch": 0.33160489085205813, + "grad_norm": 25.953067779541016, + "learning_rate": 1.690887257984666e-06, + "loss": 0.5252, + "num_input_tokens_seen": 16318656, + "step": 5180 + }, + { + "epoch": 0.331924972793035, + "grad_norm": 26.030420303344727, + "learning_rate": 1.690079011482112e-06, + "loss": 0.4784, + "num_input_tokens_seen": 16334016, + "step": 5185 + }, + { + "epoch": 0.3322450547340119, + "grad_norm": 44.0208625793457, + "learning_rate": 1.6892699034078096e-06, + "loss": 0.5322, + "num_input_tokens_seen": 16349888, + "step": 5190 + }, + { + "epoch": 0.33256513667498877, + "grad_norm": 40.064537048339844, + "learning_rate": 1.68845993477194e-06, + "loss": 0.5017, + "num_input_tokens_seen": 16365056, + "step": 5195 + }, + { + "epoch": 0.3328852186159657, + "grad_norm": 27.49654197692871, + "learning_rate": 1.6876491065857584e-06, + "loss": 0.3857, + "num_input_tokens_seen": 16380032, + "step": 5200 + }, + { + "epoch": 0.3332053005569426, + "grad_norm": 31.578556060791016, + "learning_rate": 1.6868374198615928e-06, + "loss": 0.6437, + "num_input_tokens_seen": 16394752, + "step": 5205 + }, + { + "epoch": 0.33352538249791946, + "grad_norm": 19.591115951538086, + "learning_rate": 1.6860248756128448e-06, + "loss": 0.4782, + "num_input_tokens_seen": 16410368, + "step": 5210 + }, + { + "epoch": 0.33384546443889634, + "grad_norm": 22.99208641052246, + "learning_rate": 1.6852114748539844e-06, + "loss": 0.3992, + "num_input_tokens_seen": 16425088, + "step": 5215 + }, + { + "epoch": 0.3341655463798732, + "grad_norm": 22.972055435180664, + "learning_rate": 1.6843972186005525e-06, + "loss": 0.3352, + "num_input_tokens_seen": 16441152, + "step": 5220 + }, + { + "epoch": 0.33448562832085016, + "grad_norm": 34.798065185546875, + "learning_rate": 1.6835821078691577e-06, + "loss": 0.4641, + "num_input_tokens_seen": 16458240, + "step": 5225 + }, + { + "epoch": 0.33480571026182704, + "grad_norm": 35.769901275634766, + "learning_rate": 1.6827661436774746e-06, + "loss": 0.4142, + "num_input_tokens_seen": 16474112, + "step": 5230 + }, + { + "epoch": 0.3351257922028039, + "grad_norm": 43.8751335144043, + "learning_rate": 1.681949327044245e-06, + "loss": 0.3955, + "num_input_tokens_seen": 16490560, + "step": 5235 + }, + { + "epoch": 0.3354458741437808, + "grad_norm": 67.51107025146484, + "learning_rate": 1.6811316589892734e-06, + "loss": 0.6757, + "num_input_tokens_seen": 16505728, + "step": 5240 + }, + { + "epoch": 0.3357659560847577, + "grad_norm": 21.818950653076172, + "learning_rate": 1.6803131405334284e-06, + "loss": 0.4257, + "num_input_tokens_seen": 16521856, + "step": 5245 + }, + { + "epoch": 0.3360860380257346, + "grad_norm": 30.710657119750977, + "learning_rate": 1.6794937726986396e-06, + "loss": 0.4271, + "num_input_tokens_seen": 16537792, + "step": 5250 + }, + { + "epoch": 0.3364061199667115, + "grad_norm": 42.02250671386719, + "learning_rate": 1.6786735565078974e-06, + "loss": 0.434, + "num_input_tokens_seen": 16553408, + "step": 5255 + }, + { + "epoch": 0.33672620190768837, + "grad_norm": 28.501094818115234, + "learning_rate": 1.677852492985251e-06, + "loss": 0.4297, + "num_input_tokens_seen": 16570112, + "step": 5260 + }, + { + "epoch": 0.33704628384866525, + "grad_norm": 56.61883544921875, + "learning_rate": 1.6770305831558086e-06, + "loss": 0.4931, + "num_input_tokens_seen": 16586304, + "step": 5265 + }, + { + "epoch": 0.33736636578964213, + "grad_norm": 15.158733367919922, + "learning_rate": 1.6762078280457342e-06, + "loss": 0.3922, + "num_input_tokens_seen": 16601920, + "step": 5270 + }, + { + "epoch": 0.33768644773061907, + "grad_norm": 27.923097610473633, + "learning_rate": 1.6753842286822465e-06, + "loss": 0.4797, + "num_input_tokens_seen": 16618240, + "step": 5275 + }, + { + "epoch": 0.33800652967159595, + "grad_norm": 36.38385009765625, + "learning_rate": 1.6745597860936199e-06, + "loss": 0.59, + "num_input_tokens_seen": 16633408, + "step": 5280 + }, + { + "epoch": 0.3383266116125728, + "grad_norm": 38.012123107910156, + "learning_rate": 1.6737345013091794e-06, + "loss": 0.439, + "num_input_tokens_seen": 16649664, + "step": 5285 + }, + { + "epoch": 0.3386466935535497, + "grad_norm": 39.11860656738281, + "learning_rate": 1.672908375359304e-06, + "loss": 0.4602, + "num_input_tokens_seen": 16664896, + "step": 5290 + }, + { + "epoch": 0.3389667754945266, + "grad_norm": 56.845096588134766, + "learning_rate": 1.6720814092754209e-06, + "loss": 0.5433, + "num_input_tokens_seen": 16680384, + "step": 5295 + }, + { + "epoch": 0.33928685743550346, + "grad_norm": 20.308507919311523, + "learning_rate": 1.6712536040900075e-06, + "loss": 0.3696, + "num_input_tokens_seen": 16696192, + "step": 5300 + }, + { + "epoch": 0.3396069393764804, + "grad_norm": 26.112041473388672, + "learning_rate": 1.6704249608365878e-06, + "loss": 0.4752, + "num_input_tokens_seen": 16727104, + "step": 5305 + }, + { + "epoch": 0.3399270213174573, + "grad_norm": 27.13048553466797, + "learning_rate": 1.669595480549733e-06, + "loss": 0.4154, + "num_input_tokens_seen": 16741696, + "step": 5310 + }, + { + "epoch": 0.34024710325843416, + "grad_norm": 40.439273834228516, + "learning_rate": 1.6687651642650587e-06, + "loss": 0.432, + "num_input_tokens_seen": 16757120, + "step": 5315 + }, + { + "epoch": 0.34056718519941104, + "grad_norm": 27.309789657592773, + "learning_rate": 1.6679340130192245e-06, + "loss": 0.4471, + "num_input_tokens_seen": 16772416, + "step": 5320 + }, + { + "epoch": 0.3408872671403879, + "grad_norm": 24.121200561523438, + "learning_rate": 1.667102027849933e-06, + "loss": 0.3172, + "num_input_tokens_seen": 16788352, + "step": 5325 + }, + { + "epoch": 0.34120734908136485, + "grad_norm": 36.701873779296875, + "learning_rate": 1.6662692097959266e-06, + "loss": 0.3456, + "num_input_tokens_seen": 16803648, + "step": 5330 + }, + { + "epoch": 0.34152743102234173, + "grad_norm": 52.13604736328125, + "learning_rate": 1.6654355598969894e-06, + "loss": 0.4708, + "num_input_tokens_seen": 16818944, + "step": 5335 + }, + { + "epoch": 0.3418475129633186, + "grad_norm": 31.60714340209961, + "learning_rate": 1.6646010791939423e-06, + "loss": 0.5078, + "num_input_tokens_seen": 16833984, + "step": 5340 + }, + { + "epoch": 0.3421675949042955, + "grad_norm": 30.880844116210938, + "learning_rate": 1.6637657687286446e-06, + "loss": 0.5507, + "num_input_tokens_seen": 16849280, + "step": 5345 + }, + { + "epoch": 0.34248767684527237, + "grad_norm": 29.642696380615234, + "learning_rate": 1.6629296295439912e-06, + "loss": 0.3979, + "num_input_tokens_seen": 16865664, + "step": 5350 + }, + { + "epoch": 0.3428077587862493, + "grad_norm": 46.237457275390625, + "learning_rate": 1.6620926626839116e-06, + "loss": 0.4884, + "num_input_tokens_seen": 16881536, + "step": 5355 + }, + { + "epoch": 0.3431278407272262, + "grad_norm": 26.425844192504883, + "learning_rate": 1.661254869193369e-06, + "loss": 0.4395, + "num_input_tokens_seen": 16898816, + "step": 5360 + }, + { + "epoch": 0.34344792266820307, + "grad_norm": 44.35171127319336, + "learning_rate": 1.6604162501183581e-06, + "loss": 0.5104, + "num_input_tokens_seen": 16915136, + "step": 5365 + }, + { + "epoch": 0.34376800460917994, + "grad_norm": 29.71055793762207, + "learning_rate": 1.6595768065059045e-06, + "loss": 0.4607, + "num_input_tokens_seen": 16931200, + "step": 5370 + }, + { + "epoch": 0.3440880865501568, + "grad_norm": 26.671714782714844, + "learning_rate": 1.6587365394040641e-06, + "loss": 0.4652, + "num_input_tokens_seen": 16946816, + "step": 5375 + }, + { + "epoch": 0.3444081684911337, + "grad_norm": 28.532976150512695, + "learning_rate": 1.6578954498619195e-06, + "loss": 0.3893, + "num_input_tokens_seen": 16962880, + "step": 5380 + }, + { + "epoch": 0.34472825043211064, + "grad_norm": 31.44209861755371, + "learning_rate": 1.6570535389295814e-06, + "loss": 0.4587, + "num_input_tokens_seen": 16978240, + "step": 5385 + }, + { + "epoch": 0.3450483323730875, + "grad_norm": 22.520421981811523, + "learning_rate": 1.6562108076581853e-06, + "loss": 0.3628, + "num_input_tokens_seen": 16993728, + "step": 5390 + }, + { + "epoch": 0.3453684143140644, + "grad_norm": 37.299156188964844, + "learning_rate": 1.6553672570998912e-06, + "loss": 0.5903, + "num_input_tokens_seen": 17009728, + "step": 5395 + }, + { + "epoch": 0.3456884962550413, + "grad_norm": 38.635986328125, + "learning_rate": 1.6545228883078815e-06, + "loss": 0.4174, + "num_input_tokens_seen": 17024640, + "step": 5400 + }, + { + "epoch": 0.34600857819601816, + "grad_norm": 37.52071762084961, + "learning_rate": 1.653677702336361e-06, + "loss": 0.3541, + "num_input_tokens_seen": 17040512, + "step": 5405 + }, + { + "epoch": 0.3463286601369951, + "grad_norm": 19.03274917602539, + "learning_rate": 1.6528317002405538e-06, + "loss": 0.4657, + "num_input_tokens_seen": 17056064, + "step": 5410 + }, + { + "epoch": 0.34664874207797197, + "grad_norm": 28.59636878967285, + "learning_rate": 1.6519848830767043e-06, + "loss": 0.3692, + "num_input_tokens_seen": 17072448, + "step": 5415 + }, + { + "epoch": 0.34696882401894885, + "grad_norm": 38.893310546875, + "learning_rate": 1.6511372519020726e-06, + "loss": 0.6197, + "num_input_tokens_seen": 17088320, + "step": 5420 + }, + { + "epoch": 0.34728890595992573, + "grad_norm": 39.06748962402344, + "learning_rate": 1.650288807774937e-06, + "loss": 0.4291, + "num_input_tokens_seen": 17104448, + "step": 5425 + }, + { + "epoch": 0.3476089879009026, + "grad_norm": 36.80699920654297, + "learning_rate": 1.6494395517545893e-06, + "loss": 0.3964, + "num_input_tokens_seen": 17121856, + "step": 5430 + }, + { + "epoch": 0.34792906984187955, + "grad_norm": 47.49158477783203, + "learning_rate": 1.6485894849013362e-06, + "loss": 0.5052, + "num_input_tokens_seen": 17136512, + "step": 5435 + }, + { + "epoch": 0.3482491517828564, + "grad_norm": 26.2275333404541, + "learning_rate": 1.6477386082764961e-06, + "loss": 0.443, + "num_input_tokens_seen": 17152640, + "step": 5440 + }, + { + "epoch": 0.3485692337238333, + "grad_norm": 25.935453414916992, + "learning_rate": 1.6468869229423983e-06, + "loss": 0.362, + "num_input_tokens_seen": 17167680, + "step": 5445 + }, + { + "epoch": 0.3488893156648102, + "grad_norm": 57.09697341918945, + "learning_rate": 1.6460344299623813e-06, + "loss": 0.6295, + "num_input_tokens_seen": 17183296, + "step": 5450 + }, + { + "epoch": 0.34920939760578706, + "grad_norm": 62.791343688964844, + "learning_rate": 1.6451811304007939e-06, + "loss": 0.5424, + "num_input_tokens_seen": 17198272, + "step": 5455 + }, + { + "epoch": 0.349529479546764, + "grad_norm": 46.02850341796875, + "learning_rate": 1.6443270253229895e-06, + "loss": 0.5177, + "num_input_tokens_seen": 17213376, + "step": 5460 + }, + { + "epoch": 0.3498495614877409, + "grad_norm": 39.094146728515625, + "learning_rate": 1.6434721157953288e-06, + "loss": 0.4657, + "num_input_tokens_seen": 17229632, + "step": 5465 + }, + { + "epoch": 0.35016964342871776, + "grad_norm": 35.04682540893555, + "learning_rate": 1.6426164028851765e-06, + "loss": 0.579, + "num_input_tokens_seen": 17245696, + "step": 5470 + }, + { + "epoch": 0.3504257089814993, + "eval_loss": 0.43906036019325256, + "eval_runtime": 49.1679, + "eval_samples_per_second": 282.42, + "eval_steps_per_second": 35.308, + "num_input_tokens_seen": 17259840, + "step": 5474 + }, + { + "epoch": 0.35048972536969464, + "grad_norm": 26.412445068359375, + "learning_rate": 1.6417598876609002e-06, + "loss": 0.3787, + "num_input_tokens_seen": 17262976, + "step": 5475 + }, + { + "epoch": 0.3508098073106715, + "grad_norm": 36.70389175415039, + "learning_rate": 1.640902571191869e-06, + "loss": 0.419, + "num_input_tokens_seen": 17278336, + "step": 5480 + }, + { + "epoch": 0.3511298892516484, + "grad_norm": 41.35291290283203, + "learning_rate": 1.6400444545484524e-06, + "loss": 0.3535, + "num_input_tokens_seen": 17293248, + "step": 5485 + }, + { + "epoch": 0.35144997119262533, + "grad_norm": 19.715316772460938, + "learning_rate": 1.6391855388020193e-06, + "loss": 0.4275, + "num_input_tokens_seen": 17309184, + "step": 5490 + }, + { + "epoch": 0.3517700531336022, + "grad_norm": 32.778873443603516, + "learning_rate": 1.6383258250249363e-06, + "loss": 0.4436, + "num_input_tokens_seen": 17325248, + "step": 5495 + }, + { + "epoch": 0.3520901350745791, + "grad_norm": 19.160093307495117, + "learning_rate": 1.6374653142905661e-06, + "loss": 0.4226, + "num_input_tokens_seen": 17340736, + "step": 5500 + }, + { + "epoch": 0.35241021701555597, + "grad_norm": 35.900447845458984, + "learning_rate": 1.6366040076732662e-06, + "loss": 0.4188, + "num_input_tokens_seen": 17355904, + "step": 5505 + }, + { + "epoch": 0.35273029895653285, + "grad_norm": 28.459196090698242, + "learning_rate": 1.6357419062483882e-06, + "loss": 0.4712, + "num_input_tokens_seen": 17371264, + "step": 5510 + }, + { + "epoch": 0.3530503808975098, + "grad_norm": 24.3746337890625, + "learning_rate": 1.6348790110922758e-06, + "loss": 0.4168, + "num_input_tokens_seen": 17388608, + "step": 5515 + }, + { + "epoch": 0.35337046283848667, + "grad_norm": 30.681352615356445, + "learning_rate": 1.6340153232822635e-06, + "loss": 0.4668, + "num_input_tokens_seen": 17403712, + "step": 5520 + }, + { + "epoch": 0.35369054477946354, + "grad_norm": 40.181785583496094, + "learning_rate": 1.633150843896676e-06, + "loss": 0.4809, + "num_input_tokens_seen": 17421056, + "step": 5525 + }, + { + "epoch": 0.3540106267204404, + "grad_norm": 58.2733154296875, + "learning_rate": 1.6322855740148263e-06, + "loss": 0.5588, + "num_input_tokens_seen": 17436096, + "step": 5530 + }, + { + "epoch": 0.3543307086614173, + "grad_norm": 24.002464294433594, + "learning_rate": 1.6314195147170132e-06, + "loss": 0.3701, + "num_input_tokens_seen": 17452480, + "step": 5535 + }, + { + "epoch": 0.35465079060239424, + "grad_norm": 28.335710525512695, + "learning_rate": 1.6305526670845225e-06, + "loss": 0.4038, + "num_input_tokens_seen": 17467776, + "step": 5540 + }, + { + "epoch": 0.3549708725433711, + "grad_norm": 46.305484771728516, + "learning_rate": 1.6296850321996232e-06, + "loss": 0.5081, + "num_input_tokens_seen": 17482752, + "step": 5545 + }, + { + "epoch": 0.355290954484348, + "grad_norm": 31.239910125732422, + "learning_rate": 1.6288166111455683e-06, + "loss": 0.3885, + "num_input_tokens_seen": 17497792, + "step": 5550 + }, + { + "epoch": 0.3556110364253249, + "grad_norm": 21.766979217529297, + "learning_rate": 1.6279474050065906e-06, + "loss": 0.4774, + "num_input_tokens_seen": 17513024, + "step": 5555 + }, + { + "epoch": 0.35593111836630176, + "grad_norm": 28.28034210205078, + "learning_rate": 1.6270774148679054e-06, + "loss": 0.4143, + "num_input_tokens_seen": 17529024, + "step": 5560 + }, + { + "epoch": 0.35625120030727864, + "grad_norm": 15.855846405029297, + "learning_rate": 1.6262066418157048e-06, + "loss": 0.3764, + "num_input_tokens_seen": 17543936, + "step": 5565 + }, + { + "epoch": 0.35657128224825557, + "grad_norm": 52.373390197753906, + "learning_rate": 1.6253350869371595e-06, + "loss": 0.5374, + "num_input_tokens_seen": 17559168, + "step": 5570 + }, + { + "epoch": 0.35689136418923245, + "grad_norm": 32.6270751953125, + "learning_rate": 1.6244627513204158e-06, + "loss": 0.3828, + "num_input_tokens_seen": 17574912, + "step": 5575 + }, + { + "epoch": 0.35721144613020933, + "grad_norm": 24.754146575927734, + "learning_rate": 1.6235896360545954e-06, + "loss": 0.4239, + "num_input_tokens_seen": 17590272, + "step": 5580 + }, + { + "epoch": 0.3575315280711862, + "grad_norm": 40.839786529541016, + "learning_rate": 1.622715742229792e-06, + "loss": 0.4379, + "num_input_tokens_seen": 17605952, + "step": 5585 + }, + { + "epoch": 0.3578516100121631, + "grad_norm": 21.1004638671875, + "learning_rate": 1.6218410709370734e-06, + "loss": 0.3813, + "num_input_tokens_seen": 17621120, + "step": 5590 + }, + { + "epoch": 0.35817169195314, + "grad_norm": 40.48637008666992, + "learning_rate": 1.6209656232684768e-06, + "loss": 0.5629, + "num_input_tokens_seen": 17636096, + "step": 5595 + }, + { + "epoch": 0.3584917738941169, + "grad_norm": 86.99573516845703, + "learning_rate": 1.620089400317008e-06, + "loss": 0.4427, + "num_input_tokens_seen": 17652672, + "step": 5600 + }, + { + "epoch": 0.3588118558350938, + "grad_norm": 33.9478645324707, + "learning_rate": 1.6192124031766425e-06, + "loss": 0.4875, + "num_input_tokens_seen": 17668032, + "step": 5605 + }, + { + "epoch": 0.35913193777607066, + "grad_norm": 28.759950637817383, + "learning_rate": 1.6183346329423213e-06, + "loss": 0.4474, + "num_input_tokens_seen": 17683264, + "step": 5610 + }, + { + "epoch": 0.35945201971704754, + "grad_norm": 49.65534210205078, + "learning_rate": 1.6174560907099508e-06, + "loss": 0.3642, + "num_input_tokens_seen": 17699200, + "step": 5615 + }, + { + "epoch": 0.3597721016580245, + "grad_norm": 21.184310913085938, + "learning_rate": 1.6165767775764013e-06, + "loss": 0.3489, + "num_input_tokens_seen": 17714816, + "step": 5620 + }, + { + "epoch": 0.36009218359900136, + "grad_norm": 36.253963470458984, + "learning_rate": 1.6156966946395056e-06, + "loss": 0.411, + "num_input_tokens_seen": 17732352, + "step": 5625 + }, + { + "epoch": 0.36041226553997824, + "grad_norm": 52.9035758972168, + "learning_rate": 1.6148158429980577e-06, + "loss": 0.5376, + "num_input_tokens_seen": 17748288, + "step": 5630 + }, + { + "epoch": 0.3607323474809551, + "grad_norm": 40.94856262207031, + "learning_rate": 1.6139342237518108e-06, + "loss": 0.3839, + "num_input_tokens_seen": 17763520, + "step": 5635 + }, + { + "epoch": 0.361052429421932, + "grad_norm": 33.37528610229492, + "learning_rate": 1.6130518380014773e-06, + "loss": 0.428, + "num_input_tokens_seen": 17779328, + "step": 5640 + }, + { + "epoch": 0.3613725113629089, + "grad_norm": 38.7974853515625, + "learning_rate": 1.6121686868487259e-06, + "loss": 0.4178, + "num_input_tokens_seen": 17795584, + "step": 5645 + }, + { + "epoch": 0.3616925933038858, + "grad_norm": 16.909976959228516, + "learning_rate": 1.6112847713961815e-06, + "loss": 0.44, + "num_input_tokens_seen": 17810368, + "step": 5650 + }, + { + "epoch": 0.3620126752448627, + "grad_norm": 27.985116958618164, + "learning_rate": 1.610400092747423e-06, + "loss": 0.4283, + "num_input_tokens_seen": 17826496, + "step": 5655 + }, + { + "epoch": 0.36233275718583957, + "grad_norm": 30.853046417236328, + "learning_rate": 1.609514652006981e-06, + "loss": 0.4191, + "num_input_tokens_seen": 17841344, + "step": 5660 + }, + { + "epoch": 0.36265283912681645, + "grad_norm": 31.243133544921875, + "learning_rate": 1.60862845028034e-06, + "loss": 0.5596, + "num_input_tokens_seen": 17857408, + "step": 5665 + }, + { + "epoch": 0.36297292106779333, + "grad_norm": 24.529314041137695, + "learning_rate": 1.6077414886739327e-06, + "loss": 0.4256, + "num_input_tokens_seen": 17873280, + "step": 5670 + }, + { + "epoch": 0.36329300300877027, + "grad_norm": 20.652950286865234, + "learning_rate": 1.6068537682951412e-06, + "loss": 0.4936, + "num_input_tokens_seen": 17888448, + "step": 5675 + }, + { + "epoch": 0.36361308494974715, + "grad_norm": 28.207895278930664, + "learning_rate": 1.6059652902522947e-06, + "loss": 0.4402, + "num_input_tokens_seen": 17904320, + "step": 5680 + }, + { + "epoch": 0.363933166890724, + "grad_norm": 51.1041145324707, + "learning_rate": 1.6050760556546683e-06, + "loss": 0.3667, + "num_input_tokens_seen": 17919744, + "step": 5685 + }, + { + "epoch": 0.3642532488317009, + "grad_norm": 26.759593963623047, + "learning_rate": 1.6041860656124823e-06, + "loss": 0.3814, + "num_input_tokens_seen": 17934656, + "step": 5690 + }, + { + "epoch": 0.3645733307726778, + "grad_norm": 39.42972946166992, + "learning_rate": 1.6032953212368993e-06, + "loss": 0.5375, + "num_input_tokens_seen": 17950976, + "step": 5695 + }, + { + "epoch": 0.3648934127136547, + "grad_norm": 22.8485164642334, + "learning_rate": 1.6024038236400243e-06, + "loss": 0.4688, + "num_input_tokens_seen": 17966400, + "step": 5700 + }, + { + "epoch": 0.3652134946546316, + "grad_norm": 97.59317016601562, + "learning_rate": 1.6015115739349027e-06, + "loss": 0.5649, + "num_input_tokens_seen": 17983872, + "step": 5705 + }, + { + "epoch": 0.3655335765956085, + "grad_norm": 33.57761764526367, + "learning_rate": 1.6006185732355183e-06, + "loss": 0.5461, + "num_input_tokens_seen": 17999680, + "step": 5710 + }, + { + "epoch": 0.36585365853658536, + "grad_norm": 21.023252487182617, + "learning_rate": 1.5997248226567931e-06, + "loss": 0.3802, + "num_input_tokens_seen": 18014784, + "step": 5715 + }, + { + "epoch": 0.36617374047756224, + "grad_norm": 22.69112205505371, + "learning_rate": 1.5988303233145853e-06, + "loss": 0.4997, + "num_input_tokens_seen": 18029888, + "step": 5720 + }, + { + "epoch": 0.3664938224185392, + "grad_norm": 29.783832550048828, + "learning_rate": 1.597935076325688e-06, + "loss": 0.3877, + "num_input_tokens_seen": 18045632, + "step": 5725 + }, + { + "epoch": 0.36681390435951605, + "grad_norm": 41.83056640625, + "learning_rate": 1.5970390828078272e-06, + "loss": 0.5839, + "num_input_tokens_seen": 18060928, + "step": 5730 + }, + { + "epoch": 0.36713398630049293, + "grad_norm": 16.932323455810547, + "learning_rate": 1.5961423438796615e-06, + "loss": 0.4567, + "num_input_tokens_seen": 18076352, + "step": 5735 + }, + { + "epoch": 0.3674540682414698, + "grad_norm": 43.994022369384766, + "learning_rate": 1.59524486066078e-06, + "loss": 0.4411, + "num_input_tokens_seen": 18092096, + "step": 5740 + }, + { + "epoch": 0.3677741501824467, + "grad_norm": 29.11937141418457, + "learning_rate": 1.5943466342717012e-06, + "loss": 0.5834, + "num_input_tokens_seen": 18107648, + "step": 5745 + }, + { + "epoch": 0.36809423212342357, + "grad_norm": 26.03652572631836, + "learning_rate": 1.5934476658338708e-06, + "loss": 0.4433, + "num_input_tokens_seen": 18123264, + "step": 5750 + }, + { + "epoch": 0.3684143140644005, + "grad_norm": 25.282079696655273, + "learning_rate": 1.5925479564696619e-06, + "loss": 0.5414, + "num_input_tokens_seen": 18138368, + "step": 5755 + }, + { + "epoch": 0.3687343960053774, + "grad_norm": 11.744181632995605, + "learning_rate": 1.5916475073023721e-06, + "loss": 0.3336, + "num_input_tokens_seen": 18154432, + "step": 5760 + }, + { + "epoch": 0.36905447794635426, + "grad_norm": 50.17704391479492, + "learning_rate": 1.5907463194562226e-06, + "loss": 0.3355, + "num_input_tokens_seen": 18171200, + "step": 5765 + }, + { + "epoch": 0.36937455988733114, + "grad_norm": 24.319721221923828, + "learning_rate": 1.589844394056357e-06, + "loss": 0.3807, + "num_input_tokens_seen": 18187008, + "step": 5770 + }, + { + "epoch": 0.369694641828308, + "grad_norm": 48.6660270690918, + "learning_rate": 1.5889417322288403e-06, + "loss": 0.3492, + "num_input_tokens_seen": 18202944, + "step": 5775 + }, + { + "epoch": 0.37001472376928496, + "grad_norm": 86.95288848876953, + "learning_rate": 1.5880383351006556e-06, + "loss": 0.4969, + "num_input_tokens_seen": 18217984, + "step": 5780 + }, + { + "epoch": 0.37033480571026184, + "grad_norm": 30.690433502197266, + "learning_rate": 1.5871342037997055e-06, + "loss": 0.505, + "num_input_tokens_seen": 18233984, + "step": 5785 + }, + { + "epoch": 0.3706548876512387, + "grad_norm": 43.78403091430664, + "learning_rate": 1.5862293394548082e-06, + "loss": 0.403, + "num_input_tokens_seen": 18249024, + "step": 5790 + }, + { + "epoch": 0.3709749695922156, + "grad_norm": 73.22137451171875, + "learning_rate": 1.5853237431956972e-06, + "loss": 0.3414, + "num_input_tokens_seen": 18264256, + "step": 5795 + }, + { + "epoch": 0.3712950515331925, + "grad_norm": 40.81637954711914, + "learning_rate": 1.5844174161530206e-06, + "loss": 0.5495, + "num_input_tokens_seen": 18279936, + "step": 5800 + }, + { + "epoch": 0.3716151334741694, + "grad_norm": 24.28744888305664, + "learning_rate": 1.5835103594583382e-06, + "loss": 0.4039, + "num_input_tokens_seen": 18295488, + "step": 5805 + }, + { + "epoch": 0.3719352154151463, + "grad_norm": 25.278915405273438, + "learning_rate": 1.5826025742441207e-06, + "loss": 0.5329, + "num_input_tokens_seen": 18311360, + "step": 5810 + }, + { + "epoch": 0.37225529735612317, + "grad_norm": 25.298076629638672, + "learning_rate": 1.5816940616437486e-06, + "loss": 0.4284, + "num_input_tokens_seen": 18326592, + "step": 5815 + }, + { + "epoch": 0.37257537929710005, + "grad_norm": 32.25617599487305, + "learning_rate": 1.5807848227915108e-06, + "loss": 0.3573, + "num_input_tokens_seen": 18344000, + "step": 5820 + }, + { + "epoch": 0.37289546123807693, + "grad_norm": 61.83903503417969, + "learning_rate": 1.5798748588226028e-06, + "loss": 0.4787, + "num_input_tokens_seen": 18359872, + "step": 5825 + }, + { + "epoch": 0.3732155431790538, + "grad_norm": 42.77378463745117, + "learning_rate": 1.578964170873125e-06, + "loss": 0.4776, + "num_input_tokens_seen": 18374400, + "step": 5830 + }, + { + "epoch": 0.37353562512003075, + "grad_norm": 19.963783264160156, + "learning_rate": 1.5780527600800816e-06, + "loss": 0.2927, + "num_input_tokens_seen": 18390656, + "step": 5835 + }, + { + "epoch": 0.3738557070610076, + "grad_norm": 63.39997100830078, + "learning_rate": 1.5771406275813808e-06, + "loss": 0.4476, + "num_input_tokens_seen": 18406400, + "step": 5840 + }, + { + "epoch": 0.3741757890019845, + "grad_norm": 51.011985778808594, + "learning_rate": 1.5762277745158297e-06, + "loss": 0.5497, + "num_input_tokens_seen": 18422848, + "step": 5845 + }, + { + "epoch": 0.3744958709429614, + "grad_norm": 70.113525390625, + "learning_rate": 1.5753142020231365e-06, + "loss": 0.4932, + "num_input_tokens_seen": 18438912, + "step": 5850 + }, + { + "epoch": 0.37481595288393826, + "grad_norm": 43.822303771972656, + "learning_rate": 1.5743999112439073e-06, + "loss": 0.525, + "num_input_tokens_seen": 18455488, + "step": 5855 + }, + { + "epoch": 0.3751360348249152, + "grad_norm": 36.65006637573242, + "learning_rate": 1.5734849033196446e-06, + "loss": 0.3954, + "num_input_tokens_seen": 18470080, + "step": 5860 + }, + { + "epoch": 0.3754561167658921, + "grad_norm": 42.59208297729492, + "learning_rate": 1.5725691793927468e-06, + "loss": 0.4337, + "num_input_tokens_seen": 18484480, + "step": 5865 + }, + { + "epoch": 0.37577619870686896, + "grad_norm": 23.022443771362305, + "learning_rate": 1.5716527406065057e-06, + "loss": 0.46, + "num_input_tokens_seen": 18501312, + "step": 5870 + }, + { + "epoch": 0.37609628064784584, + "grad_norm": 25.66585350036621, + "learning_rate": 1.570735588105106e-06, + "loss": 0.449, + "num_input_tokens_seen": 18515968, + "step": 5875 + }, + { + "epoch": 0.3764163625888227, + "grad_norm": 16.919160842895508, + "learning_rate": 1.5698177230336234e-06, + "loss": 0.3901, + "num_input_tokens_seen": 18531200, + "step": 5880 + }, + { + "epoch": 0.37673644452979965, + "grad_norm": 31.569171905517578, + "learning_rate": 1.568899146538023e-06, + "loss": 0.2699, + "num_input_tokens_seen": 18547712, + "step": 5885 + }, + { + "epoch": 0.37705652647077653, + "grad_norm": 28.067827224731445, + "learning_rate": 1.5679798597651587e-06, + "loss": 0.4111, + "num_input_tokens_seen": 18562752, + "step": 5890 + }, + { + "epoch": 0.3773766084117534, + "grad_norm": 40.843196868896484, + "learning_rate": 1.5670598638627706e-06, + "loss": 0.4265, + "num_input_tokens_seen": 18578368, + "step": 5895 + }, + { + "epoch": 0.3776966903527303, + "grad_norm": 40.625885009765625, + "learning_rate": 1.5661391599794847e-06, + "loss": 0.3882, + "num_input_tokens_seen": 18593408, + "step": 5900 + }, + { + "epoch": 0.37801677229370717, + "grad_norm": 28.81650161743164, + "learning_rate": 1.56521774926481e-06, + "loss": 0.4155, + "num_input_tokens_seen": 18607872, + "step": 5905 + }, + { + "epoch": 0.3783368542346841, + "grad_norm": 25.494752883911133, + "learning_rate": 1.5642956328691393e-06, + "loss": 0.359, + "num_input_tokens_seen": 18624000, + "step": 5910 + }, + { + "epoch": 0.378656936175661, + "grad_norm": 52.101295471191406, + "learning_rate": 1.5633728119437451e-06, + "loss": 0.564, + "num_input_tokens_seen": 18640704, + "step": 5915 + }, + { + "epoch": 0.37897701811663786, + "grad_norm": 28.191926956176758, + "learning_rate": 1.5624492876407807e-06, + "loss": 0.4568, + "num_input_tokens_seen": 18658368, + "step": 5920 + }, + { + "epoch": 0.37929710005761474, + "grad_norm": 46.10580825805664, + "learning_rate": 1.5615250611132766e-06, + "loss": 0.4087, + "num_input_tokens_seen": 18675584, + "step": 5925 + }, + { + "epoch": 0.3796171819985916, + "grad_norm": 23.61751365661621, + "learning_rate": 1.5606001335151405e-06, + "loss": 0.5669, + "num_input_tokens_seen": 18691904, + "step": 5930 + }, + { + "epoch": 0.3799372639395685, + "grad_norm": 33.682106018066406, + "learning_rate": 1.5596745060011561e-06, + "loss": 0.3744, + "num_input_tokens_seen": 18708736, + "step": 5935 + }, + { + "epoch": 0.38025734588054544, + "grad_norm": 35.933292388916016, + "learning_rate": 1.5587481797269793e-06, + "loss": 0.3464, + "num_input_tokens_seen": 18724032, + "step": 5940 + }, + { + "epoch": 0.3805774278215223, + "grad_norm": 38.045902252197266, + "learning_rate": 1.5578211558491396e-06, + "loss": 0.4203, + "num_input_tokens_seen": 18740352, + "step": 5945 + }, + { + "epoch": 0.3808975097624992, + "grad_norm": 24.26993751525879, + "learning_rate": 1.5568934355250375e-06, + "loss": 0.3225, + "num_input_tokens_seen": 18754560, + "step": 5950 + }, + { + "epoch": 0.3812175917034761, + "grad_norm": 67.30828094482422, + "learning_rate": 1.5559650199129423e-06, + "loss": 0.6491, + "num_input_tokens_seen": 18769280, + "step": 5955 + }, + { + "epoch": 0.38153767364445296, + "grad_norm": 46.19745635986328, + "learning_rate": 1.5550359101719921e-06, + "loss": 0.4012, + "num_input_tokens_seen": 18784512, + "step": 5960 + }, + { + "epoch": 0.3818577555854299, + "grad_norm": 62.694427490234375, + "learning_rate": 1.554106107462191e-06, + "loss": 0.3561, + "num_input_tokens_seen": 18800384, + "step": 5965 + }, + { + "epoch": 0.38217783752640677, + "grad_norm": 43.13536834716797, + "learning_rate": 1.5531756129444092e-06, + "loss": 0.4248, + "num_input_tokens_seen": 18815552, + "step": 5970 + }, + { + "epoch": 0.38249791946738365, + "grad_norm": 23.844327926635742, + "learning_rate": 1.5522444277803796e-06, + "loss": 0.3884, + "num_input_tokens_seen": 18830080, + "step": 5975 + }, + { + "epoch": 0.38281800140836053, + "grad_norm": 30.173629760742188, + "learning_rate": 1.5513125531326976e-06, + "loss": 0.4319, + "num_input_tokens_seen": 18846272, + "step": 5980 + }, + { + "epoch": 0.3831380833493374, + "grad_norm": 29.421924591064453, + "learning_rate": 1.5503799901648198e-06, + "loss": 0.3747, + "num_input_tokens_seen": 18860928, + "step": 5985 + }, + { + "epoch": 0.38345816529031435, + "grad_norm": 61.6126594543457, + "learning_rate": 1.5494467400410625e-06, + "loss": 0.4553, + "num_input_tokens_seen": 18877120, + "step": 5990 + }, + { + "epoch": 0.3837782472312912, + "grad_norm": 50.92166519165039, + "learning_rate": 1.5485128039265986e-06, + "loss": 0.6017, + "num_input_tokens_seen": 18892224, + "step": 5995 + }, + { + "epoch": 0.3840983291722681, + "grad_norm": 51.964595794677734, + "learning_rate": 1.547578182987459e-06, + "loss": 0.4408, + "num_input_tokens_seen": 18907008, + "step": 6000 + }, + { + "epoch": 0.384418411113245, + "grad_norm": 21.846920013427734, + "learning_rate": 1.5466428783905286e-06, + "loss": 0.2736, + "num_input_tokens_seen": 18922368, + "step": 6005 + }, + { + "epoch": 0.38473849305422186, + "grad_norm": 30.069700241088867, + "learning_rate": 1.5457068913035463e-06, + "loss": 0.4288, + "num_input_tokens_seen": 18937536, + "step": 6010 + }, + { + "epoch": 0.38505857499519874, + "grad_norm": 40.16860580444336, + "learning_rate": 1.544770222895103e-06, + "loss": 0.4784, + "num_input_tokens_seen": 18954048, + "step": 6015 + }, + { + "epoch": 0.3853786569361757, + "grad_norm": 30.41385269165039, + "learning_rate": 1.5438328743346398e-06, + "loss": 0.5188, + "num_input_tokens_seen": 18969472, + "step": 6020 + }, + { + "epoch": 0.38569873887715256, + "grad_norm": 22.75130844116211, + "learning_rate": 1.5428948467924478e-06, + "loss": 0.4098, + "num_input_tokens_seen": 18983872, + "step": 6025 + }, + { + "epoch": 0.38601882081812944, + "grad_norm": 20.55361557006836, + "learning_rate": 1.5419561414396656e-06, + "loss": 0.3223, + "num_input_tokens_seen": 18999360, + "step": 6030 + }, + { + "epoch": 0.3863389027591063, + "grad_norm": 23.010210037231445, + "learning_rate": 1.541016759448277e-06, + "loss": 0.4888, + "num_input_tokens_seen": 19015424, + "step": 6035 + }, + { + "epoch": 0.3866589847000832, + "grad_norm": 30.879016876220703, + "learning_rate": 1.5400767019911124e-06, + "loss": 0.3641, + "num_input_tokens_seen": 19031616, + "step": 6040 + }, + { + "epoch": 0.38697906664106013, + "grad_norm": 31.967321395874023, + "learning_rate": 1.539135970241844e-06, + "loss": 0.4821, + "num_input_tokens_seen": 19047040, + "step": 6045 + }, + { + "epoch": 0.387299148582037, + "grad_norm": 50.726158142089844, + "learning_rate": 1.5381945653749866e-06, + "loss": 0.479, + "num_input_tokens_seen": 19062848, + "step": 6050 + }, + { + "epoch": 0.3876192305230139, + "grad_norm": 80.43476867675781, + "learning_rate": 1.5372524885658952e-06, + "loss": 0.5564, + "num_input_tokens_seen": 19078976, + "step": 6055 + }, + { + "epoch": 0.38793931246399077, + "grad_norm": 24.717586517333984, + "learning_rate": 1.5363097409907638e-06, + "loss": 0.3676, + "num_input_tokens_seen": 19093632, + "step": 6060 + }, + { + "epoch": 0.38825939440496765, + "grad_norm": 22.33540916442871, + "learning_rate": 1.535366323826624e-06, + "loss": 0.3605, + "num_input_tokens_seen": 19109056, + "step": 6065 + }, + { + "epoch": 0.3885794763459446, + "grad_norm": 46.442413330078125, + "learning_rate": 1.534422238251343e-06, + "loss": 0.3699, + "num_input_tokens_seen": 19124544, + "step": 6070 + }, + { + "epoch": 0.38889955828692147, + "grad_norm": 33.82103729248047, + "learning_rate": 1.5334774854436223e-06, + "loss": 0.3834, + "num_input_tokens_seen": 19140480, + "step": 6075 + }, + { + "epoch": 0.38921964022789834, + "grad_norm": 41.09638214111328, + "learning_rate": 1.5325320665829975e-06, + "loss": 0.3776, + "num_input_tokens_seen": 19156736, + "step": 6080 + }, + { + "epoch": 0.3895397221688752, + "grad_norm": 31.53407096862793, + "learning_rate": 1.5315859828498352e-06, + "loss": 0.4455, + "num_input_tokens_seen": 19171520, + "step": 6085 + }, + { + "epoch": 0.3898598041098521, + "grad_norm": 31.16860580444336, + "learning_rate": 1.5306392354253316e-06, + "loss": 0.4921, + "num_input_tokens_seen": 19187136, + "step": 6090 + }, + { + "epoch": 0.39017988605082904, + "grad_norm": 23.219755172729492, + "learning_rate": 1.5296918254915123e-06, + "loss": 0.4377, + "num_input_tokens_seen": 19201856, + "step": 6095 + }, + { + "epoch": 0.3904999679918059, + "grad_norm": 26.253602981567383, + "learning_rate": 1.5287437542312296e-06, + "loss": 0.3869, + "num_input_tokens_seen": 19216704, + "step": 6100 + }, + { + "epoch": 0.3908200499327828, + "grad_norm": 61.03850173950195, + "learning_rate": 1.5277950228281614e-06, + "loss": 0.5316, + "num_input_tokens_seen": 19233408, + "step": 6105 + }, + { + "epoch": 0.3911401318737597, + "grad_norm": 26.556734085083008, + "learning_rate": 1.52684563246681e-06, + "loss": 0.354, + "num_input_tokens_seen": 19250048, + "step": 6110 + }, + { + "epoch": 0.39146021381473656, + "grad_norm": 16.79180335998535, + "learning_rate": 1.5258955843325015e-06, + "loss": 0.4243, + "num_input_tokens_seen": 19266560, + "step": 6115 + }, + { + "epoch": 0.39178029575571344, + "grad_norm": 58.60289764404297, + "learning_rate": 1.5249448796113804e-06, + "loss": 0.4885, + "num_input_tokens_seen": 19281408, + "step": 6120 + }, + { + "epoch": 0.39210037769669037, + "grad_norm": 47.47416687011719, + "learning_rate": 1.5239935194904141e-06, + "loss": 0.4747, + "num_input_tokens_seen": 19296384, + "step": 6125 + }, + { + "epoch": 0.39242045963766725, + "grad_norm": 24.381053924560547, + "learning_rate": 1.523041505157386e-06, + "loss": 0.3702, + "num_input_tokens_seen": 19312000, + "step": 6130 + }, + { + "epoch": 0.39274054157864413, + "grad_norm": 25.145042419433594, + "learning_rate": 1.5220888378008977e-06, + "loss": 0.3909, + "num_input_tokens_seen": 19327488, + "step": 6135 + }, + { + "epoch": 0.393060623519621, + "grad_norm": 22.552824020385742, + "learning_rate": 1.5211355186103654e-06, + "loss": 0.4661, + "num_input_tokens_seen": 19342080, + "step": 6140 + }, + { + "epoch": 0.3933807054605979, + "grad_norm": 50.69114303588867, + "learning_rate": 1.5201815487760192e-06, + "loss": 0.4126, + "num_input_tokens_seen": 19358336, + "step": 6145 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 92.56407165527344, + "learning_rate": 1.5192269294889019e-06, + "loss": 0.508, + "num_input_tokens_seen": 19373376, + "step": 6150 + }, + { + "epoch": 0.3940208693425517, + "grad_norm": 28.557926177978516, + "learning_rate": 1.5182716619408666e-06, + "loss": 0.4029, + "num_input_tokens_seen": 19388608, + "step": 6155 + }, + { + "epoch": 0.3943409512835286, + "grad_norm": 31.255754470825195, + "learning_rate": 1.5173157473245764e-06, + "loss": 0.5398, + "num_input_tokens_seen": 19403264, + "step": 6160 + }, + { + "epoch": 0.39466103322450546, + "grad_norm": 36.93677520751953, + "learning_rate": 1.5163591868335016e-06, + "loss": 0.4363, + "num_input_tokens_seen": 19418816, + "step": 6165 + }, + { + "epoch": 0.39498111516548234, + "grad_norm": 39.658329010009766, + "learning_rate": 1.515401981661919e-06, + "loss": 0.5781, + "num_input_tokens_seen": 19435392, + "step": 6170 + }, + { + "epoch": 0.3953011971064593, + "grad_norm": 32.506134033203125, + "learning_rate": 1.514444133004911e-06, + "loss": 0.4592, + "num_input_tokens_seen": 19450048, + "step": 6175 + }, + { + "epoch": 0.39562127904743616, + "grad_norm": 30.993446350097656, + "learning_rate": 1.5134856420583631e-06, + "loss": 0.4592, + "num_input_tokens_seen": 19466368, + "step": 6180 + }, + { + "epoch": 0.39594136098841304, + "grad_norm": 23.403287887573242, + "learning_rate": 1.5125265100189614e-06, + "loss": 0.3338, + "num_input_tokens_seen": 19482624, + "step": 6185 + }, + { + "epoch": 0.3962614429293899, + "grad_norm": 32.384483337402344, + "learning_rate": 1.5115667380841948e-06, + "loss": 0.5304, + "num_input_tokens_seen": 19498048, + "step": 6190 + }, + { + "epoch": 0.3965815248703668, + "grad_norm": 19.235095977783203, + "learning_rate": 1.510606327452349e-06, + "loss": 0.43, + "num_input_tokens_seen": 19515264, + "step": 6195 + }, + { + "epoch": 0.3969016068113437, + "grad_norm": 34.2067985534668, + "learning_rate": 1.5096452793225082e-06, + "loss": 0.4319, + "num_input_tokens_seen": 19533056, + "step": 6200 + }, + { + "epoch": 0.3972216887523206, + "grad_norm": 30.670093536376953, + "learning_rate": 1.5086835948945522e-06, + "loss": 0.4003, + "num_input_tokens_seen": 19548480, + "step": 6205 + }, + { + "epoch": 0.3975417706932975, + "grad_norm": 29.265615463256836, + "learning_rate": 1.5077212753691556e-06, + "loss": 0.3271, + "num_input_tokens_seen": 19563712, + "step": 6210 + }, + { + "epoch": 0.39786185263427437, + "grad_norm": 35.499732971191406, + "learning_rate": 1.5067583219477852e-06, + "loss": 0.4049, + "num_input_tokens_seen": 19578624, + "step": 6215 + }, + { + "epoch": 0.39818193457525125, + "grad_norm": 30.449113845825195, + "learning_rate": 1.5057947358327e-06, + "loss": 0.3916, + "num_input_tokens_seen": 19593408, + "step": 6220 + }, + { + "epoch": 0.39850201651622813, + "grad_norm": 37.85767364501953, + "learning_rate": 1.504830518226948e-06, + "loss": 0.4907, + "num_input_tokens_seen": 19609216, + "step": 6225 + }, + { + "epoch": 0.39882209845720507, + "grad_norm": 19.524030685424805, + "learning_rate": 1.5038656703343672e-06, + "loss": 0.449, + "num_input_tokens_seen": 19624896, + "step": 6230 + }, + { + "epoch": 0.39914218039818194, + "grad_norm": 76.64604949951172, + "learning_rate": 1.5029001933595805e-06, + "loss": 0.4925, + "num_input_tokens_seen": 19640128, + "step": 6235 + }, + { + "epoch": 0.3994622623391588, + "grad_norm": 32.2121696472168, + "learning_rate": 1.501934088507998e-06, + "loss": 0.3433, + "num_input_tokens_seen": 19655680, + "step": 6240 + }, + { + "epoch": 0.3997823442801357, + "grad_norm": 32.78192901611328, + "learning_rate": 1.5009673569858126e-06, + "loss": 0.6227, + "num_input_tokens_seen": 19672192, + "step": 6245 + }, + { + "epoch": 0.4001024262211126, + "grad_norm": 46.18693542480469, + "learning_rate": 1.5e-06, + "loss": 0.5284, + "num_input_tokens_seen": 19688896, + "step": 6250 + }, + { + "epoch": 0.4004225081620895, + "grad_norm": 18.203367233276367, + "learning_rate": 1.4990320187583167e-06, + "loss": 0.3547, + "num_input_tokens_seen": 19704128, + "step": 6255 + }, + { + "epoch": 0.4004865245502849, + "eval_loss": 0.42333245277404785, + "eval_runtime": 49.177, + "eval_samples_per_second": 282.368, + "eval_steps_per_second": 35.301, + "num_input_tokens_seen": 19707456, + "step": 6256 + }, + { + "epoch": 0.4007425901030664, + "grad_norm": 34.608970642089844, + "learning_rate": 1.4980634144692986e-06, + "loss": 0.395, + "num_input_tokens_seen": 19719744, + "step": 6260 + }, + { + "epoch": 0.4010626720440433, + "grad_norm": 48.07910919189453, + "learning_rate": 1.4970941883422599e-06, + "loss": 0.3795, + "num_input_tokens_seen": 19736128, + "step": 6265 + }, + { + "epoch": 0.40138275398502016, + "grad_norm": 25.680130004882812, + "learning_rate": 1.4961243415872901e-06, + "loss": 0.4165, + "num_input_tokens_seen": 19751296, + "step": 6270 + }, + { + "epoch": 0.40170283592599704, + "grad_norm": 64.0484848022461, + "learning_rate": 1.4951538754152551e-06, + "loss": 0.4057, + "num_input_tokens_seen": 19765888, + "step": 6275 + }, + { + "epoch": 0.402022917866974, + "grad_norm": 29.654808044433594, + "learning_rate": 1.4941827910377925e-06, + "loss": 0.4205, + "num_input_tokens_seen": 19780864, + "step": 6280 + }, + { + "epoch": 0.40234299980795085, + "grad_norm": 23.910985946655273, + "learning_rate": 1.4932110896673131e-06, + "loss": 0.4014, + "num_input_tokens_seen": 19796864, + "step": 6285 + }, + { + "epoch": 0.40266308174892773, + "grad_norm": 29.215768814086914, + "learning_rate": 1.4922387725169973e-06, + "loss": 0.5395, + "num_input_tokens_seen": 19811904, + "step": 6290 + }, + { + "epoch": 0.4029831636899046, + "grad_norm": 33.94330596923828, + "learning_rate": 1.4912658408007947e-06, + "loss": 0.4049, + "num_input_tokens_seen": 19827456, + "step": 6295 + }, + { + "epoch": 0.4033032456308815, + "grad_norm": 33.57729721069336, + "learning_rate": 1.4902922957334215e-06, + "loss": 0.4269, + "num_input_tokens_seen": 19842496, + "step": 6300 + }, + { + "epoch": 0.40362332757185837, + "grad_norm": 43.49477005004883, + "learning_rate": 1.4893181385303608e-06, + "loss": 0.408, + "num_input_tokens_seen": 19858240, + "step": 6305 + }, + { + "epoch": 0.4039434095128353, + "grad_norm": 32.84989547729492, + "learning_rate": 1.4883433704078584e-06, + "loss": 0.3994, + "num_input_tokens_seen": 19874368, + "step": 6310 + }, + { + "epoch": 0.4042634914538122, + "grad_norm": 32.79706954956055, + "learning_rate": 1.4873679925829246e-06, + "loss": 0.3874, + "num_input_tokens_seen": 19891904, + "step": 6315 + }, + { + "epoch": 0.40458357339478906, + "grad_norm": 21.430252075195312, + "learning_rate": 1.4863920062733298e-06, + "loss": 0.4077, + "num_input_tokens_seen": 19907392, + "step": 6320 + }, + { + "epoch": 0.40490365533576594, + "grad_norm": 46.1721305847168, + "learning_rate": 1.485415412697604e-06, + "loss": 0.3779, + "num_input_tokens_seen": 19922624, + "step": 6325 + }, + { + "epoch": 0.4052237372767428, + "grad_norm": 36.21952438354492, + "learning_rate": 1.484438213075036e-06, + "loss": 0.4348, + "num_input_tokens_seen": 19939328, + "step": 6330 + }, + { + "epoch": 0.40554381921771976, + "grad_norm": 42.817806243896484, + "learning_rate": 1.4834604086256713e-06, + "loss": 0.4465, + "num_input_tokens_seen": 19955392, + "step": 6335 + }, + { + "epoch": 0.40586390115869664, + "grad_norm": 35.81399154663086, + "learning_rate": 1.4824820005703097e-06, + "loss": 0.3818, + "num_input_tokens_seen": 19971520, + "step": 6340 + }, + { + "epoch": 0.4061839830996735, + "grad_norm": 22.910531997680664, + "learning_rate": 1.4815029901305061e-06, + "loss": 0.46, + "num_input_tokens_seen": 19988352, + "step": 6345 + }, + { + "epoch": 0.4065040650406504, + "grad_norm": 29.75078010559082, + "learning_rate": 1.480523378528565e-06, + "loss": 0.4748, + "num_input_tokens_seen": 20005184, + "step": 6350 + }, + { + "epoch": 0.4068241469816273, + "grad_norm": 45.245052337646484, + "learning_rate": 1.4795431669875441e-06, + "loss": 0.4064, + "num_input_tokens_seen": 20020800, + "step": 6355 + }, + { + "epoch": 0.4071442289226042, + "grad_norm": 29.282560348510742, + "learning_rate": 1.478562356731249e-06, + "loss": 0.472, + "num_input_tokens_seen": 20036416, + "step": 6360 + }, + { + "epoch": 0.4074643108635811, + "grad_norm": 37.65520095825195, + "learning_rate": 1.4775809489842326e-06, + "loss": 0.4525, + "num_input_tokens_seen": 20053184, + "step": 6365 + }, + { + "epoch": 0.40778439280455797, + "grad_norm": 28.39930534362793, + "learning_rate": 1.4765989449717937e-06, + "loss": 0.3987, + "num_input_tokens_seen": 20069888, + "step": 6370 + }, + { + "epoch": 0.40810447474553485, + "grad_norm": 64.06832885742188, + "learning_rate": 1.4756163459199763e-06, + "loss": 0.5504, + "num_input_tokens_seen": 20085760, + "step": 6375 + }, + { + "epoch": 0.40842455668651173, + "grad_norm": 34.247596740722656, + "learning_rate": 1.4746331530555665e-06, + "loss": 0.2742, + "num_input_tokens_seen": 20101056, + "step": 6380 + }, + { + "epoch": 0.4087446386274886, + "grad_norm": 41.40673828125, + "learning_rate": 1.4736493676060923e-06, + "loss": 0.4133, + "num_input_tokens_seen": 20116352, + "step": 6385 + }, + { + "epoch": 0.40906472056846555, + "grad_norm": 20.095537185668945, + "learning_rate": 1.4726649907998216e-06, + "loss": 0.3642, + "num_input_tokens_seen": 20131712, + "step": 6390 + }, + { + "epoch": 0.4093848025094424, + "grad_norm": 30.422456741333008, + "learning_rate": 1.4716800238657599e-06, + "loss": 0.3759, + "num_input_tokens_seen": 20146880, + "step": 6395 + }, + { + "epoch": 0.4097048844504193, + "grad_norm": 16.951066970825195, + "learning_rate": 1.4706944680336505e-06, + "loss": 0.2767, + "num_input_tokens_seen": 20163520, + "step": 6400 + }, + { + "epoch": 0.4100249663913962, + "grad_norm": 42.80522537231445, + "learning_rate": 1.469708324533971e-06, + "loss": 0.4681, + "num_input_tokens_seen": 20177984, + "step": 6405 + }, + { + "epoch": 0.41034504833237306, + "grad_norm": 16.141464233398438, + "learning_rate": 1.4687215945979335e-06, + "loss": 0.3395, + "num_input_tokens_seen": 20193472, + "step": 6410 + }, + { + "epoch": 0.41066513027335, + "grad_norm": 42.42402267456055, + "learning_rate": 1.4677342794574815e-06, + "loss": 0.4507, + "num_input_tokens_seen": 20210624, + "step": 6415 + }, + { + "epoch": 0.4109852122143269, + "grad_norm": 58.724464416503906, + "learning_rate": 1.4667463803452902e-06, + "loss": 0.4199, + "num_input_tokens_seen": 20226688, + "step": 6420 + }, + { + "epoch": 0.41130529415530376, + "grad_norm": 41.05823516845703, + "learning_rate": 1.4657578984947627e-06, + "loss": 0.4472, + "num_input_tokens_seen": 20244608, + "step": 6425 + }, + { + "epoch": 0.41162537609628064, + "grad_norm": 36.066612243652344, + "learning_rate": 1.4647688351400303e-06, + "loss": 0.3699, + "num_input_tokens_seen": 20261184, + "step": 6430 + }, + { + "epoch": 0.4119454580372575, + "grad_norm": 21.72051239013672, + "learning_rate": 1.46377919151595e-06, + "loss": 0.3348, + "num_input_tokens_seen": 20276736, + "step": 6435 + }, + { + "epoch": 0.41226553997823445, + "grad_norm": 29.512678146362305, + "learning_rate": 1.462788968858104e-06, + "loss": 0.4651, + "num_input_tokens_seen": 20293888, + "step": 6440 + }, + { + "epoch": 0.41258562191921133, + "grad_norm": 20.376218795776367, + "learning_rate": 1.4617981684027966e-06, + "loss": 0.482, + "num_input_tokens_seen": 20309696, + "step": 6445 + }, + { + "epoch": 0.4129057038601882, + "grad_norm": 19.641904830932617, + "learning_rate": 1.4608067913870536e-06, + "loss": 0.4013, + "num_input_tokens_seen": 20325632, + "step": 6450 + }, + { + "epoch": 0.4132257858011651, + "grad_norm": 22.1761474609375, + "learning_rate": 1.4598148390486213e-06, + "loss": 0.3968, + "num_input_tokens_seen": 20341888, + "step": 6455 + }, + { + "epoch": 0.41354586774214197, + "grad_norm": 30.586984634399414, + "learning_rate": 1.4588223126259639e-06, + "loss": 0.5073, + "num_input_tokens_seen": 20358656, + "step": 6460 + }, + { + "epoch": 0.4138659496831189, + "grad_norm": 15.245569229125977, + "learning_rate": 1.4578292133582615e-06, + "loss": 0.3245, + "num_input_tokens_seen": 20372864, + "step": 6465 + }, + { + "epoch": 0.4141860316240958, + "grad_norm": 27.139429092407227, + "learning_rate": 1.456835542485411e-06, + "loss": 0.3954, + "num_input_tokens_seen": 20387840, + "step": 6470 + }, + { + "epoch": 0.41450611356507266, + "grad_norm": 32.64242172241211, + "learning_rate": 1.4558413012480215e-06, + "loss": 0.4092, + "num_input_tokens_seen": 20404736, + "step": 6475 + }, + { + "epoch": 0.41482619550604954, + "grad_norm": 37.946998596191406, + "learning_rate": 1.4548464908874156e-06, + "loss": 0.5673, + "num_input_tokens_seen": 20422848, + "step": 6480 + }, + { + "epoch": 0.4151462774470264, + "grad_norm": 31.876144409179688, + "learning_rate": 1.4538511126456255e-06, + "loss": 0.3996, + "num_input_tokens_seen": 20438016, + "step": 6485 + }, + { + "epoch": 0.4154663593880033, + "grad_norm": 54.237831115722656, + "learning_rate": 1.452855167765392e-06, + "loss": 0.5913, + "num_input_tokens_seen": 20454464, + "step": 6490 + }, + { + "epoch": 0.41578644132898024, + "grad_norm": 24.1745548248291, + "learning_rate": 1.4518586574901647e-06, + "loss": 0.4487, + "num_input_tokens_seen": 20470464, + "step": 6495 + }, + { + "epoch": 0.4161065232699571, + "grad_norm": 27.391712188720703, + "learning_rate": 1.450861583064098e-06, + "loss": 0.4617, + "num_input_tokens_seen": 20485696, + "step": 6500 + }, + { + "epoch": 0.416426605210934, + "grad_norm": 24.388179779052734, + "learning_rate": 1.4498639457320515e-06, + "loss": 0.3642, + "num_input_tokens_seen": 20500608, + "step": 6505 + }, + { + "epoch": 0.4167466871519109, + "grad_norm": 34.60757827758789, + "learning_rate": 1.4488657467395865e-06, + "loss": 0.4686, + "num_input_tokens_seen": 20515776, + "step": 6510 + }, + { + "epoch": 0.41706676909288776, + "grad_norm": 38.96852493286133, + "learning_rate": 1.4478669873329663e-06, + "loss": 0.5078, + "num_input_tokens_seen": 20531456, + "step": 6515 + }, + { + "epoch": 0.4173868510338647, + "grad_norm": 32.143882751464844, + "learning_rate": 1.4468676687591536e-06, + "loss": 0.386, + "num_input_tokens_seen": 20547200, + "step": 6520 + }, + { + "epoch": 0.41770693297484157, + "grad_norm": 28.233505249023438, + "learning_rate": 1.4458677922658104e-06, + "loss": 0.4358, + "num_input_tokens_seen": 20562560, + "step": 6525 + }, + { + "epoch": 0.41802701491581845, + "grad_norm": 18.132049560546875, + "learning_rate": 1.444867359101293e-06, + "loss": 0.2798, + "num_input_tokens_seen": 20577344, + "step": 6530 + }, + { + "epoch": 0.41834709685679533, + "grad_norm": 35.095619201660156, + "learning_rate": 1.4438663705146545e-06, + "loss": 0.3529, + "num_input_tokens_seen": 20593088, + "step": 6535 + }, + { + "epoch": 0.4186671787977722, + "grad_norm": 29.12217903137207, + "learning_rate": 1.442864827755641e-06, + "loss": 0.3589, + "num_input_tokens_seen": 20609792, + "step": 6540 + }, + { + "epoch": 0.41898726073874915, + "grad_norm": 18.185195922851562, + "learning_rate": 1.4418627320746901e-06, + "loss": 0.4407, + "num_input_tokens_seen": 20625280, + "step": 6545 + }, + { + "epoch": 0.419307342679726, + "grad_norm": 31.992891311645508, + "learning_rate": 1.4408600847229304e-06, + "loss": 0.3854, + "num_input_tokens_seen": 20641984, + "step": 6550 + }, + { + "epoch": 0.4196274246207029, + "grad_norm": 31.761362075805664, + "learning_rate": 1.4398568869521782e-06, + "loss": 0.5281, + "num_input_tokens_seen": 20658240, + "step": 6555 + }, + { + "epoch": 0.4199475065616798, + "grad_norm": 31.19809341430664, + "learning_rate": 1.4388531400149384e-06, + "loss": 0.3645, + "num_input_tokens_seen": 20673408, + "step": 6560 + }, + { + "epoch": 0.42026758850265666, + "grad_norm": 46.35468673706055, + "learning_rate": 1.4378488451644007e-06, + "loss": 0.3866, + "num_input_tokens_seen": 20688960, + "step": 6565 + }, + { + "epoch": 0.42058767044363354, + "grad_norm": 25.512950897216797, + "learning_rate": 1.4368440036544386e-06, + "loss": 0.4049, + "num_input_tokens_seen": 20704768, + "step": 6570 + }, + { + "epoch": 0.4209077523846105, + "grad_norm": 43.326324462890625, + "learning_rate": 1.435838616739609e-06, + "loss": 0.4199, + "num_input_tokens_seen": 20719808, + "step": 6575 + }, + { + "epoch": 0.42122783432558736, + "grad_norm": 35.062923431396484, + "learning_rate": 1.4348326856751493e-06, + "loss": 0.5392, + "num_input_tokens_seen": 20735680, + "step": 6580 + }, + { + "epoch": 0.42154791626656424, + "grad_norm": 27.509485244750977, + "learning_rate": 1.433826211716976e-06, + "loss": 0.3422, + "num_input_tokens_seen": 20750144, + "step": 6585 + }, + { + "epoch": 0.4218679982075411, + "grad_norm": 33.31727981567383, + "learning_rate": 1.4328191961216835e-06, + "loss": 0.3966, + "num_input_tokens_seen": 20766016, + "step": 6590 + }, + { + "epoch": 0.422188080148518, + "grad_norm": 54.75680923461914, + "learning_rate": 1.4318116401465427e-06, + "loss": 0.4812, + "num_input_tokens_seen": 20782720, + "step": 6595 + }, + { + "epoch": 0.42250816208949493, + "grad_norm": 29.398454666137695, + "learning_rate": 1.430803545049499e-06, + "loss": 0.388, + "num_input_tokens_seen": 20798208, + "step": 6600 + }, + { + "epoch": 0.4228282440304718, + "grad_norm": 16.673908233642578, + "learning_rate": 1.4297949120891716e-06, + "loss": 0.5652, + "num_input_tokens_seen": 20813056, + "step": 6605 + }, + { + "epoch": 0.4231483259714487, + "grad_norm": 35.20106506347656, + "learning_rate": 1.4287857425248497e-06, + "loss": 0.4121, + "num_input_tokens_seen": 20828800, + "step": 6610 + }, + { + "epoch": 0.42346840791242557, + "grad_norm": 23.11936378479004, + "learning_rate": 1.427776037616494e-06, + "loss": 0.4974, + "num_input_tokens_seen": 20844736, + "step": 6615 + }, + { + "epoch": 0.42378848985340245, + "grad_norm": 30.515439987182617, + "learning_rate": 1.4267657986247326e-06, + "loss": 0.3527, + "num_input_tokens_seen": 20860672, + "step": 6620 + }, + { + "epoch": 0.4241085717943794, + "grad_norm": 33.26582717895508, + "learning_rate": 1.425755026810861e-06, + "loss": 0.3746, + "num_input_tokens_seen": 20877184, + "step": 6625 + }, + { + "epoch": 0.42442865373535626, + "grad_norm": 55.12078857421875, + "learning_rate": 1.4247437234368394e-06, + "loss": 0.4095, + "num_input_tokens_seen": 20894208, + "step": 6630 + }, + { + "epoch": 0.42474873567633314, + "grad_norm": 36.948524475097656, + "learning_rate": 1.423731889765292e-06, + "loss": 0.4001, + "num_input_tokens_seen": 20909696, + "step": 6635 + }, + { + "epoch": 0.42506881761731, + "grad_norm": 15.861876487731934, + "learning_rate": 1.422719527059505e-06, + "loss": 0.3504, + "num_input_tokens_seen": 20926016, + "step": 6640 + }, + { + "epoch": 0.4253888995582869, + "grad_norm": 20.37615394592285, + "learning_rate": 1.4217066365834253e-06, + "loss": 0.3636, + "num_input_tokens_seen": 20941440, + "step": 6645 + }, + { + "epoch": 0.42570898149926384, + "grad_norm": 34.968894958496094, + "learning_rate": 1.4206932196016586e-06, + "loss": 0.4406, + "num_input_tokens_seen": 20956352, + "step": 6650 + }, + { + "epoch": 0.4260290634402407, + "grad_norm": 54.73747634887695, + "learning_rate": 1.4196792773794672e-06, + "loss": 0.3928, + "num_input_tokens_seen": 20973056, + "step": 6655 + }, + { + "epoch": 0.4263491453812176, + "grad_norm": 38.245426177978516, + "learning_rate": 1.418664811182771e-06, + "loss": 0.438, + "num_input_tokens_seen": 20989248, + "step": 6660 + }, + { + "epoch": 0.4266692273221945, + "grad_norm": 41.432498931884766, + "learning_rate": 1.417649822278142e-06, + "loss": 0.4836, + "num_input_tokens_seen": 21004096, + "step": 6665 + }, + { + "epoch": 0.42698930926317136, + "grad_norm": 24.442115783691406, + "learning_rate": 1.4166343119328064e-06, + "loss": 0.4722, + "num_input_tokens_seen": 21020224, + "step": 6670 + }, + { + "epoch": 0.42730939120414824, + "grad_norm": 30.54802703857422, + "learning_rate": 1.4156182814146404e-06, + "loss": 0.4616, + "num_input_tokens_seen": 21035264, + "step": 6675 + }, + { + "epoch": 0.42762947314512517, + "grad_norm": 19.643733978271484, + "learning_rate": 1.4146017319921701e-06, + "loss": 0.3497, + "num_input_tokens_seen": 21051904, + "step": 6680 + }, + { + "epoch": 0.42794955508610205, + "grad_norm": 31.077213287353516, + "learning_rate": 1.4135846649345695e-06, + "loss": 0.4215, + "num_input_tokens_seen": 21069504, + "step": 6685 + }, + { + "epoch": 0.42826963702707893, + "grad_norm": 30.736148834228516, + "learning_rate": 1.4125670815116589e-06, + "loss": 0.427, + "num_input_tokens_seen": 21084288, + "step": 6690 + }, + { + "epoch": 0.4285897189680558, + "grad_norm": 28.045896530151367, + "learning_rate": 1.4115489829939025e-06, + "loss": 0.2926, + "num_input_tokens_seen": 21100544, + "step": 6695 + }, + { + "epoch": 0.4289098009090327, + "grad_norm": 28.585994720458984, + "learning_rate": 1.4105303706524093e-06, + "loss": 0.4407, + "num_input_tokens_seen": 21116608, + "step": 6700 + }, + { + "epoch": 0.4292298828500096, + "grad_norm": 48.415164947509766, + "learning_rate": 1.4095112457589276e-06, + "loss": 0.5926, + "num_input_tokens_seen": 21131776, + "step": 6705 + }, + { + "epoch": 0.4295499647909865, + "grad_norm": 28.813779830932617, + "learning_rate": 1.4084916095858477e-06, + "loss": 0.3962, + "num_input_tokens_seen": 21146368, + "step": 6710 + }, + { + "epoch": 0.4298700467319634, + "grad_norm": 30.74667739868164, + "learning_rate": 1.407471463406197e-06, + "loss": 0.4951, + "num_input_tokens_seen": 21162368, + "step": 6715 + }, + { + "epoch": 0.43019012867294026, + "grad_norm": 28.847599029541016, + "learning_rate": 1.4064508084936399e-06, + "loss": 0.4329, + "num_input_tokens_seen": 21179008, + "step": 6720 + }, + { + "epoch": 0.43051021061391714, + "grad_norm": 30.80069351196289, + "learning_rate": 1.405429646122476e-06, + "loss": 0.5761, + "num_input_tokens_seen": 21196160, + "step": 6725 + }, + { + "epoch": 0.4308302925548941, + "grad_norm": 20.700214385986328, + "learning_rate": 1.4044079775676392e-06, + "loss": 0.5175, + "num_input_tokens_seen": 21212032, + "step": 6730 + }, + { + "epoch": 0.43115037449587096, + "grad_norm": 22.982175827026367, + "learning_rate": 1.4033858041046936e-06, + "loss": 0.3659, + "num_input_tokens_seen": 21230272, + "step": 6735 + }, + { + "epoch": 0.43147045643684784, + "grad_norm": 23.89682388305664, + "learning_rate": 1.4023631270098352e-06, + "loss": 0.3926, + "num_input_tokens_seen": 21245760, + "step": 6740 + }, + { + "epoch": 0.4317905383778247, + "grad_norm": 28.520267486572266, + "learning_rate": 1.4013399475598888e-06, + "loss": 0.3411, + "num_input_tokens_seen": 21260992, + "step": 6745 + }, + { + "epoch": 0.4321106203188016, + "grad_norm": 22.041383743286133, + "learning_rate": 1.4003162670323056e-06, + "loss": 0.2807, + "num_input_tokens_seen": 21275136, + "step": 6750 + }, + { + "epoch": 0.4324307022597785, + "grad_norm": 72.86239624023438, + "learning_rate": 1.3992920867051627e-06, + "loss": 0.5292, + "num_input_tokens_seen": 21290560, + "step": 6755 + }, + { + "epoch": 0.4327507842007554, + "grad_norm": 43.2622184753418, + "learning_rate": 1.3982674078571614e-06, + "loss": 0.3525, + "num_input_tokens_seen": 21305536, + "step": 6760 + }, + { + "epoch": 0.4330708661417323, + "grad_norm": 25.79481315612793, + "learning_rate": 1.3972422317676252e-06, + "loss": 0.3785, + "num_input_tokens_seen": 21320576, + "step": 6765 + }, + { + "epoch": 0.43339094808270917, + "grad_norm": 17.41854476928711, + "learning_rate": 1.3962165597164985e-06, + "loss": 0.367, + "num_input_tokens_seen": 21335680, + "step": 6770 + }, + { + "epoch": 0.43371103002368605, + "grad_norm": 30.709115982055664, + "learning_rate": 1.395190392984345e-06, + "loss": 0.3496, + "num_input_tokens_seen": 21351808, + "step": 6775 + }, + { + "epoch": 0.43403111196466293, + "grad_norm": 26.75821304321289, + "learning_rate": 1.3941637328523452e-06, + "loss": 0.4482, + "num_input_tokens_seen": 21366464, + "step": 6780 + }, + { + "epoch": 0.43435119390563987, + "grad_norm": 36.62665939331055, + "learning_rate": 1.3931365806022978e-06, + "loss": 0.3094, + "num_input_tokens_seen": 21383296, + "step": 6785 + }, + { + "epoch": 0.43467127584661674, + "grad_norm": 38.72547912597656, + "learning_rate": 1.3921089375166131e-06, + "loss": 0.3178, + "num_input_tokens_seen": 21399616, + "step": 6790 + }, + { + "epoch": 0.4349913577875936, + "grad_norm": 21.409557342529297, + "learning_rate": 1.391080804878316e-06, + "loss": 0.4475, + "num_input_tokens_seen": 21414848, + "step": 6795 + }, + { + "epoch": 0.4353114397285705, + "grad_norm": 60.08560562133789, + "learning_rate": 1.3900521839710427e-06, + "loss": 0.3748, + "num_input_tokens_seen": 21430144, + "step": 6800 + }, + { + "epoch": 0.4356315216695474, + "grad_norm": 23.838533401489258, + "learning_rate": 1.3890230760790373e-06, + "loss": 0.3516, + "num_input_tokens_seen": 21445248, + "step": 6805 + }, + { + "epoch": 0.4359516036105243, + "grad_norm": 95.79853820800781, + "learning_rate": 1.3879934824871544e-06, + "loss": 0.5972, + "num_input_tokens_seen": 21460544, + "step": 6810 + }, + { + "epoch": 0.4362716855515012, + "grad_norm": 28.7491512298584, + "learning_rate": 1.3869634044808526e-06, + "loss": 0.4871, + "num_input_tokens_seen": 21476224, + "step": 6815 + }, + { + "epoch": 0.4365917674924781, + "grad_norm": 38.301719665527344, + "learning_rate": 1.3859328433461971e-06, + "loss": 0.5996, + "num_input_tokens_seen": 21491712, + "step": 6820 + }, + { + "epoch": 0.43691184943345496, + "grad_norm": 67.00725555419922, + "learning_rate": 1.3849018003698553e-06, + "loss": 0.5784, + "num_input_tokens_seen": 21508928, + "step": 6825 + }, + { + "epoch": 0.43723193137443184, + "grad_norm": 36.97137451171875, + "learning_rate": 1.3838702768390964e-06, + "loss": 0.415, + "num_input_tokens_seen": 21523648, + "step": 6830 + }, + { + "epoch": 0.43755201331540877, + "grad_norm": 31.718050003051758, + "learning_rate": 1.38283827404179e-06, + "loss": 0.4777, + "num_input_tokens_seen": 21539264, + "step": 6835 + }, + { + "epoch": 0.43787209525638565, + "grad_norm": 50.313236236572266, + "learning_rate": 1.381805793266403e-06, + "loss": 0.3776, + "num_input_tokens_seen": 21555520, + "step": 6840 + }, + { + "epoch": 0.43819217719736253, + "grad_norm": 35.556846618652344, + "learning_rate": 1.3807728358020009e-06, + "loss": 0.4517, + "num_input_tokens_seen": 21570112, + "step": 6845 + }, + { + "epoch": 0.4385122591383394, + "grad_norm": 45.01139450073242, + "learning_rate": 1.3797394029382416e-06, + "loss": 0.3386, + "num_input_tokens_seen": 21584768, + "step": 6850 + }, + { + "epoch": 0.4388323410793163, + "grad_norm": 22.66309928894043, + "learning_rate": 1.37870549596538e-06, + "loss": 0.2963, + "num_input_tokens_seen": 21599872, + "step": 6855 + }, + { + "epoch": 0.43915242302029317, + "grad_norm": 19.721696853637695, + "learning_rate": 1.3776711161742595e-06, + "loss": 0.5262, + "num_input_tokens_seen": 21615808, + "step": 6860 + }, + { + "epoch": 0.4394725049612701, + "grad_norm": 27.445302963256836, + "learning_rate": 1.3766362648563166e-06, + "loss": 0.4639, + "num_input_tokens_seen": 21630656, + "step": 6865 + }, + { + "epoch": 0.439792586902247, + "grad_norm": 62.023433685302734, + "learning_rate": 1.3756009433035744e-06, + "loss": 0.4073, + "num_input_tokens_seen": 21646976, + "step": 6870 + }, + { + "epoch": 0.44011266884322386, + "grad_norm": 27.705705642700195, + "learning_rate": 1.3745651528086447e-06, + "loss": 0.5615, + "num_input_tokens_seen": 21665024, + "step": 6875 + }, + { + "epoch": 0.44043275078420074, + "grad_norm": 15.181832313537598, + "learning_rate": 1.373528894664724e-06, + "loss": 0.4486, + "num_input_tokens_seen": 21680128, + "step": 6880 + }, + { + "epoch": 0.4407528327251776, + "grad_norm": 23.56239128112793, + "learning_rate": 1.3724921701655924e-06, + "loss": 0.3509, + "num_input_tokens_seen": 21695808, + "step": 6885 + }, + { + "epoch": 0.44107291466615456, + "grad_norm": 15.783341407775879, + "learning_rate": 1.3714549806056125e-06, + "loss": 0.3155, + "num_input_tokens_seen": 21711936, + "step": 6890 + }, + { + "epoch": 0.44139299660713144, + "grad_norm": 45.81801986694336, + "learning_rate": 1.3704173272797283e-06, + "loss": 0.4241, + "num_input_tokens_seen": 21727488, + "step": 6895 + }, + { + "epoch": 0.4417130785481083, + "grad_norm": 39.76817321777344, + "learning_rate": 1.3693792114834619e-06, + "loss": 0.4386, + "num_input_tokens_seen": 21745280, + "step": 6900 + }, + { + "epoch": 0.4420331604890852, + "grad_norm": 26.435964584350586, + "learning_rate": 1.3683406345129129e-06, + "loss": 0.4684, + "num_input_tokens_seen": 21760000, + "step": 6905 + }, + { + "epoch": 0.4423532424300621, + "grad_norm": 25.047027587890625, + "learning_rate": 1.3673015976647567e-06, + "loss": 0.4025, + "num_input_tokens_seen": 21775232, + "step": 6910 + }, + { + "epoch": 0.442673324371039, + "grad_norm": 36.185760498046875, + "learning_rate": 1.3662621022362435e-06, + "loss": 0.3967, + "num_input_tokens_seen": 21790656, + "step": 6915 + }, + { + "epoch": 0.4429934063120159, + "grad_norm": 53.022464752197266, + "learning_rate": 1.3652221495251952e-06, + "loss": 0.4654, + "num_input_tokens_seen": 21806336, + "step": 6920 + }, + { + "epoch": 0.44331348825299277, + "grad_norm": 26.99211883544922, + "learning_rate": 1.3641817408300049e-06, + "loss": 0.3204, + "num_input_tokens_seen": 21823744, + "step": 6925 + }, + { + "epoch": 0.44363357019396965, + "grad_norm": 30.070894241333008, + "learning_rate": 1.3631408774496352e-06, + "loss": 0.5579, + "num_input_tokens_seen": 21839104, + "step": 6930 + }, + { + "epoch": 0.44395365213494653, + "grad_norm": 26.091249465942383, + "learning_rate": 1.3620995606836165e-06, + "loss": 0.3566, + "num_input_tokens_seen": 21854528, + "step": 6935 + }, + { + "epoch": 0.4442737340759234, + "grad_norm": 58.88991165161133, + "learning_rate": 1.3610577918320446e-06, + "loss": 0.6023, + "num_input_tokens_seen": 21870592, + "step": 6940 + }, + { + "epoch": 0.44459381601690035, + "grad_norm": 44.893310546875, + "learning_rate": 1.3600155721955802e-06, + "loss": 0.3743, + "num_input_tokens_seen": 21885696, + "step": 6945 + }, + { + "epoch": 0.4449138979578772, + "grad_norm": 24.15410614013672, + "learning_rate": 1.3589729030754468e-06, + "loss": 0.3819, + "num_input_tokens_seen": 21901248, + "step": 6950 + }, + { + "epoch": 0.4452339798988541, + "grad_norm": 28.12432861328125, + "learning_rate": 1.3579297857734293e-06, + "loss": 0.4341, + "num_input_tokens_seen": 21916352, + "step": 6955 + }, + { + "epoch": 0.445554061839831, + "grad_norm": 17.1772518157959, + "learning_rate": 1.3568862215918717e-06, + "loss": 0.3365, + "num_input_tokens_seen": 21931072, + "step": 6960 + }, + { + "epoch": 0.44587414378080786, + "grad_norm": 32.58141326904297, + "learning_rate": 1.3558422118336762e-06, + "loss": 0.4944, + "num_input_tokens_seen": 21946752, + "step": 6965 + }, + { + "epoch": 0.4461942257217848, + "grad_norm": 37.54017639160156, + "learning_rate": 1.354797757802301e-06, + "loss": 0.4804, + "num_input_tokens_seen": 21962176, + "step": 6970 + }, + { + "epoch": 0.4465143076627617, + "grad_norm": 17.05492401123047, + "learning_rate": 1.3537528608017596e-06, + "loss": 0.392, + "num_input_tokens_seen": 21978496, + "step": 6975 + }, + { + "epoch": 0.44683438960373856, + "grad_norm": 23.01466941833496, + "learning_rate": 1.352707522136618e-06, + "loss": 0.3973, + "num_input_tokens_seen": 21992576, + "step": 6980 + }, + { + "epoch": 0.44715447154471544, + "grad_norm": 17.1395206451416, + "learning_rate": 1.3516617431119934e-06, + "loss": 0.3998, + "num_input_tokens_seen": 22008000, + "step": 6985 + }, + { + "epoch": 0.4474745534856923, + "grad_norm": 32.3569450378418, + "learning_rate": 1.350615525033554e-06, + "loss": 0.53, + "num_input_tokens_seen": 22022976, + "step": 6990 + }, + { + "epoch": 0.44779463542666925, + "grad_norm": 25.158411026000977, + "learning_rate": 1.3495688692075144e-06, + "loss": 0.4027, + "num_input_tokens_seen": 22038144, + "step": 6995 + }, + { + "epoch": 0.44811471736764613, + "grad_norm": 31.82624053955078, + "learning_rate": 1.3485217769406376e-06, + "loss": 0.3435, + "num_input_tokens_seen": 22054016, + "step": 7000 + }, + { + "epoch": 0.448434799308623, + "grad_norm": 30.720848083496094, + "learning_rate": 1.3474742495402303e-06, + "loss": 0.3605, + "num_input_tokens_seen": 22073920, + "step": 7005 + }, + { + "epoch": 0.4487548812495999, + "grad_norm": 50.93308639526367, + "learning_rate": 1.3464262883141425e-06, + "loss": 0.4297, + "num_input_tokens_seen": 22089728, + "step": 7010 + }, + { + "epoch": 0.44907496319057677, + "grad_norm": 36.83964538574219, + "learning_rate": 1.3453778945707663e-06, + "loss": 0.5687, + "num_input_tokens_seen": 22105344, + "step": 7015 + }, + { + "epoch": 0.4493950451315537, + "grad_norm": 53.62667465209961, + "learning_rate": 1.3443290696190332e-06, + "loss": 0.4471, + "num_input_tokens_seen": 22121792, + "step": 7020 + }, + { + "epoch": 0.4497151270725306, + "grad_norm": 23.14280128479004, + "learning_rate": 1.343279814768414e-06, + "loss": 0.4034, + "num_input_tokens_seen": 22136128, + "step": 7025 + }, + { + "epoch": 0.45003520901350746, + "grad_norm": 22.742084503173828, + "learning_rate": 1.3422301313289156e-06, + "loss": 0.38, + "num_input_tokens_seen": 22151936, + "step": 7030 + }, + { + "epoch": 0.45035529095448434, + "grad_norm": 21.072940826416016, + "learning_rate": 1.34118002061108e-06, + "loss": 0.3794, + "num_input_tokens_seen": 22168128, + "step": 7035 + }, + { + "epoch": 0.4505473401190705, + "eval_loss": 0.43158382177352905, + "eval_runtime": 49.1758, + "eval_samples_per_second": 282.375, + "eval_steps_per_second": 35.302, + "num_input_tokens_seen": 22178432, + "step": 7038 + }, + { + "epoch": 0.4506753728954612, + "grad_norm": 38.73175048828125, + "learning_rate": 1.3401294839259828e-06, + "loss": 0.4309, + "num_input_tokens_seen": 22184512, + "step": 7040 + }, + { + "epoch": 0.4509954548364381, + "grad_norm": 33.52423095703125, + "learning_rate": 1.3390785225852312e-06, + "loss": 0.54, + "num_input_tokens_seen": 22199872, + "step": 7045 + }, + { + "epoch": 0.45131553677741504, + "grad_norm": 19.460634231567383, + "learning_rate": 1.3380271379009631e-06, + "loss": 0.4411, + "num_input_tokens_seen": 22216960, + "step": 7050 + }, + { + "epoch": 0.4516356187183919, + "grad_norm": 19.555931091308594, + "learning_rate": 1.3369753311858442e-06, + "loss": 0.2615, + "num_input_tokens_seen": 22231488, + "step": 7055 + }, + { + "epoch": 0.4519557006593688, + "grad_norm": 28.813966751098633, + "learning_rate": 1.3359231037530682e-06, + "loss": 0.4584, + "num_input_tokens_seen": 22246976, + "step": 7060 + }, + { + "epoch": 0.4522757826003457, + "grad_norm": 16.085895538330078, + "learning_rate": 1.3348704569163527e-06, + "loss": 0.4139, + "num_input_tokens_seen": 22263680, + "step": 7065 + }, + { + "epoch": 0.45259586454132256, + "grad_norm": 18.5650691986084, + "learning_rate": 1.33381739198994e-06, + "loss": 0.3347, + "num_input_tokens_seen": 22279552, + "step": 7070 + }, + { + "epoch": 0.4529159464822995, + "grad_norm": 19.012405395507812, + "learning_rate": 1.3327639102885938e-06, + "loss": 0.4436, + "num_input_tokens_seen": 22295296, + "step": 7075 + }, + { + "epoch": 0.45323602842327637, + "grad_norm": 34.81302261352539, + "learning_rate": 1.3317100131275986e-06, + "loss": 0.3973, + "num_input_tokens_seen": 22310400, + "step": 7080 + }, + { + "epoch": 0.45355611036425325, + "grad_norm": 60.76240921020508, + "learning_rate": 1.3306557018227576e-06, + "loss": 0.492, + "num_input_tokens_seen": 22326848, + "step": 7085 + }, + { + "epoch": 0.45387619230523013, + "grad_norm": 30.761585235595703, + "learning_rate": 1.3296009776903903e-06, + "loss": 0.47, + "num_input_tokens_seen": 22342592, + "step": 7090 + }, + { + "epoch": 0.454196274246207, + "grad_norm": 29.366207122802734, + "learning_rate": 1.3285458420473323e-06, + "loss": 0.4386, + "num_input_tokens_seen": 22358912, + "step": 7095 + }, + { + "epoch": 0.45451635618718395, + "grad_norm": 30.328184127807617, + "learning_rate": 1.3274902962109332e-06, + "loss": 0.3744, + "num_input_tokens_seen": 22374528, + "step": 7100 + }, + { + "epoch": 0.4548364381281608, + "grad_norm": 17.943153381347656, + "learning_rate": 1.3264343414990539e-06, + "loss": 0.3686, + "num_input_tokens_seen": 22389824, + "step": 7105 + }, + { + "epoch": 0.4551565200691377, + "grad_norm": 35.101932525634766, + "learning_rate": 1.3253779792300663e-06, + "loss": 0.4148, + "num_input_tokens_seen": 22405376, + "step": 7110 + }, + { + "epoch": 0.4554766020101146, + "grad_norm": 14.828371047973633, + "learning_rate": 1.3243212107228518e-06, + "loss": 0.3551, + "num_input_tokens_seen": 22420032, + "step": 7115 + }, + { + "epoch": 0.45579668395109146, + "grad_norm": 15.68032169342041, + "learning_rate": 1.3232640372967974e-06, + "loss": 0.3909, + "num_input_tokens_seen": 22434688, + "step": 7120 + }, + { + "epoch": 0.45611676589206834, + "grad_norm": 51.65379333496094, + "learning_rate": 1.3222064602717974e-06, + "loss": 0.4645, + "num_input_tokens_seen": 22451072, + "step": 7125 + }, + { + "epoch": 0.4564368478330453, + "grad_norm": 30.610668182373047, + "learning_rate": 1.321148480968248e-06, + "loss": 0.3488, + "num_input_tokens_seen": 22466688, + "step": 7130 + }, + { + "epoch": 0.45675692977402216, + "grad_norm": 38.32967758178711, + "learning_rate": 1.3200901007070495e-06, + "loss": 0.4609, + "num_input_tokens_seen": 22482432, + "step": 7135 + }, + { + "epoch": 0.45707701171499904, + "grad_norm": 42.44841003417969, + "learning_rate": 1.3190313208096022e-06, + "loss": 0.4616, + "num_input_tokens_seen": 22496960, + "step": 7140 + }, + { + "epoch": 0.4573970936559759, + "grad_norm": 62.05764389038086, + "learning_rate": 1.3179721425978048e-06, + "loss": 0.3617, + "num_input_tokens_seen": 22512256, + "step": 7145 + }, + { + "epoch": 0.4577171755969528, + "grad_norm": 27.489582061767578, + "learning_rate": 1.3169125673940541e-06, + "loss": 0.4002, + "num_input_tokens_seen": 22528192, + "step": 7150 + }, + { + "epoch": 0.45803725753792973, + "grad_norm": 23.193330764770508, + "learning_rate": 1.3158525965212422e-06, + "loss": 0.4126, + "num_input_tokens_seen": 22545408, + "step": 7155 + }, + { + "epoch": 0.4583573394789066, + "grad_norm": 44.60530090332031, + "learning_rate": 1.3147922313027548e-06, + "loss": 0.5063, + "num_input_tokens_seen": 22560832, + "step": 7160 + }, + { + "epoch": 0.4586774214198835, + "grad_norm": 34.29766845703125, + "learning_rate": 1.3137314730624707e-06, + "loss": 0.3456, + "num_input_tokens_seen": 22577728, + "step": 7165 + }, + { + "epoch": 0.45899750336086037, + "grad_norm": 59.20881652832031, + "learning_rate": 1.3126703231247588e-06, + "loss": 0.4722, + "num_input_tokens_seen": 22594112, + "step": 7170 + }, + { + "epoch": 0.45931758530183725, + "grad_norm": 57.1280632019043, + "learning_rate": 1.3116087828144772e-06, + "loss": 0.3917, + "num_input_tokens_seen": 22609728, + "step": 7175 + }, + { + "epoch": 0.4596376672428142, + "grad_norm": 24.825468063354492, + "learning_rate": 1.310546853456972e-06, + "loss": 0.4692, + "num_input_tokens_seen": 22624704, + "step": 7180 + }, + { + "epoch": 0.45995774918379106, + "grad_norm": 27.96169662475586, + "learning_rate": 1.3094845363780737e-06, + "loss": 0.3145, + "num_input_tokens_seen": 22640448, + "step": 7185 + }, + { + "epoch": 0.46027783112476794, + "grad_norm": 26.550325393676758, + "learning_rate": 1.3084218329040976e-06, + "loss": 0.2277, + "num_input_tokens_seen": 22655680, + "step": 7190 + }, + { + "epoch": 0.4605979130657448, + "grad_norm": 17.48622703552246, + "learning_rate": 1.3073587443618425e-06, + "loss": 0.3769, + "num_input_tokens_seen": 22672128, + "step": 7195 + }, + { + "epoch": 0.4609179950067217, + "grad_norm": 60.23152542114258, + "learning_rate": 1.3062952720785861e-06, + "loss": 0.5418, + "num_input_tokens_seen": 22687104, + "step": 7200 + }, + { + "epoch": 0.4612380769476986, + "grad_norm": 48.24466323852539, + "learning_rate": 1.305231417382086e-06, + "loss": 0.3724, + "num_input_tokens_seen": 22702976, + "step": 7205 + }, + { + "epoch": 0.4615581588886755, + "grad_norm": 34.0355224609375, + "learning_rate": 1.3041671816005777e-06, + "loss": 0.3522, + "num_input_tokens_seen": 22718464, + "step": 7210 + }, + { + "epoch": 0.4618782408296524, + "grad_norm": 30.36563491821289, + "learning_rate": 1.3031025660627718e-06, + "loss": 0.3783, + "num_input_tokens_seen": 22734656, + "step": 7215 + }, + { + "epoch": 0.4621983227706293, + "grad_norm": 38.3671989440918, + "learning_rate": 1.3020375720978534e-06, + "loss": 0.4376, + "num_input_tokens_seen": 22750016, + "step": 7220 + }, + { + "epoch": 0.46251840471160616, + "grad_norm": 32.97966003417969, + "learning_rate": 1.3009722010354799e-06, + "loss": 0.3855, + "num_input_tokens_seen": 22765632, + "step": 7225 + }, + { + "epoch": 0.46283848665258304, + "grad_norm": 39.90695571899414, + "learning_rate": 1.2999064542057794e-06, + "loss": 0.4528, + "num_input_tokens_seen": 22781184, + "step": 7230 + }, + { + "epoch": 0.46315856859355997, + "grad_norm": 31.27988624572754, + "learning_rate": 1.2988403329393495e-06, + "loss": 0.4842, + "num_input_tokens_seen": 22797248, + "step": 7235 + }, + { + "epoch": 0.46347865053453685, + "grad_norm": 29.927885055541992, + "learning_rate": 1.2977738385672557e-06, + "loss": 0.4177, + "num_input_tokens_seen": 22812800, + "step": 7240 + }, + { + "epoch": 0.46379873247551373, + "grad_norm": 21.404644012451172, + "learning_rate": 1.2967069724210278e-06, + "loss": 0.4087, + "num_input_tokens_seen": 22827200, + "step": 7245 + }, + { + "epoch": 0.4641188144164906, + "grad_norm": 31.973535537719727, + "learning_rate": 1.2956397358326609e-06, + "loss": 0.5265, + "num_input_tokens_seen": 22843264, + "step": 7250 + }, + { + "epoch": 0.4644388963574675, + "grad_norm": 39.217674255371094, + "learning_rate": 1.294572130134613e-06, + "loss": 0.3799, + "num_input_tokens_seen": 22858624, + "step": 7255 + }, + { + "epoch": 0.4647589782984444, + "grad_norm": 36.54713821411133, + "learning_rate": 1.2935041566598016e-06, + "loss": 0.5557, + "num_input_tokens_seen": 22873856, + "step": 7260 + }, + { + "epoch": 0.4650790602394213, + "grad_norm": 32.417545318603516, + "learning_rate": 1.2924358167416049e-06, + "loss": 0.356, + "num_input_tokens_seen": 22889600, + "step": 7265 + }, + { + "epoch": 0.4653991421803982, + "grad_norm": 24.408979415893555, + "learning_rate": 1.2913671117138572e-06, + "loss": 0.4007, + "num_input_tokens_seen": 22904704, + "step": 7270 + }, + { + "epoch": 0.46571922412137506, + "grad_norm": 22.64531898498535, + "learning_rate": 1.29029804291085e-06, + "loss": 0.3471, + "num_input_tokens_seen": 22920384, + "step": 7275 + }, + { + "epoch": 0.46603930606235194, + "grad_norm": 44.77216339111328, + "learning_rate": 1.2892286116673269e-06, + "loss": 0.3475, + "num_input_tokens_seen": 22937024, + "step": 7280 + }, + { + "epoch": 0.4663593880033289, + "grad_norm": 26.58623695373535, + "learning_rate": 1.2881588193184865e-06, + "loss": 0.4934, + "num_input_tokens_seen": 22954816, + "step": 7285 + }, + { + "epoch": 0.46667946994430576, + "grad_norm": 22.52194595336914, + "learning_rate": 1.287088667199977e-06, + "loss": 0.2918, + "num_input_tokens_seen": 22969472, + "step": 7290 + }, + { + "epoch": 0.46699955188528264, + "grad_norm": 22.330564498901367, + "learning_rate": 1.2860181566478956e-06, + "loss": 0.4681, + "num_input_tokens_seen": 22984192, + "step": 7295 + }, + { + "epoch": 0.4673196338262595, + "grad_norm": 13.149898529052734, + "learning_rate": 1.2849472889987874e-06, + "loss": 0.3868, + "num_input_tokens_seen": 22999680, + "step": 7300 + }, + { + "epoch": 0.4676397157672364, + "grad_norm": 27.509746551513672, + "learning_rate": 1.2838760655896431e-06, + "loss": 0.3784, + "num_input_tokens_seen": 23014720, + "step": 7305 + }, + { + "epoch": 0.4679597977082133, + "grad_norm": 35.98652648925781, + "learning_rate": 1.2828044877578983e-06, + "loss": 0.4544, + "num_input_tokens_seen": 23030528, + "step": 7310 + }, + { + "epoch": 0.4682798796491902, + "grad_norm": 26.335607528686523, + "learning_rate": 1.2817325568414297e-06, + "loss": 0.5205, + "num_input_tokens_seen": 23046784, + "step": 7315 + }, + { + "epoch": 0.4685999615901671, + "grad_norm": 26.756956100463867, + "learning_rate": 1.2806602741785562e-06, + "loss": 0.3379, + "num_input_tokens_seen": 23061632, + "step": 7320 + }, + { + "epoch": 0.46892004353114397, + "grad_norm": 17.465469360351562, + "learning_rate": 1.2795876411080346e-06, + "loss": 0.3202, + "num_input_tokens_seen": 23077888, + "step": 7325 + }, + { + "epoch": 0.46924012547212085, + "grad_norm": 24.94025993347168, + "learning_rate": 1.278514658969061e-06, + "loss": 0.3308, + "num_input_tokens_seen": 23093568, + "step": 7330 + }, + { + "epoch": 0.46956020741309773, + "grad_norm": 29.178998947143555, + "learning_rate": 1.2774413291012648e-06, + "loss": 0.5047, + "num_input_tokens_seen": 23108992, + "step": 7335 + }, + { + "epoch": 0.46988028935407467, + "grad_norm": 25.278213500976562, + "learning_rate": 1.2763676528447122e-06, + "loss": 0.4191, + "num_input_tokens_seen": 23124992, + "step": 7340 + }, + { + "epoch": 0.47020037129505154, + "grad_norm": 31.44306755065918, + "learning_rate": 1.2752936315399003e-06, + "loss": 0.3417, + "num_input_tokens_seen": 23141888, + "step": 7345 + }, + { + "epoch": 0.4705204532360284, + "grad_norm": 27.29042625427246, + "learning_rate": 1.2742192665277566e-06, + "loss": 0.3346, + "num_input_tokens_seen": 23157888, + "step": 7350 + }, + { + "epoch": 0.4708405351770053, + "grad_norm": 25.130107879638672, + "learning_rate": 1.2731445591496393e-06, + "loss": 0.2813, + "num_input_tokens_seen": 23172864, + "step": 7355 + }, + { + "epoch": 0.4711606171179822, + "grad_norm": 45.540672302246094, + "learning_rate": 1.2720695107473325e-06, + "loss": 0.4622, + "num_input_tokens_seen": 23188352, + "step": 7360 + }, + { + "epoch": 0.4714806990589591, + "grad_norm": 38.563602447509766, + "learning_rate": 1.2709941226630475e-06, + "loss": 0.3897, + "num_input_tokens_seen": 23204096, + "step": 7365 + }, + { + "epoch": 0.471800780999936, + "grad_norm": 27.982297897338867, + "learning_rate": 1.2699183962394182e-06, + "loss": 0.3513, + "num_input_tokens_seen": 23219072, + "step": 7370 + }, + { + "epoch": 0.4721208629409129, + "grad_norm": 15.643006324768066, + "learning_rate": 1.2688423328195021e-06, + "loss": 0.4198, + "num_input_tokens_seen": 23234560, + "step": 7375 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 62.19183349609375, + "learning_rate": 1.267765933746777e-06, + "loss": 0.3426, + "num_input_tokens_seen": 23250304, + "step": 7380 + }, + { + "epoch": 0.47276102682286664, + "grad_norm": 51.6485710144043, + "learning_rate": 1.2666892003651397e-06, + "loss": 0.6245, + "num_input_tokens_seen": 23265664, + "step": 7385 + }, + { + "epoch": 0.4730811087638435, + "grad_norm": 28.73395538330078, + "learning_rate": 1.2656121340189043e-06, + "loss": 0.442, + "num_input_tokens_seen": 23281472, + "step": 7390 + }, + { + "epoch": 0.47340119070482045, + "grad_norm": 28.408031463623047, + "learning_rate": 1.264534736052801e-06, + "loss": 0.411, + "num_input_tokens_seen": 23297024, + "step": 7395 + }, + { + "epoch": 0.47372127264579733, + "grad_norm": 41.88270950317383, + "learning_rate": 1.2634570078119739e-06, + "loss": 0.4385, + "num_input_tokens_seen": 23313344, + "step": 7400 + }, + { + "epoch": 0.4740413545867742, + "grad_norm": 27.301424026489258, + "learning_rate": 1.262378950641979e-06, + "loss": 0.5213, + "num_input_tokens_seen": 23328512, + "step": 7405 + }, + { + "epoch": 0.4743614365277511, + "grad_norm": 23.59923553466797, + "learning_rate": 1.2613005658887836e-06, + "loss": 0.4465, + "num_input_tokens_seen": 23342400, + "step": 7410 + }, + { + "epoch": 0.47468151846872797, + "grad_norm": 34.58885192871094, + "learning_rate": 1.2602218548987637e-06, + "loss": 0.4134, + "num_input_tokens_seen": 23358400, + "step": 7415 + }, + { + "epoch": 0.4750016004097049, + "grad_norm": 32.09384536743164, + "learning_rate": 1.2591428190187029e-06, + "loss": 0.4102, + "num_input_tokens_seen": 23373376, + "step": 7420 + }, + { + "epoch": 0.4753216823506818, + "grad_norm": 57.16767501831055, + "learning_rate": 1.2580634595957898e-06, + "loss": 0.5013, + "num_input_tokens_seen": 23390400, + "step": 7425 + }, + { + "epoch": 0.47564176429165866, + "grad_norm": 27.278974533081055, + "learning_rate": 1.2569837779776172e-06, + "loss": 0.3705, + "num_input_tokens_seen": 23406400, + "step": 7430 + }, + { + "epoch": 0.47596184623263554, + "grad_norm": 27.228130340576172, + "learning_rate": 1.2559037755121804e-06, + "loss": 0.3131, + "num_input_tokens_seen": 23421824, + "step": 7435 + }, + { + "epoch": 0.4762819281736124, + "grad_norm": 51.93519592285156, + "learning_rate": 1.2548234535478754e-06, + "loss": 0.4512, + "num_input_tokens_seen": 23438272, + "step": 7440 + }, + { + "epoch": 0.47660201011458936, + "grad_norm": 17.943632125854492, + "learning_rate": 1.2537428134334968e-06, + "loss": 0.4216, + "num_input_tokens_seen": 23454976, + "step": 7445 + }, + { + "epoch": 0.47692209205556624, + "grad_norm": 98.46037292480469, + "learning_rate": 1.252661856518236e-06, + "loss": 0.5189, + "num_input_tokens_seen": 23471168, + "step": 7450 + }, + { + "epoch": 0.4772421739965431, + "grad_norm": 28.342315673828125, + "learning_rate": 1.251580584151681e-06, + "loss": 0.3564, + "num_input_tokens_seen": 23486720, + "step": 7455 + }, + { + "epoch": 0.47756225593752, + "grad_norm": 21.639692306518555, + "learning_rate": 1.2504989976838129e-06, + "loss": 0.3059, + "num_input_tokens_seen": 23502912, + "step": 7460 + }, + { + "epoch": 0.4778823378784969, + "grad_norm": 26.391496658325195, + "learning_rate": 1.2494170984650048e-06, + "loss": 0.3667, + "num_input_tokens_seen": 23519552, + "step": 7465 + }, + { + "epoch": 0.4782024198194738, + "grad_norm": 31.00334930419922, + "learning_rate": 1.248334887846021e-06, + "loss": 0.4019, + "num_input_tokens_seen": 23535936, + "step": 7470 + }, + { + "epoch": 0.4785225017604507, + "grad_norm": 29.97296142578125, + "learning_rate": 1.2472523671780135e-06, + "loss": 0.4373, + "num_input_tokens_seen": 23551040, + "step": 7475 + }, + { + "epoch": 0.47884258370142757, + "grad_norm": 35.39260482788086, + "learning_rate": 1.2461695378125233e-06, + "loss": 0.3115, + "num_input_tokens_seen": 23566208, + "step": 7480 + }, + { + "epoch": 0.47916266564240445, + "grad_norm": 20.799793243408203, + "learning_rate": 1.245086401101474e-06, + "loss": 0.4197, + "num_input_tokens_seen": 23581696, + "step": 7485 + }, + { + "epoch": 0.47948274758338133, + "grad_norm": 69.36449432373047, + "learning_rate": 1.2440029583971757e-06, + "loss": 0.4454, + "num_input_tokens_seen": 23597248, + "step": 7490 + }, + { + "epoch": 0.4798028295243582, + "grad_norm": 16.190322875976562, + "learning_rate": 1.2429192110523188e-06, + "loss": 0.4913, + "num_input_tokens_seen": 23612800, + "step": 7495 + }, + { + "epoch": 0.48012291146533514, + "grad_norm": 28.28662109375, + "learning_rate": 1.2418351604199746e-06, + "loss": 0.3338, + "num_input_tokens_seen": 23629056, + "step": 7500 + }, + { + "epoch": 0.480442993406312, + "grad_norm": 39.906612396240234, + "learning_rate": 1.2407508078535934e-06, + "loss": 0.4447, + "num_input_tokens_seen": 23644352, + "step": 7505 + }, + { + "epoch": 0.4807630753472889, + "grad_norm": 25.87689208984375, + "learning_rate": 1.2396661547070017e-06, + "loss": 0.2785, + "num_input_tokens_seen": 23661120, + "step": 7510 + }, + { + "epoch": 0.4810831572882658, + "grad_norm": 18.180044174194336, + "learning_rate": 1.238581202334402e-06, + "loss": 0.3347, + "num_input_tokens_seen": 23677632, + "step": 7515 + }, + { + "epoch": 0.48140323922924266, + "grad_norm": 26.29235076904297, + "learning_rate": 1.2374959520903699e-06, + "loss": 0.3673, + "num_input_tokens_seen": 23693952, + "step": 7520 + }, + { + "epoch": 0.4817233211702196, + "grad_norm": 17.1253662109375, + "learning_rate": 1.2364104053298531e-06, + "loss": 0.3341, + "num_input_tokens_seen": 23708736, + "step": 7525 + }, + { + "epoch": 0.4820434031111965, + "grad_norm": 30.4875431060791, + "learning_rate": 1.2353245634081692e-06, + "loss": 0.3913, + "num_input_tokens_seen": 23724864, + "step": 7530 + }, + { + "epoch": 0.48236348505217336, + "grad_norm": 23.729246139526367, + "learning_rate": 1.2342384276810053e-06, + "loss": 0.4148, + "num_input_tokens_seen": 23740160, + "step": 7535 + }, + { + "epoch": 0.48268356699315024, + "grad_norm": 70.08629608154297, + "learning_rate": 1.233151999504414e-06, + "loss": 0.423, + "num_input_tokens_seen": 23755264, + "step": 7540 + }, + { + "epoch": 0.4830036489341271, + "grad_norm": 46.91286849975586, + "learning_rate": 1.232065280234814e-06, + "loss": 0.3317, + "num_input_tokens_seen": 23770112, + "step": 7545 + }, + { + "epoch": 0.48332373087510405, + "grad_norm": 24.17731285095215, + "learning_rate": 1.2309782712289867e-06, + "loss": 0.4189, + "num_input_tokens_seen": 23785536, + "step": 7550 + }, + { + "epoch": 0.48364381281608093, + "grad_norm": 50.58120346069336, + "learning_rate": 1.2298909738440758e-06, + "loss": 0.4307, + "num_input_tokens_seen": 23801280, + "step": 7555 + }, + { + "epoch": 0.4839638947570578, + "grad_norm": 39.50659942626953, + "learning_rate": 1.2288033894375847e-06, + "loss": 0.371, + "num_input_tokens_seen": 23816448, + "step": 7560 + }, + { + "epoch": 0.4842839766980347, + "grad_norm": 31.22879409790039, + "learning_rate": 1.2277155193673755e-06, + "loss": 0.5539, + "num_input_tokens_seen": 23832512, + "step": 7565 + }, + { + "epoch": 0.48460405863901157, + "grad_norm": 14.704495429992676, + "learning_rate": 1.2266273649916668e-06, + "loss": 0.3968, + "num_input_tokens_seen": 23848192, + "step": 7570 + }, + { + "epoch": 0.48492414057998845, + "grad_norm": 18.676654815673828, + "learning_rate": 1.2255389276690318e-06, + "loss": 0.4249, + "num_input_tokens_seen": 23863808, + "step": 7575 + }, + { + "epoch": 0.4852442225209654, + "grad_norm": 32.08503341674805, + "learning_rate": 1.2244502087583978e-06, + "loss": 0.2927, + "num_input_tokens_seen": 23880960, + "step": 7580 + }, + { + "epoch": 0.48556430446194226, + "grad_norm": 46.882720947265625, + "learning_rate": 1.2233612096190426e-06, + "loss": 0.3969, + "num_input_tokens_seen": 23896256, + "step": 7585 + }, + { + "epoch": 0.48588438640291914, + "grad_norm": 36.5152473449707, + "learning_rate": 1.222271931610595e-06, + "loss": 0.5189, + "num_input_tokens_seen": 23912832, + "step": 7590 + }, + { + "epoch": 0.486204468343896, + "grad_norm": 26.63950538635254, + "learning_rate": 1.2211823760930306e-06, + "loss": 0.4929, + "num_input_tokens_seen": 23928768, + "step": 7595 + }, + { + "epoch": 0.4865245502848729, + "grad_norm": 18.74747657775879, + "learning_rate": 1.2200925444266726e-06, + "loss": 0.4206, + "num_input_tokens_seen": 23945088, + "step": 7600 + }, + { + "epoch": 0.48684463222584984, + "grad_norm": 39.23282241821289, + "learning_rate": 1.219002437972189e-06, + "loss": 0.5087, + "num_input_tokens_seen": 23960192, + "step": 7605 + }, + { + "epoch": 0.4871647141668267, + "grad_norm": 31.527008056640625, + "learning_rate": 1.21791205809059e-06, + "loss": 0.4208, + "num_input_tokens_seen": 23977152, + "step": 7610 + }, + { + "epoch": 0.4874847961078036, + "grad_norm": 30.472713470458984, + "learning_rate": 1.2168214061432283e-06, + "loss": 0.3611, + "num_input_tokens_seen": 23992448, + "step": 7615 + }, + { + "epoch": 0.4878048780487805, + "grad_norm": 24.9169864654541, + "learning_rate": 1.2157304834917947e-06, + "loss": 0.4276, + "num_input_tokens_seen": 24008384, + "step": 7620 + }, + { + "epoch": 0.48812495998975736, + "grad_norm": 28.272476196289062, + "learning_rate": 1.2146392914983202e-06, + "loss": 0.6241, + "num_input_tokens_seen": 24025728, + "step": 7625 + }, + { + "epoch": 0.4884450419307343, + "grad_norm": 44.216453552246094, + "learning_rate": 1.2135478315251694e-06, + "loss": 0.5169, + "num_input_tokens_seen": 24040448, + "step": 7630 + }, + { + "epoch": 0.48876512387171117, + "grad_norm": 26.274669647216797, + "learning_rate": 1.2124561049350442e-06, + "loss": 0.3428, + "num_input_tokens_seen": 24055168, + "step": 7635 + }, + { + "epoch": 0.48908520581268805, + "grad_norm": 41.2357292175293, + "learning_rate": 1.2113641130909772e-06, + "loss": 0.453, + "num_input_tokens_seen": 24070016, + "step": 7640 + }, + { + "epoch": 0.48940528775366493, + "grad_norm": 58.80428695678711, + "learning_rate": 1.2102718573563334e-06, + "loss": 0.3108, + "num_input_tokens_seen": 24084800, + "step": 7645 + }, + { + "epoch": 0.4897253696946418, + "grad_norm": 53.14729309082031, + "learning_rate": 1.2091793390948066e-06, + "loss": 0.4842, + "num_input_tokens_seen": 24100416, + "step": 7650 + }, + { + "epoch": 0.49004545163561875, + "grad_norm": 17.676326751708984, + "learning_rate": 1.2080865596704191e-06, + "loss": 0.2906, + "num_input_tokens_seen": 24117120, + "step": 7655 + }, + { + "epoch": 0.4903655335765956, + "grad_norm": 30.914222717285156, + "learning_rate": 1.2069935204475187e-06, + "loss": 0.4391, + "num_input_tokens_seen": 24132224, + "step": 7660 + }, + { + "epoch": 0.4906856155175725, + "grad_norm": 23.044315338134766, + "learning_rate": 1.2059002227907776e-06, + "loss": 0.3992, + "num_input_tokens_seen": 24147712, + "step": 7665 + }, + { + "epoch": 0.4910056974585494, + "grad_norm": 37.006168365478516, + "learning_rate": 1.2048066680651908e-06, + "loss": 0.4121, + "num_input_tokens_seen": 24164288, + "step": 7670 + }, + { + "epoch": 0.49132577939952626, + "grad_norm": 37.811988830566406, + "learning_rate": 1.2037128576360743e-06, + "loss": 0.5577, + "num_input_tokens_seen": 24193728, + "step": 7675 + }, + { + "epoch": 0.49164586134050314, + "grad_norm": 36.05268478393555, + "learning_rate": 1.2026187928690627e-06, + "loss": 0.4148, + "num_input_tokens_seen": 24208832, + "step": 7680 + }, + { + "epoch": 0.4919659432814801, + "grad_norm": 34.80404281616211, + "learning_rate": 1.2015244751301098e-06, + "loss": 0.5085, + "num_input_tokens_seen": 24223424, + "step": 7685 + }, + { + "epoch": 0.49228602522245696, + "grad_norm": 47.47758865356445, + "learning_rate": 1.2004299057854832e-06, + "loss": 0.43, + "num_input_tokens_seen": 24238976, + "step": 7690 + }, + { + "epoch": 0.49260610716343384, + "grad_norm": 22.682682037353516, + "learning_rate": 1.1993350862017661e-06, + "loss": 0.3893, + "num_input_tokens_seen": 24253632, + "step": 7695 + }, + { + "epoch": 0.4929261891044107, + "grad_norm": 35.10201644897461, + "learning_rate": 1.1982400177458534e-06, + "loss": 0.3968, + "num_input_tokens_seen": 24270720, + "step": 7700 + }, + { + "epoch": 0.4932462710453876, + "grad_norm": 34.98603820800781, + "learning_rate": 1.197144701784951e-06, + "loss": 0.4284, + "num_input_tokens_seen": 24285312, + "step": 7705 + }, + { + "epoch": 0.49356635298636453, + "grad_norm": 32.93339157104492, + "learning_rate": 1.1960491396865735e-06, + "loss": 0.3926, + "num_input_tokens_seen": 24300352, + "step": 7710 + }, + { + "epoch": 0.4938864349273414, + "grad_norm": 27.799358367919922, + "learning_rate": 1.1949533328185435e-06, + "loss": 0.3458, + "num_input_tokens_seen": 24317056, + "step": 7715 + }, + { + "epoch": 0.4942065168683183, + "grad_norm": 25.46038818359375, + "learning_rate": 1.1938572825489883e-06, + "loss": 0.3741, + "num_input_tokens_seen": 24333184, + "step": 7720 + }, + { + "epoch": 0.49452659880929517, + "grad_norm": 29.320058822631836, + "learning_rate": 1.1927609902463394e-06, + "loss": 0.409, + "num_input_tokens_seen": 24348672, + "step": 7725 + }, + { + "epoch": 0.49484668075027205, + "grad_norm": 44.419612884521484, + "learning_rate": 1.1916644572793314e-06, + "loss": 0.4346, + "num_input_tokens_seen": 24363648, + "step": 7730 + }, + { + "epoch": 0.495166762691249, + "grad_norm": 74.09778594970703, + "learning_rate": 1.190567685016998e-06, + "loss": 0.4964, + "num_input_tokens_seen": 24380992, + "step": 7735 + }, + { + "epoch": 0.49548684463222586, + "grad_norm": 27.674976348876953, + "learning_rate": 1.189470674828672e-06, + "loss": 0.4107, + "num_input_tokens_seen": 24395776, + "step": 7740 + }, + { + "epoch": 0.49580692657320274, + "grad_norm": 25.768115997314453, + "learning_rate": 1.188373428083984e-06, + "loss": 0.3878, + "num_input_tokens_seen": 24411584, + "step": 7745 + }, + { + "epoch": 0.4961270085141796, + "grad_norm": 44.345550537109375, + "learning_rate": 1.1872759461528596e-06, + "loss": 0.5219, + "num_input_tokens_seen": 24426560, + "step": 7750 + }, + { + "epoch": 0.4964470904551565, + "grad_norm": 13.35042667388916, + "learning_rate": 1.1861782304055174e-06, + "loss": 0.39, + "num_input_tokens_seen": 24441856, + "step": 7755 + }, + { + "epoch": 0.4967671723961334, + "grad_norm": 18.407421112060547, + "learning_rate": 1.1850802822124686e-06, + "loss": 0.3345, + "num_input_tokens_seen": 24457472, + "step": 7760 + }, + { + "epoch": 0.4970872543371103, + "grad_norm": 57.33185577392578, + "learning_rate": 1.1839821029445143e-06, + "loss": 0.5005, + "num_input_tokens_seen": 24471936, + "step": 7765 + }, + { + "epoch": 0.4974073362780872, + "grad_norm": 35.684871673583984, + "learning_rate": 1.1828836939727442e-06, + "loss": 0.3195, + "num_input_tokens_seen": 24487616, + "step": 7770 + }, + { + "epoch": 0.4977274182190641, + "grad_norm": 39.44476318359375, + "learning_rate": 1.181785056668535e-06, + "loss": 0.433, + "num_input_tokens_seen": 24503936, + "step": 7775 + }, + { + "epoch": 0.49804750016004096, + "grad_norm": 31.5116024017334, + "learning_rate": 1.180686192403548e-06, + "loss": 0.4212, + "num_input_tokens_seen": 24518464, + "step": 7780 + }, + { + "epoch": 0.49836758210101784, + "grad_norm": 69.69412231445312, + "learning_rate": 1.1795871025497285e-06, + "loss": 0.3439, + "num_input_tokens_seen": 24533184, + "step": 7785 + }, + { + "epoch": 0.49868766404199477, + "grad_norm": 33.76158905029297, + "learning_rate": 1.1784877884793029e-06, + "loss": 0.4122, + "num_input_tokens_seen": 24548992, + "step": 7790 + }, + { + "epoch": 0.49900774598297165, + "grad_norm": 32.13736343383789, + "learning_rate": 1.1773882515647776e-06, + "loss": 0.3627, + "num_input_tokens_seen": 24566592, + "step": 7795 + }, + { + "epoch": 0.49932782792394853, + "grad_norm": 26.241132736206055, + "learning_rate": 1.1762884931789376e-06, + "loss": 0.4811, + "num_input_tokens_seen": 24583552, + "step": 7800 + }, + { + "epoch": 0.4996479098649254, + "grad_norm": 15.578927040100098, + "learning_rate": 1.1751885146948436e-06, + "loss": 0.4548, + "num_input_tokens_seen": 24599552, + "step": 7805 + }, + { + "epoch": 0.4999679918059023, + "grad_norm": 34.21600341796875, + "learning_rate": 1.1740883174858327e-06, + "loss": 0.3633, + "num_input_tokens_seen": 24614912, + "step": 7810 + }, + { + "epoch": 0.5002880737468792, + "grad_norm": 33.92721939086914, + "learning_rate": 1.1729879029255127e-06, + "loss": 0.3649, + "num_input_tokens_seen": 24629696, + "step": 7815 + }, + { + "epoch": 0.5006081556878561, + "grad_norm": 32.14542007446289, + "learning_rate": 1.171887272387765e-06, + "loss": 0.3939, + "num_input_tokens_seen": 24646208, + "step": 7820 + }, + { + "epoch": 0.5006081556878561, + "eval_loss": 0.4134162962436676, + "eval_runtime": 49.1457, + "eval_samples_per_second": 282.548, + "eval_steps_per_second": 35.324, + "num_input_tokens_seen": 24646208, + "step": 7820 + }, + { + "epoch": 0.500928237628833, + "grad_norm": 79.38529205322266, + "learning_rate": 1.1707864272467397e-06, + "loss": 0.4985, + "num_input_tokens_seen": 24661120, + "step": 7825 + }, + { + "epoch": 0.5012483195698099, + "grad_norm": 39.66872024536133, + "learning_rate": 1.169685368876855e-06, + "loss": 0.423, + "num_input_tokens_seen": 24678336, + "step": 7830 + }, + { + "epoch": 0.5015684015107867, + "grad_norm": 61.929866790771484, + "learning_rate": 1.1685840986527946e-06, + "loss": 0.5534, + "num_input_tokens_seen": 24694336, + "step": 7835 + }, + { + "epoch": 0.5018884834517636, + "grad_norm": 36.273685455322266, + "learning_rate": 1.1674826179495076e-06, + "loss": 0.4044, + "num_input_tokens_seen": 24708608, + "step": 7840 + }, + { + "epoch": 0.5022085653927405, + "grad_norm": 33.48814010620117, + "learning_rate": 1.1663809281422056e-06, + "loss": 0.415, + "num_input_tokens_seen": 24724672, + "step": 7845 + }, + { + "epoch": 0.5025286473337174, + "grad_norm": 42.979496002197266, + "learning_rate": 1.1652790306063615e-06, + "loss": 0.4562, + "num_input_tokens_seen": 24740608, + "step": 7850 + }, + { + "epoch": 0.5028487292746944, + "grad_norm": 37.959041595458984, + "learning_rate": 1.164176926717707e-06, + "loss": 0.416, + "num_input_tokens_seen": 24758528, + "step": 7855 + }, + { + "epoch": 0.5031688112156713, + "grad_norm": 23.2774658203125, + "learning_rate": 1.1630746178522315e-06, + "loss": 0.3702, + "num_input_tokens_seen": 24772992, + "step": 7860 + }, + { + "epoch": 0.5034888931566481, + "grad_norm": 27.682905197143555, + "learning_rate": 1.1619721053861816e-06, + "loss": 0.4398, + "num_input_tokens_seen": 24788160, + "step": 7865 + }, + { + "epoch": 0.503808975097625, + "grad_norm": 19.770153045654297, + "learning_rate": 1.1608693906960558e-06, + "loss": 0.4093, + "num_input_tokens_seen": 24804224, + "step": 7870 + }, + { + "epoch": 0.5041290570386019, + "grad_norm": 30.391685485839844, + "learning_rate": 1.1597664751586069e-06, + "loss": 0.4426, + "num_input_tokens_seen": 24820928, + "step": 7875 + }, + { + "epoch": 0.5044491389795788, + "grad_norm": 49.482810974121094, + "learning_rate": 1.1586633601508382e-06, + "loss": 0.3837, + "num_input_tokens_seen": 24835776, + "step": 7880 + }, + { + "epoch": 0.5047692209205557, + "grad_norm": 46.44161605834961, + "learning_rate": 1.1575600470500014e-06, + "loss": 0.3858, + "num_input_tokens_seen": 24851648, + "step": 7885 + }, + { + "epoch": 0.5050893028615325, + "grad_norm": 59.1083869934082, + "learning_rate": 1.1564565372335957e-06, + "loss": 0.42, + "num_input_tokens_seen": 24866880, + "step": 7890 + }, + { + "epoch": 0.5054093848025094, + "grad_norm": 41.57418441772461, + "learning_rate": 1.1553528320793663e-06, + "loss": 0.3162, + "num_input_tokens_seen": 24881856, + "step": 7895 + }, + { + "epoch": 0.5057294667434863, + "grad_norm": 23.643510818481445, + "learning_rate": 1.1542489329653022e-06, + "loss": 0.4364, + "num_input_tokens_seen": 24898560, + "step": 7900 + }, + { + "epoch": 0.5060495486844632, + "grad_norm": 25.241592407226562, + "learning_rate": 1.1531448412696343e-06, + "loss": 0.3754, + "num_input_tokens_seen": 24913216, + "step": 7905 + }, + { + "epoch": 0.5063696306254402, + "grad_norm": 21.214923858642578, + "learning_rate": 1.1520405583708337e-06, + "loss": 0.4913, + "num_input_tokens_seen": 24928832, + "step": 7910 + }, + { + "epoch": 0.506689712566417, + "grad_norm": 33.57106018066406, + "learning_rate": 1.1509360856476109e-06, + "loss": 0.4917, + "num_input_tokens_seen": 24944512, + "step": 7915 + }, + { + "epoch": 0.5070097945073939, + "grad_norm": 37.114646911621094, + "learning_rate": 1.149831424478913e-06, + "loss": 0.4612, + "num_input_tokens_seen": 24959744, + "step": 7920 + }, + { + "epoch": 0.5073298764483708, + "grad_norm": 62.12904357910156, + "learning_rate": 1.1487265762439224e-06, + "loss": 0.3948, + "num_input_tokens_seen": 24975488, + "step": 7925 + }, + { + "epoch": 0.5076499583893477, + "grad_norm": 40.3009033203125, + "learning_rate": 1.1476215423220547e-06, + "loss": 0.362, + "num_input_tokens_seen": 24990272, + "step": 7930 + }, + { + "epoch": 0.5079700403303246, + "grad_norm": 39.82942199707031, + "learning_rate": 1.146516324092959e-06, + "loss": 0.3761, + "num_input_tokens_seen": 25006272, + "step": 7935 + }, + { + "epoch": 0.5082901222713014, + "grad_norm": 23.33016014099121, + "learning_rate": 1.1454109229365117e-06, + "loss": 0.2954, + "num_input_tokens_seen": 25022464, + "step": 7940 + }, + { + "epoch": 0.5086102042122783, + "grad_norm": 27.223312377929688, + "learning_rate": 1.14430534023282e-06, + "loss": 0.3132, + "num_input_tokens_seen": 25037376, + "step": 7945 + }, + { + "epoch": 0.5089302861532552, + "grad_norm": 36.93307876586914, + "learning_rate": 1.1431995773622167e-06, + "loss": 0.4736, + "num_input_tokens_seen": 25053440, + "step": 7950 + }, + { + "epoch": 0.5092503680942321, + "grad_norm": 21.982830047607422, + "learning_rate": 1.1420936357052597e-06, + "loss": 0.4369, + "num_input_tokens_seen": 25069120, + "step": 7955 + }, + { + "epoch": 0.5095704500352091, + "grad_norm": 22.12405014038086, + "learning_rate": 1.1409875166427303e-06, + "loss": 0.3078, + "num_input_tokens_seen": 25084224, + "step": 7960 + }, + { + "epoch": 0.509890531976186, + "grad_norm": 37.66783142089844, + "learning_rate": 1.1398812215556308e-06, + "loss": 0.4996, + "num_input_tokens_seen": 25099520, + "step": 7965 + }, + { + "epoch": 0.5102106139171628, + "grad_norm": 28.573827743530273, + "learning_rate": 1.1387747518251837e-06, + "loss": 0.362, + "num_input_tokens_seen": 25115200, + "step": 7970 + }, + { + "epoch": 0.5105306958581397, + "grad_norm": 20.292476654052734, + "learning_rate": 1.13766810883283e-06, + "loss": 0.3266, + "num_input_tokens_seen": 25131520, + "step": 7975 + }, + { + "epoch": 0.5108507777991166, + "grad_norm": 36.63866424560547, + "learning_rate": 1.1365612939602255e-06, + "loss": 0.5172, + "num_input_tokens_seen": 25147776, + "step": 7980 + }, + { + "epoch": 0.5111708597400935, + "grad_norm": 22.338659286499023, + "learning_rate": 1.1354543085892423e-06, + "loss": 0.3683, + "num_input_tokens_seen": 25162816, + "step": 7985 + }, + { + "epoch": 0.5114909416810703, + "grad_norm": 34.683868408203125, + "learning_rate": 1.1343471541019646e-06, + "loss": 0.3333, + "num_input_tokens_seen": 25178752, + "step": 7990 + }, + { + "epoch": 0.5118110236220472, + "grad_norm": 57.14018249511719, + "learning_rate": 1.1332398318806872e-06, + "loss": 0.3719, + "num_input_tokens_seen": 25194048, + "step": 7995 + }, + { + "epoch": 0.5121311055630241, + "grad_norm": 32.1242561340332, + "learning_rate": 1.1321323433079158e-06, + "loss": 0.3796, + "num_input_tokens_seen": 25209216, + "step": 8000 + }, + { + "epoch": 0.512451187504001, + "grad_norm": 28.248655319213867, + "learning_rate": 1.1310246897663623e-06, + "loss": 0.379, + "num_input_tokens_seen": 25224640, + "step": 8005 + }, + { + "epoch": 0.5127712694449779, + "grad_norm": 19.069774627685547, + "learning_rate": 1.1299168726389447e-06, + "loss": 0.408, + "num_input_tokens_seen": 25239808, + "step": 8010 + }, + { + "epoch": 0.5130913513859549, + "grad_norm": 42.42983627319336, + "learning_rate": 1.1288088933087868e-06, + "loss": 0.3354, + "num_input_tokens_seen": 25257344, + "step": 8015 + }, + { + "epoch": 0.5134114333269317, + "grad_norm": 22.4074764251709, + "learning_rate": 1.1277007531592127e-06, + "loss": 0.3365, + "num_input_tokens_seen": 25272064, + "step": 8020 + }, + { + "epoch": 0.5137315152679086, + "grad_norm": 28.663759231567383, + "learning_rate": 1.1265924535737492e-06, + "loss": 0.3619, + "num_input_tokens_seen": 25287936, + "step": 8025 + }, + { + "epoch": 0.5140515972088855, + "grad_norm": 39.256492614746094, + "learning_rate": 1.125483995936121e-06, + "loss": 0.3007, + "num_input_tokens_seen": 25303232, + "step": 8030 + }, + { + "epoch": 0.5143716791498624, + "grad_norm": 20.142274856567383, + "learning_rate": 1.1243753816302507e-06, + "loss": 0.376, + "num_input_tokens_seen": 25318656, + "step": 8035 + }, + { + "epoch": 0.5146917610908393, + "grad_norm": 46.976951599121094, + "learning_rate": 1.1232666120402558e-06, + "loss": 0.417, + "num_input_tokens_seen": 25333760, + "step": 8040 + }, + { + "epoch": 0.5150118430318161, + "grad_norm": 35.951576232910156, + "learning_rate": 1.1221576885504487e-06, + "loss": 0.3827, + "num_input_tokens_seen": 25349824, + "step": 8045 + }, + { + "epoch": 0.515331924972793, + "grad_norm": 19.6291561126709, + "learning_rate": 1.121048612545333e-06, + "loss": 0.4027, + "num_input_tokens_seen": 25365376, + "step": 8050 + }, + { + "epoch": 0.5156520069137699, + "grad_norm": 44.66822052001953, + "learning_rate": 1.1199393854096034e-06, + "loss": 0.4599, + "num_input_tokens_seen": 25380928, + "step": 8055 + }, + { + "epoch": 0.5159720888547468, + "grad_norm": 79.27295684814453, + "learning_rate": 1.118830008528143e-06, + "loss": 0.3487, + "num_input_tokens_seen": 25396352, + "step": 8060 + }, + { + "epoch": 0.5162921707957238, + "grad_norm": 21.75312042236328, + "learning_rate": 1.1177204832860212e-06, + "loss": 0.3159, + "num_input_tokens_seen": 25411456, + "step": 8065 + }, + { + "epoch": 0.5166122527367006, + "grad_norm": 19.3381290435791, + "learning_rate": 1.1166108110684947e-06, + "loss": 0.4322, + "num_input_tokens_seen": 25428544, + "step": 8070 + }, + { + "epoch": 0.5169323346776775, + "grad_norm": 37.30630111694336, + "learning_rate": 1.1155009932610003e-06, + "loss": 0.3988, + "num_input_tokens_seen": 25443968, + "step": 8075 + }, + { + "epoch": 0.5172524166186544, + "grad_norm": 45.22068786621094, + "learning_rate": 1.1143910312491605e-06, + "loss": 0.3273, + "num_input_tokens_seen": 25458880, + "step": 8080 + }, + { + "epoch": 0.5175724985596313, + "grad_norm": 53.44335174560547, + "learning_rate": 1.1132809264187748e-06, + "loss": 0.3196, + "num_input_tokens_seen": 25474304, + "step": 8085 + }, + { + "epoch": 0.5178925805006082, + "grad_norm": 59.70965576171875, + "learning_rate": 1.1121706801558226e-06, + "loss": 0.3884, + "num_input_tokens_seen": 25489472, + "step": 8090 + }, + { + "epoch": 0.518212662441585, + "grad_norm": 44.1774787902832, + "learning_rate": 1.111060293846459e-06, + "loss": 0.3827, + "num_input_tokens_seen": 25504896, + "step": 8095 + }, + { + "epoch": 0.5185327443825619, + "grad_norm": 79.03081512451172, + "learning_rate": 1.1099497688770148e-06, + "loss": 0.4807, + "num_input_tokens_seen": 25519360, + "step": 8100 + }, + { + "epoch": 0.5188528263235388, + "grad_norm": 35.3879280090332, + "learning_rate": 1.1088391066339928e-06, + "loss": 0.4418, + "num_input_tokens_seen": 25535680, + "step": 8105 + }, + { + "epoch": 0.5191729082645157, + "grad_norm": 43.35395050048828, + "learning_rate": 1.1077283085040684e-06, + "loss": 0.5327, + "num_input_tokens_seen": 25550592, + "step": 8110 + }, + { + "epoch": 0.5194929902054926, + "grad_norm": 39.26498031616211, + "learning_rate": 1.1066173758740863e-06, + "loss": 0.4083, + "num_input_tokens_seen": 25565696, + "step": 8115 + }, + { + "epoch": 0.5198130721464695, + "grad_norm": 17.995386123657227, + "learning_rate": 1.105506310131058e-06, + "loss": 0.3485, + "num_input_tokens_seen": 25581568, + "step": 8120 + }, + { + "epoch": 0.5201331540874464, + "grad_norm": 56.82388687133789, + "learning_rate": 1.1043951126621634e-06, + "loss": 0.466, + "num_input_tokens_seen": 25597760, + "step": 8125 + }, + { + "epoch": 0.5204532360284233, + "grad_norm": 31.271780014038086, + "learning_rate": 1.1032837848547445e-06, + "loss": 0.4111, + "num_input_tokens_seen": 25615424, + "step": 8130 + }, + { + "epoch": 0.5207733179694002, + "grad_norm": 33.19522476196289, + "learning_rate": 1.1021723280963074e-06, + "loss": 0.4094, + "num_input_tokens_seen": 25630720, + "step": 8135 + }, + { + "epoch": 0.5210933999103771, + "grad_norm": 40.24439239501953, + "learning_rate": 1.1010607437745194e-06, + "loss": 0.4886, + "num_input_tokens_seen": 25649280, + "step": 8140 + }, + { + "epoch": 0.5214134818513539, + "grad_norm": 49.17844009399414, + "learning_rate": 1.0999490332772057e-06, + "loss": 0.5002, + "num_input_tokens_seen": 25664576, + "step": 8145 + }, + { + "epoch": 0.5217335637923308, + "grad_norm": 26.123889923095703, + "learning_rate": 1.0988371979923507e-06, + "loss": 0.4193, + "num_input_tokens_seen": 25680384, + "step": 8150 + }, + { + "epoch": 0.5220536457333077, + "grad_norm": 26.953947067260742, + "learning_rate": 1.097725239308094e-06, + "loss": 0.4017, + "num_input_tokens_seen": 25696128, + "step": 8155 + }, + { + "epoch": 0.5223737276742846, + "grad_norm": 15.423673629760742, + "learning_rate": 1.0966131586127278e-06, + "loss": 0.2794, + "num_input_tokens_seen": 25712768, + "step": 8160 + }, + { + "epoch": 0.5226938096152615, + "grad_norm": 25.20142936706543, + "learning_rate": 1.0955009572946992e-06, + "loss": 0.4033, + "num_input_tokens_seen": 25727616, + "step": 8165 + }, + { + "epoch": 0.5230138915562383, + "grad_norm": 22.9870548248291, + "learning_rate": 1.094388636742604e-06, + "loss": 0.4149, + "num_input_tokens_seen": 25744384, + "step": 8170 + }, + { + "epoch": 0.5233339734972153, + "grad_norm": 31.26616859436035, + "learning_rate": 1.0932761983451878e-06, + "loss": 0.3376, + "num_input_tokens_seen": 25760640, + "step": 8175 + }, + { + "epoch": 0.5236540554381922, + "grad_norm": 32.35393142700195, + "learning_rate": 1.0921636434913425e-06, + "loss": 0.3116, + "num_input_tokens_seen": 25776640, + "step": 8180 + }, + { + "epoch": 0.5239741373791691, + "grad_norm": 26.09176254272461, + "learning_rate": 1.091050973570106e-06, + "loss": 0.2977, + "num_input_tokens_seen": 25791744, + "step": 8185 + }, + { + "epoch": 0.524294219320146, + "grad_norm": 49.68628692626953, + "learning_rate": 1.08993818997066e-06, + "loss": 0.5531, + "num_input_tokens_seen": 25808256, + "step": 8190 + }, + { + "epoch": 0.5246143012611229, + "grad_norm": 36.49836730957031, + "learning_rate": 1.0888252940823283e-06, + "loss": 0.4378, + "num_input_tokens_seen": 25824128, + "step": 8195 + }, + { + "epoch": 0.5249343832020997, + "grad_norm": 39.86119842529297, + "learning_rate": 1.0877122872945737e-06, + "loss": 0.4676, + "num_input_tokens_seen": 25840576, + "step": 8200 + }, + { + "epoch": 0.5252544651430766, + "grad_norm": 32.07432556152344, + "learning_rate": 1.0865991709969983e-06, + "loss": 0.317, + "num_input_tokens_seen": 25856256, + "step": 8205 + }, + { + "epoch": 0.5255745470840535, + "grad_norm": 20.993459701538086, + "learning_rate": 1.0854859465793416e-06, + "loss": 0.4482, + "num_input_tokens_seen": 25871424, + "step": 8210 + }, + { + "epoch": 0.5258946290250304, + "grad_norm": 33.609657287597656, + "learning_rate": 1.0843726154314767e-06, + "loss": 0.4974, + "num_input_tokens_seen": 25886272, + "step": 8215 + }, + { + "epoch": 0.5262147109660072, + "grad_norm": 30.594623565673828, + "learning_rate": 1.083259178943411e-06, + "loss": 0.4376, + "num_input_tokens_seen": 25901952, + "step": 8220 + }, + { + "epoch": 0.5265347929069842, + "grad_norm": 20.63231086730957, + "learning_rate": 1.0821456385052822e-06, + "loss": 0.3694, + "num_input_tokens_seen": 25917888, + "step": 8225 + }, + { + "epoch": 0.5268548748479611, + "grad_norm": 46.33021545410156, + "learning_rate": 1.0810319955073598e-06, + "loss": 0.4199, + "num_input_tokens_seen": 25933824, + "step": 8230 + }, + { + "epoch": 0.527174956788938, + "grad_norm": 36.321929931640625, + "learning_rate": 1.0799182513400393e-06, + "loss": 0.3888, + "num_input_tokens_seen": 25951360, + "step": 8235 + }, + { + "epoch": 0.5274950387299149, + "grad_norm": 37.35638427734375, + "learning_rate": 1.0788044073938438e-06, + "loss": 0.3594, + "num_input_tokens_seen": 25967232, + "step": 8240 + }, + { + "epoch": 0.5278151206708918, + "grad_norm": 37.84722900390625, + "learning_rate": 1.0776904650594205e-06, + "loss": 0.4146, + "num_input_tokens_seen": 25982592, + "step": 8245 + }, + { + "epoch": 0.5281352026118686, + "grad_norm": 67.66139221191406, + "learning_rate": 1.0765764257275394e-06, + "loss": 0.4094, + "num_input_tokens_seen": 25997824, + "step": 8250 + }, + { + "epoch": 0.5284552845528455, + "grad_norm": 32.80574035644531, + "learning_rate": 1.0754622907890914e-06, + "loss": 0.4292, + "num_input_tokens_seen": 26013632, + "step": 8255 + }, + { + "epoch": 0.5287753664938224, + "grad_norm": 28.530445098876953, + "learning_rate": 1.0743480616350873e-06, + "loss": 0.3249, + "num_input_tokens_seen": 26028800, + "step": 8260 + }, + { + "epoch": 0.5290954484347993, + "grad_norm": 30.938467025756836, + "learning_rate": 1.0732337396566558e-06, + "loss": 0.339, + "num_input_tokens_seen": 26044672, + "step": 8265 + }, + { + "epoch": 0.5294155303757762, + "grad_norm": 20.649280548095703, + "learning_rate": 1.07211932624504e-06, + "loss": 0.396, + "num_input_tokens_seen": 26060544, + "step": 8270 + }, + { + "epoch": 0.529735612316753, + "grad_norm": 18.15691566467285, + "learning_rate": 1.0710048227915988e-06, + "loss": 0.3786, + "num_input_tokens_seen": 26076160, + "step": 8275 + }, + { + "epoch": 0.53005569425773, + "grad_norm": 24.960102081298828, + "learning_rate": 1.0698902306878024e-06, + "loss": 0.4186, + "num_input_tokens_seen": 26092352, + "step": 8280 + }, + { + "epoch": 0.5303757761987069, + "grad_norm": 25.81612205505371, + "learning_rate": 1.0687755513252325e-06, + "loss": 0.3024, + "num_input_tokens_seen": 26107776, + "step": 8285 + }, + { + "epoch": 0.5306958581396838, + "grad_norm": 11.139862060546875, + "learning_rate": 1.0676607860955794e-06, + "loss": 0.31, + "num_input_tokens_seen": 26123712, + "step": 8290 + }, + { + "epoch": 0.5310159400806607, + "grad_norm": 42.41699981689453, + "learning_rate": 1.0665459363906404e-06, + "loss": 0.386, + "num_input_tokens_seen": 26139200, + "step": 8295 + }, + { + "epoch": 0.5313360220216375, + "grad_norm": 23.389768600463867, + "learning_rate": 1.0654310036023185e-06, + "loss": 0.4355, + "num_input_tokens_seen": 26153600, + "step": 8300 + }, + { + "epoch": 0.5316561039626144, + "grad_norm": 19.833234786987305, + "learning_rate": 1.0643159891226203e-06, + "loss": 0.4206, + "num_input_tokens_seen": 26169600, + "step": 8305 + }, + { + "epoch": 0.5319761859035913, + "grad_norm": 33.841224670410156, + "learning_rate": 1.0632008943436545e-06, + "loss": 0.3398, + "num_input_tokens_seen": 26185536, + "step": 8310 + }, + { + "epoch": 0.5322962678445682, + "grad_norm": 17.150596618652344, + "learning_rate": 1.0620857206576299e-06, + "loss": 0.453, + "num_input_tokens_seen": 26201536, + "step": 8315 + }, + { + "epoch": 0.5326163497855451, + "grad_norm": 14.26513957977295, + "learning_rate": 1.0609704694568546e-06, + "loss": 0.2888, + "num_input_tokens_seen": 26216576, + "step": 8320 + }, + { + "epoch": 0.5329364317265219, + "grad_norm": 23.111820220947266, + "learning_rate": 1.0598551421337318e-06, + "loss": 0.2904, + "num_input_tokens_seen": 26232640, + "step": 8325 + }, + { + "epoch": 0.5332565136674989, + "grad_norm": 20.46584701538086, + "learning_rate": 1.0587397400807617e-06, + "loss": 0.5146, + "num_input_tokens_seen": 26248448, + "step": 8330 + }, + { + "epoch": 0.5335765956084758, + "grad_norm": 36.023284912109375, + "learning_rate": 1.057624264690536e-06, + "loss": 0.519, + "num_input_tokens_seen": 26263872, + "step": 8335 + }, + { + "epoch": 0.5338966775494527, + "grad_norm": 36.1595573425293, + "learning_rate": 1.0565087173557394e-06, + "loss": 0.4598, + "num_input_tokens_seen": 26279872, + "step": 8340 + }, + { + "epoch": 0.5342167594904296, + "grad_norm": 24.1319580078125, + "learning_rate": 1.055393099469146e-06, + "loss": 0.3428, + "num_input_tokens_seen": 26295680, + "step": 8345 + }, + { + "epoch": 0.5345368414314065, + "grad_norm": 34.465797424316406, + "learning_rate": 1.054277412423617e-06, + "loss": 0.4057, + "num_input_tokens_seen": 26311040, + "step": 8350 + }, + { + "epoch": 0.5348569233723833, + "grad_norm": 24.986618041992188, + "learning_rate": 1.0531616576121017e-06, + "loss": 0.4603, + "num_input_tokens_seen": 26326144, + "step": 8355 + }, + { + "epoch": 0.5351770053133602, + "grad_norm": 25.222026824951172, + "learning_rate": 1.0520458364276325e-06, + "loss": 0.3347, + "num_input_tokens_seen": 26341952, + "step": 8360 + }, + { + "epoch": 0.5354970872543371, + "grad_norm": 37.025054931640625, + "learning_rate": 1.0509299502633256e-06, + "loss": 0.3565, + "num_input_tokens_seen": 26356672, + "step": 8365 + }, + { + "epoch": 0.535817169195314, + "grad_norm": 20.434568405151367, + "learning_rate": 1.0498140005123777e-06, + "loss": 0.4493, + "num_input_tokens_seen": 26373056, + "step": 8370 + }, + { + "epoch": 0.5361372511362908, + "grad_norm": 12.433558464050293, + "learning_rate": 1.0486979885680653e-06, + "loss": 0.426, + "num_input_tokens_seen": 26388032, + "step": 8375 + }, + { + "epoch": 0.5364573330772677, + "grad_norm": 54.505035400390625, + "learning_rate": 1.0475819158237424e-06, + "loss": 0.4115, + "num_input_tokens_seen": 26402880, + "step": 8380 + }, + { + "epoch": 0.5367774150182447, + "grad_norm": 22.174421310424805, + "learning_rate": 1.0464657836728389e-06, + "loss": 0.4713, + "num_input_tokens_seen": 26419328, + "step": 8385 + }, + { + "epoch": 0.5370974969592216, + "grad_norm": 33.491397857666016, + "learning_rate": 1.045349593508859e-06, + "loss": 0.3981, + "num_input_tokens_seen": 26434112, + "step": 8390 + }, + { + "epoch": 0.5374175789001985, + "grad_norm": 22.270578384399414, + "learning_rate": 1.0442333467253788e-06, + "loss": 0.297, + "num_input_tokens_seen": 26450688, + "step": 8395 + }, + { + "epoch": 0.5377376608411754, + "grad_norm": 32.83494186401367, + "learning_rate": 1.0431170447160463e-06, + "loss": 0.3602, + "num_input_tokens_seen": 26466368, + "step": 8400 + }, + { + "epoch": 0.5380577427821522, + "grad_norm": 21.519004821777344, + "learning_rate": 1.0420006888745767e-06, + "loss": 0.3495, + "num_input_tokens_seen": 26482624, + "step": 8405 + }, + { + "epoch": 0.5383778247231291, + "grad_norm": 22.21971321105957, + "learning_rate": 1.0408842805947543e-06, + "loss": 0.3668, + "num_input_tokens_seen": 26499200, + "step": 8410 + }, + { + "epoch": 0.538697906664106, + "grad_norm": 32.608150482177734, + "learning_rate": 1.0397678212704276e-06, + "loss": 0.5119, + "num_input_tokens_seen": 26514048, + "step": 8415 + }, + { + "epoch": 0.5390179886050829, + "grad_norm": 32.62826919555664, + "learning_rate": 1.038651312295509e-06, + "loss": 0.4034, + "num_input_tokens_seen": 26529216, + "step": 8420 + }, + { + "epoch": 0.5393380705460598, + "grad_norm": 24.986495971679688, + "learning_rate": 1.037534755063973e-06, + "loss": 0.4192, + "num_input_tokens_seen": 26545152, + "step": 8425 + }, + { + "epoch": 0.5396581524870366, + "grad_norm": 44.1995964050293, + "learning_rate": 1.0364181509698548e-06, + "loss": 0.4147, + "num_input_tokens_seen": 26560512, + "step": 8430 + }, + { + "epoch": 0.5399782344280136, + "grad_norm": 29.369369506835938, + "learning_rate": 1.0353015014072476e-06, + "loss": 0.35, + "num_input_tokens_seen": 26575488, + "step": 8435 + }, + { + "epoch": 0.5402983163689905, + "grad_norm": 50.47454071044922, + "learning_rate": 1.0341848077703013e-06, + "loss": 0.405, + "num_input_tokens_seen": 26591040, + "step": 8440 + }, + { + "epoch": 0.5406183983099674, + "grad_norm": 26.94370460510254, + "learning_rate": 1.033068071453221e-06, + "loss": 0.3229, + "num_input_tokens_seen": 26606976, + "step": 8445 + }, + { + "epoch": 0.5409384802509443, + "grad_norm": 35.96391677856445, + "learning_rate": 1.0319512938502653e-06, + "loss": 0.3623, + "num_input_tokens_seen": 26623296, + "step": 8450 + }, + { + "epoch": 0.5412585621919211, + "grad_norm": 32.38021469116211, + "learning_rate": 1.0308344763557444e-06, + "loss": 0.3123, + "num_input_tokens_seen": 26638336, + "step": 8455 + }, + { + "epoch": 0.541578644132898, + "grad_norm": 15.892178535461426, + "learning_rate": 1.0297176203640175e-06, + "loss": 0.2841, + "num_input_tokens_seen": 26654400, + "step": 8460 + }, + { + "epoch": 0.5418987260738749, + "grad_norm": 54.3671760559082, + "learning_rate": 1.0286007272694924e-06, + "loss": 0.3482, + "num_input_tokens_seen": 26669568, + "step": 8465 + }, + { + "epoch": 0.5422188080148518, + "grad_norm": 27.727298736572266, + "learning_rate": 1.0274837984666239e-06, + "loss": 0.4695, + "num_input_tokens_seen": 26686016, + "step": 8470 + }, + { + "epoch": 0.5425388899558287, + "grad_norm": 31.10105323791504, + "learning_rate": 1.02636683534991e-06, + "loss": 0.4184, + "num_input_tokens_seen": 26701504, + "step": 8475 + }, + { + "epoch": 0.5428589718968055, + "grad_norm": 62.131317138671875, + "learning_rate": 1.0252498393138928e-06, + "loss": 0.5884, + "num_input_tokens_seen": 26717120, + "step": 8480 + }, + { + "epoch": 0.5431790538377824, + "grad_norm": 70.49308776855469, + "learning_rate": 1.0241328117531546e-06, + "loss": 0.4193, + "num_input_tokens_seen": 26732736, + "step": 8485 + }, + { + "epoch": 0.5434991357787594, + "grad_norm": 30.73244285583496, + "learning_rate": 1.0230157540623174e-06, + "loss": 0.4126, + "num_input_tokens_seen": 26747392, + "step": 8490 + }, + { + "epoch": 0.5438192177197363, + "grad_norm": 22.281478881835938, + "learning_rate": 1.0218986676360415e-06, + "loss": 0.4462, + "num_input_tokens_seen": 26762112, + "step": 8495 + }, + { + "epoch": 0.5441392996607132, + "grad_norm": 22.789291381835938, + "learning_rate": 1.0207815538690216e-06, + "loss": 0.3709, + "num_input_tokens_seen": 26777856, + "step": 8500 + }, + { + "epoch": 0.54445938160169, + "grad_norm": 51.15581512451172, + "learning_rate": 1.0196644141559877e-06, + "loss": 0.3055, + "num_input_tokens_seen": 26794048, + "step": 8505 + }, + { + "epoch": 0.5447794635426669, + "grad_norm": 42.44687271118164, + "learning_rate": 1.0185472498917021e-06, + "loss": 0.3509, + "num_input_tokens_seen": 26809792, + "step": 8510 + }, + { + "epoch": 0.5450995454836438, + "grad_norm": 53.03976058959961, + "learning_rate": 1.017430062470957e-06, + "loss": 0.4421, + "num_input_tokens_seen": 26825024, + "step": 8515 + }, + { + "epoch": 0.5454196274246207, + "grad_norm": 29.0567569732666, + "learning_rate": 1.016312853288574e-06, + "loss": 0.3472, + "num_input_tokens_seen": 26841536, + "step": 8520 + }, + { + "epoch": 0.5457397093655976, + "grad_norm": 21.90899085998535, + "learning_rate": 1.0151956237394027e-06, + "loss": 0.395, + "num_input_tokens_seen": 26857600, + "step": 8525 + }, + { + "epoch": 0.5460597913065744, + "grad_norm": 27.42255210876465, + "learning_rate": 1.0140783752183164e-06, + "loss": 0.3942, + "num_input_tokens_seen": 26874176, + "step": 8530 + }, + { + "epoch": 0.5463798732475513, + "grad_norm": 26.120128631591797, + "learning_rate": 1.0129611091202138e-06, + "loss": 0.4162, + "num_input_tokens_seen": 26890176, + "step": 8535 + }, + { + "epoch": 0.5466999551885282, + "grad_norm": 25.828702926635742, + "learning_rate": 1.0118438268400135e-06, + "loss": 0.2897, + "num_input_tokens_seen": 26905728, + "step": 8540 + }, + { + "epoch": 0.5470200371295052, + "grad_norm": 46.99468994140625, + "learning_rate": 1.0107265297726568e-06, + "loss": 0.4655, + "num_input_tokens_seen": 26921280, + "step": 8545 + }, + { + "epoch": 0.5473401190704821, + "grad_norm": 34.46550369262695, + "learning_rate": 1.009609219313102e-06, + "loss": 0.4065, + "num_input_tokens_seen": 26936704, + "step": 8550 + }, + { + "epoch": 0.547660201011459, + "grad_norm": 16.38555145263672, + "learning_rate": 1.0084918968563236e-06, + "loss": 0.4008, + "num_input_tokens_seen": 26952448, + "step": 8555 + }, + { + "epoch": 0.5479802829524358, + "grad_norm": 30.922161102294922, + "learning_rate": 1.0073745637973124e-06, + "loss": 0.3928, + "num_input_tokens_seen": 26967680, + "step": 8560 + }, + { + "epoch": 0.5483003648934127, + "grad_norm": 17.125778198242188, + "learning_rate": 1.0062572215310718e-06, + "loss": 0.3489, + "num_input_tokens_seen": 26982400, + "step": 8565 + }, + { + "epoch": 0.5486204468343896, + "grad_norm": 45.65067672729492, + "learning_rate": 1.0051398714526165e-06, + "loss": 0.313, + "num_input_tokens_seen": 26998400, + "step": 8570 + }, + { + "epoch": 0.5489405287753665, + "grad_norm": 45.19715118408203, + "learning_rate": 1.0040225149569712e-06, + "loss": 0.3506, + "num_input_tokens_seen": 27015936, + "step": 8575 + }, + { + "epoch": 0.5492606107163434, + "grad_norm": 36.80413055419922, + "learning_rate": 1.0029051534391693e-06, + "loss": 0.3263, + "num_input_tokens_seen": 27030528, + "step": 8580 + }, + { + "epoch": 0.5495806926573202, + "grad_norm": 21.942888259887695, + "learning_rate": 1.001787788294249e-06, + "loss": 0.3621, + "num_input_tokens_seen": 27046080, + "step": 8585 + }, + { + "epoch": 0.5499007745982971, + "grad_norm": 22.532997131347656, + "learning_rate": 1.0006704209172537e-06, + "loss": 0.4206, + "num_input_tokens_seen": 27061504, + "step": 8590 + }, + { + "epoch": 0.5502208565392741, + "grad_norm": 47.835289001464844, + "learning_rate": 9.995530527032301e-07, + "loss": 0.4297, + "num_input_tokens_seen": 27077056, + "step": 8595 + }, + { + "epoch": 0.550540938480251, + "grad_norm": 27.61309051513672, + "learning_rate": 9.984356850472257e-07, + "loss": 0.3382, + "num_input_tokens_seen": 27095168, + "step": 8600 + }, + { + "epoch": 0.5506689712566417, + "eval_loss": 0.3985471725463867, + "eval_runtime": 49.1827, + "eval_samples_per_second": 282.335, + "eval_steps_per_second": 35.297, + "num_input_tokens_seen": 27101056, + "step": 8602 + }, + { + "epoch": 0.5508610204212279, + "grad_norm": 21.579906463623047, + "learning_rate": 9.97318319344287e-07, + "loss": 0.3698, + "num_input_tokens_seen": 27110144, + "step": 8605 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 25.68075180053711, + "learning_rate": 9.962009569894577e-07, + "loss": 0.5311, + "num_input_tokens_seen": 27124864, + "step": 8610 + }, + { + "epoch": 0.5515011843031816, + "grad_norm": 29.338640213012695, + "learning_rate": 9.95083599377778e-07, + "loss": 0.3769, + "num_input_tokens_seen": 27140160, + "step": 8615 + }, + { + "epoch": 0.5518212662441585, + "grad_norm": 28.375497817993164, + "learning_rate": 9.939662479042828e-07, + "loss": 0.374, + "num_input_tokens_seen": 27155712, + "step": 8620 + }, + { + "epoch": 0.5521413481851354, + "grad_norm": 62.96663284301758, + "learning_rate": 9.92848903963998e-07, + "loss": 0.4573, + "num_input_tokens_seen": 27171520, + "step": 8625 + }, + { + "epoch": 0.5524614301261123, + "grad_norm": 33.80502700805664, + "learning_rate": 9.9173156895194e-07, + "loss": 0.4487, + "num_input_tokens_seen": 27186752, + "step": 8630 + }, + { + "epoch": 0.5527815120670891, + "grad_norm": 25.58247184753418, + "learning_rate": 9.906142442631154e-07, + "loss": 0.3823, + "num_input_tokens_seen": 27201664, + "step": 8635 + }, + { + "epoch": 0.553101594008066, + "grad_norm": 16.91172218322754, + "learning_rate": 9.894969312925171e-07, + "loss": 0.3804, + "num_input_tokens_seen": 27218880, + "step": 8640 + }, + { + "epoch": 0.5534216759490429, + "grad_norm": 51.58964157104492, + "learning_rate": 9.883796314351234e-07, + "loss": 0.3448, + "num_input_tokens_seen": 27235648, + "step": 8645 + }, + { + "epoch": 0.5537417578900199, + "grad_norm": 13.989603996276855, + "learning_rate": 9.872623460858966e-07, + "loss": 0.3997, + "num_input_tokens_seen": 27250880, + "step": 8650 + }, + { + "epoch": 0.5540618398309968, + "grad_norm": 12.994277954101562, + "learning_rate": 9.861450766397799e-07, + "loss": 0.3163, + "num_input_tokens_seen": 27266880, + "step": 8655 + }, + { + "epoch": 0.5543819217719737, + "grad_norm": 24.744857788085938, + "learning_rate": 9.850278244916976e-07, + "loss": 0.411, + "num_input_tokens_seen": 27282816, + "step": 8660 + }, + { + "epoch": 0.5547020037129505, + "grad_norm": 19.459922790527344, + "learning_rate": 9.839105910365524e-07, + "loss": 0.4309, + "num_input_tokens_seen": 27298496, + "step": 8665 + }, + { + "epoch": 0.5550220856539274, + "grad_norm": 38.75739288330078, + "learning_rate": 9.827933776692235e-07, + "loss": 0.331, + "num_input_tokens_seen": 27313856, + "step": 8670 + }, + { + "epoch": 0.5553421675949043, + "grad_norm": 30.53208351135254, + "learning_rate": 9.81676185784564e-07, + "loss": 0.34, + "num_input_tokens_seen": 27328448, + "step": 8675 + }, + { + "epoch": 0.5556622495358812, + "grad_norm": 15.985432624816895, + "learning_rate": 9.805590167774021e-07, + "loss": 0.3916, + "num_input_tokens_seen": 27343872, + "step": 8680 + }, + { + "epoch": 0.555982331476858, + "grad_norm": 75.76026153564453, + "learning_rate": 9.79441872042536e-07, + "loss": 0.5632, + "num_input_tokens_seen": 27358720, + "step": 8685 + }, + { + "epoch": 0.5563024134178349, + "grad_norm": 32.97372817993164, + "learning_rate": 9.783247529747338e-07, + "loss": 0.3856, + "num_input_tokens_seen": 27373312, + "step": 8690 + }, + { + "epoch": 0.5566224953588118, + "grad_norm": 24.052457809448242, + "learning_rate": 9.772076609687323e-07, + "loss": 0.3571, + "num_input_tokens_seen": 27388544, + "step": 8695 + }, + { + "epoch": 0.5569425772997888, + "grad_norm": 35.491371154785156, + "learning_rate": 9.760905974192334e-07, + "loss": 0.3259, + "num_input_tokens_seen": 27405120, + "step": 8700 + }, + { + "epoch": 0.5572626592407657, + "grad_norm": 22.80748748779297, + "learning_rate": 9.749735637209044e-07, + "loss": 0.4078, + "num_input_tokens_seen": 27420544, + "step": 8705 + }, + { + "epoch": 0.5575827411817426, + "grad_norm": 16.720609664916992, + "learning_rate": 9.738565612683754e-07, + "loss": 0.3137, + "num_input_tokens_seen": 27435456, + "step": 8710 + }, + { + "epoch": 0.5579028231227194, + "grad_norm": 28.667015075683594, + "learning_rate": 9.727395914562363e-07, + "loss": 0.3477, + "num_input_tokens_seen": 27452032, + "step": 8715 + }, + { + "epoch": 0.5582229050636963, + "grad_norm": 25.73943328857422, + "learning_rate": 9.716226556790372e-07, + "loss": 0.4159, + "num_input_tokens_seen": 27467520, + "step": 8720 + }, + { + "epoch": 0.5585429870046732, + "grad_norm": 29.19492530822754, + "learning_rate": 9.705057553312855e-07, + "loss": 0.312, + "num_input_tokens_seen": 27482816, + "step": 8725 + }, + { + "epoch": 0.5588630689456501, + "grad_norm": 24.96323013305664, + "learning_rate": 9.693888918074452e-07, + "loss": 0.374, + "num_input_tokens_seen": 27497600, + "step": 8730 + }, + { + "epoch": 0.559183150886627, + "grad_norm": 40.72119140625, + "learning_rate": 9.682720665019325e-07, + "loss": 0.4861, + "num_input_tokens_seen": 27513344, + "step": 8735 + }, + { + "epoch": 0.5595032328276038, + "grad_norm": 20.98204231262207, + "learning_rate": 9.671552808091172e-07, + "loss": 0.4204, + "num_input_tokens_seen": 27530304, + "step": 8740 + }, + { + "epoch": 0.5598233147685807, + "grad_norm": 21.96649932861328, + "learning_rate": 9.660385361233195e-07, + "loss": 0.3409, + "num_input_tokens_seen": 27545664, + "step": 8745 + }, + { + "epoch": 0.5601433967095576, + "grad_norm": 20.051984786987305, + "learning_rate": 9.649218338388084e-07, + "loss": 0.2987, + "num_input_tokens_seen": 27560704, + "step": 8750 + }, + { + "epoch": 0.5604634786505346, + "grad_norm": 21.695167541503906, + "learning_rate": 9.638051753497994e-07, + "loss": 0.4353, + "num_input_tokens_seen": 27577472, + "step": 8755 + }, + { + "epoch": 0.5607835605915115, + "grad_norm": 20.893781661987305, + "learning_rate": 9.62688562050454e-07, + "loss": 0.3597, + "num_input_tokens_seen": 27592960, + "step": 8760 + }, + { + "epoch": 0.5611036425324883, + "grad_norm": 17.552732467651367, + "learning_rate": 9.615719953348772e-07, + "loss": 0.4033, + "num_input_tokens_seen": 27610304, + "step": 8765 + }, + { + "epoch": 0.5614237244734652, + "grad_norm": 37.0562629699707, + "learning_rate": 9.604554765971148e-07, + "loss": 0.5574, + "num_input_tokens_seen": 27628288, + "step": 8770 + }, + { + "epoch": 0.5617438064144421, + "grad_norm": 20.61250114440918, + "learning_rate": 9.593390072311549e-07, + "loss": 0.4069, + "num_input_tokens_seen": 27643904, + "step": 8775 + }, + { + "epoch": 0.562063888355419, + "grad_norm": 20.135055541992188, + "learning_rate": 9.582225886309216e-07, + "loss": 0.3576, + "num_input_tokens_seen": 27660224, + "step": 8780 + }, + { + "epoch": 0.5623839702963959, + "grad_norm": 18.054454803466797, + "learning_rate": 9.571062221902767e-07, + "loss": 0.3015, + "num_input_tokens_seen": 27675136, + "step": 8785 + }, + { + "epoch": 0.5627040522373727, + "grad_norm": 56.18107223510742, + "learning_rate": 9.559899093030175e-07, + "loss": 0.3485, + "num_input_tokens_seen": 27690176, + "step": 8790 + }, + { + "epoch": 0.5630241341783496, + "grad_norm": 34.77610778808594, + "learning_rate": 9.54873651362873e-07, + "loss": 0.3061, + "num_input_tokens_seen": 27704512, + "step": 8795 + }, + { + "epoch": 0.5633442161193265, + "grad_norm": 49.7370491027832, + "learning_rate": 9.537574497635043e-07, + "loss": 0.46, + "num_input_tokens_seen": 27720448, + "step": 8800 + }, + { + "epoch": 0.5636642980603035, + "grad_norm": 27.712543487548828, + "learning_rate": 9.52641305898503e-07, + "loss": 0.4966, + "num_input_tokens_seen": 27735808, + "step": 8805 + }, + { + "epoch": 0.5639843800012804, + "grad_norm": 27.37342643737793, + "learning_rate": 9.515252211613873e-07, + "loss": 0.3122, + "num_input_tokens_seen": 27750464, + "step": 8810 + }, + { + "epoch": 0.5643044619422573, + "grad_norm": 35.751590728759766, + "learning_rate": 9.504091969456021e-07, + "loss": 0.4586, + "num_input_tokens_seen": 27764352, + "step": 8815 + }, + { + "epoch": 0.5646245438832341, + "grad_norm": 20.632070541381836, + "learning_rate": 9.492932346445165e-07, + "loss": 0.338, + "num_input_tokens_seen": 27779840, + "step": 8820 + }, + { + "epoch": 0.564944625824211, + "grad_norm": 23.484317779541016, + "learning_rate": 9.48177335651423e-07, + "loss": 0.27, + "num_input_tokens_seen": 27796352, + "step": 8825 + }, + { + "epoch": 0.5652647077651879, + "grad_norm": 33.279884338378906, + "learning_rate": 9.470615013595346e-07, + "loss": 0.3325, + "num_input_tokens_seen": 27810624, + "step": 8830 + }, + { + "epoch": 0.5655847897061648, + "grad_norm": 42.17190933227539, + "learning_rate": 9.459457331619829e-07, + "loss": 0.4447, + "num_input_tokens_seen": 27825152, + "step": 8835 + }, + { + "epoch": 0.5659048716471416, + "grad_norm": 33.045230865478516, + "learning_rate": 9.448300324518182e-07, + "loss": 0.4076, + "num_input_tokens_seen": 27840384, + "step": 8840 + }, + { + "epoch": 0.5662249535881185, + "grad_norm": 32.98795700073242, + "learning_rate": 9.437144006220058e-07, + "loss": 0.3017, + "num_input_tokens_seen": 27856640, + "step": 8845 + }, + { + "epoch": 0.5665450355290954, + "grad_norm": 9.297707557678223, + "learning_rate": 9.425988390654249e-07, + "loss": 0.2027, + "num_input_tokens_seen": 27872768, + "step": 8850 + }, + { + "epoch": 0.5668651174700723, + "grad_norm": 40.04125213623047, + "learning_rate": 9.414833491748677e-07, + "loss": 0.4955, + "num_input_tokens_seen": 27887488, + "step": 8855 + }, + { + "epoch": 0.5671851994110493, + "grad_norm": 45.78459167480469, + "learning_rate": 9.40367932343036e-07, + "loss": 0.3024, + "num_input_tokens_seen": 27902720, + "step": 8860 + }, + { + "epoch": 0.5675052813520262, + "grad_norm": 28.001405715942383, + "learning_rate": 9.392525899625407e-07, + "loss": 0.374, + "num_input_tokens_seen": 27918080, + "step": 8865 + }, + { + "epoch": 0.567825363293003, + "grad_norm": 48.28670120239258, + "learning_rate": 9.381373234259004e-07, + "loss": 0.4011, + "num_input_tokens_seen": 27933760, + "step": 8870 + }, + { + "epoch": 0.5681454452339799, + "grad_norm": 42.333187103271484, + "learning_rate": 9.370221341255382e-07, + "loss": 0.375, + "num_input_tokens_seen": 27948992, + "step": 8875 + }, + { + "epoch": 0.5684655271749568, + "grad_norm": 28.905458450317383, + "learning_rate": 9.359070234537807e-07, + "loss": 0.3382, + "num_input_tokens_seen": 27966848, + "step": 8880 + }, + { + "epoch": 0.5687856091159337, + "grad_norm": 27.128929138183594, + "learning_rate": 9.34791992802857e-07, + "loss": 0.3803, + "num_input_tokens_seen": 27981696, + "step": 8885 + }, + { + "epoch": 0.5691056910569106, + "grad_norm": 29.13878631591797, + "learning_rate": 9.336770435648963e-07, + "loss": 0.2607, + "num_input_tokens_seen": 27997376, + "step": 8890 + }, + { + "epoch": 0.5694257729978874, + "grad_norm": 25.84345054626465, + "learning_rate": 9.325621771319246e-07, + "loss": 0.4075, + "num_input_tokens_seen": 28014016, + "step": 8895 + }, + { + "epoch": 0.5697458549388643, + "grad_norm": 21.55052947998047, + "learning_rate": 9.314473948958673e-07, + "loss": 0.4178, + "num_input_tokens_seen": 28030400, + "step": 8900 + }, + { + "epoch": 0.5700659368798412, + "grad_norm": 25.94553565979004, + "learning_rate": 9.303326982485422e-07, + "loss": 0.3456, + "num_input_tokens_seen": 28047104, + "step": 8905 + }, + { + "epoch": 0.5703860188208181, + "grad_norm": 49.04792785644531, + "learning_rate": 9.29218088581661e-07, + "loss": 0.3546, + "num_input_tokens_seen": 28063168, + "step": 8910 + }, + { + "epoch": 0.5707061007617951, + "grad_norm": 28.955217361450195, + "learning_rate": 9.281035672868278e-07, + "loss": 0.3462, + "num_input_tokens_seen": 28079104, + "step": 8915 + }, + { + "epoch": 0.571026182702772, + "grad_norm": 27.242048263549805, + "learning_rate": 9.269891357555348e-07, + "loss": 0.3912, + "num_input_tokens_seen": 28094720, + "step": 8920 + }, + { + "epoch": 0.5713462646437488, + "grad_norm": 39.87770462036133, + "learning_rate": 9.25874795379163e-07, + "loss": 0.2754, + "num_input_tokens_seen": 28110848, + "step": 8925 + }, + { + "epoch": 0.5716663465847257, + "grad_norm": 22.331693649291992, + "learning_rate": 9.247605475489793e-07, + "loss": 0.4172, + "num_input_tokens_seen": 28127040, + "step": 8930 + }, + { + "epoch": 0.5719864285257026, + "grad_norm": 33.441993713378906, + "learning_rate": 9.236463936561358e-07, + "loss": 0.3062, + "num_input_tokens_seen": 28143424, + "step": 8935 + }, + { + "epoch": 0.5723065104666795, + "grad_norm": 48.873287200927734, + "learning_rate": 9.225323350916661e-07, + "loss": 0.5365, + "num_input_tokens_seen": 28158528, + "step": 8940 + }, + { + "epoch": 0.5726265924076563, + "grad_norm": 35.569923400878906, + "learning_rate": 9.214183732464855e-07, + "loss": 0.3948, + "num_input_tokens_seen": 28173888, + "step": 8945 + }, + { + "epoch": 0.5729466743486332, + "grad_norm": 20.366697311401367, + "learning_rate": 9.203045095113886e-07, + "loss": 0.3671, + "num_input_tokens_seen": 28191872, + "step": 8950 + }, + { + "epoch": 0.5732667562896101, + "grad_norm": 45.24616622924805, + "learning_rate": 9.191907452770476e-07, + "loss": 0.4305, + "num_input_tokens_seen": 28206912, + "step": 8955 + }, + { + "epoch": 0.573586838230587, + "grad_norm": 29.864273071289062, + "learning_rate": 9.180770819340095e-07, + "loss": 0.4233, + "num_input_tokens_seen": 28222336, + "step": 8960 + }, + { + "epoch": 0.573906920171564, + "grad_norm": 14.063233375549316, + "learning_rate": 9.169635208726967e-07, + "loss": 0.376, + "num_input_tokens_seen": 28238144, + "step": 8965 + }, + { + "epoch": 0.5742270021125409, + "grad_norm": 62.739784240722656, + "learning_rate": 9.15850063483403e-07, + "loss": 0.3787, + "num_input_tokens_seen": 28253376, + "step": 8970 + }, + { + "epoch": 0.5745470840535177, + "grad_norm": 28.41097068786621, + "learning_rate": 9.147367111562928e-07, + "loss": 0.3493, + "num_input_tokens_seen": 28269248, + "step": 8975 + }, + { + "epoch": 0.5748671659944946, + "grad_norm": 35.87826919555664, + "learning_rate": 9.136234652814005e-07, + "loss": 0.4094, + "num_input_tokens_seen": 28285440, + "step": 8980 + }, + { + "epoch": 0.5751872479354715, + "grad_norm": 27.88485336303711, + "learning_rate": 9.125103272486255e-07, + "loss": 0.2965, + "num_input_tokens_seen": 28300736, + "step": 8985 + }, + { + "epoch": 0.5755073298764484, + "grad_norm": 30.880252838134766, + "learning_rate": 9.11397298447734e-07, + "loss": 0.361, + "num_input_tokens_seen": 28315712, + "step": 8990 + }, + { + "epoch": 0.5758274118174252, + "grad_norm": 30.014013290405273, + "learning_rate": 9.10284380268356e-07, + "loss": 0.3287, + "num_input_tokens_seen": 28332032, + "step": 8995 + }, + { + "epoch": 0.5761474937584021, + "grad_norm": 26.396350860595703, + "learning_rate": 9.091715740999828e-07, + "loss": 0.4476, + "num_input_tokens_seen": 28347968, + "step": 9000 + }, + { + "epoch": 0.576467575699379, + "grad_norm": 23.355926513671875, + "learning_rate": 9.080588813319654e-07, + "loss": 0.3849, + "num_input_tokens_seen": 28362944, + "step": 9005 + }, + { + "epoch": 0.5767876576403559, + "grad_norm": 42.71702194213867, + "learning_rate": 9.069463033535143e-07, + "loss": 0.3032, + "num_input_tokens_seen": 28378624, + "step": 9010 + }, + { + "epoch": 0.5771077395813328, + "grad_norm": 62.55430603027344, + "learning_rate": 9.058338415536962e-07, + "loss": 0.3865, + "num_input_tokens_seen": 28394048, + "step": 9015 + }, + { + "epoch": 0.5774278215223098, + "grad_norm": 38.583648681640625, + "learning_rate": 9.04721497321432e-07, + "loss": 0.3808, + "num_input_tokens_seen": 28409664, + "step": 9020 + }, + { + "epoch": 0.5777479034632866, + "grad_norm": 31.30422592163086, + "learning_rate": 9.036092720454977e-07, + "loss": 0.3744, + "num_input_tokens_seen": 28424768, + "step": 9025 + }, + { + "epoch": 0.5780679854042635, + "grad_norm": 29.469755172729492, + "learning_rate": 9.024971671145189e-07, + "loss": 0.3387, + "num_input_tokens_seen": 28439424, + "step": 9030 + }, + { + "epoch": 0.5783880673452404, + "grad_norm": 41.49711608886719, + "learning_rate": 9.013851839169718e-07, + "loss": 0.4406, + "num_input_tokens_seen": 28456064, + "step": 9035 + }, + { + "epoch": 0.5787081492862173, + "grad_norm": 42.17570495605469, + "learning_rate": 9.002733238411801e-07, + "loss": 0.3388, + "num_input_tokens_seen": 28472768, + "step": 9040 + }, + { + "epoch": 0.5790282312271942, + "grad_norm": 31.11846160888672, + "learning_rate": 8.991615882753147e-07, + "loss": 0.3489, + "num_input_tokens_seen": 28488704, + "step": 9045 + }, + { + "epoch": 0.579348313168171, + "grad_norm": 55.96306610107422, + "learning_rate": 8.980499786073904e-07, + "loss": 0.4431, + "num_input_tokens_seen": 28503808, + "step": 9050 + }, + { + "epoch": 0.5796683951091479, + "grad_norm": 54.62471008300781, + "learning_rate": 8.969384962252645e-07, + "loss": 0.4759, + "num_input_tokens_seen": 28520320, + "step": 9055 + }, + { + "epoch": 0.5799884770501248, + "grad_norm": 47.783241271972656, + "learning_rate": 8.958271425166366e-07, + "loss": 0.4431, + "num_input_tokens_seen": 28535680, + "step": 9060 + }, + { + "epoch": 0.5803085589911017, + "grad_norm": 22.617599487304688, + "learning_rate": 8.947159188690442e-07, + "loss": 0.396, + "num_input_tokens_seen": 28551488, + "step": 9065 + }, + { + "epoch": 0.5806286409320787, + "grad_norm": 67.4439697265625, + "learning_rate": 8.93604826669863e-07, + "loss": 0.4786, + "num_input_tokens_seen": 28567040, + "step": 9070 + }, + { + "epoch": 0.5809487228730555, + "grad_norm": 26.622365951538086, + "learning_rate": 8.924938673063052e-07, + "loss": 0.3986, + "num_input_tokens_seen": 28581568, + "step": 9075 + }, + { + "epoch": 0.5812688048140324, + "grad_norm": 15.871992111206055, + "learning_rate": 8.913830421654166e-07, + "loss": 0.3559, + "num_input_tokens_seen": 28596992, + "step": 9080 + }, + { + "epoch": 0.5815888867550093, + "grad_norm": 22.36756134033203, + "learning_rate": 8.902723526340746e-07, + "loss": 0.4757, + "num_input_tokens_seen": 28613952, + "step": 9085 + }, + { + "epoch": 0.5819089686959862, + "grad_norm": 26.785381317138672, + "learning_rate": 8.89161800098989e-07, + "loss": 0.4202, + "num_input_tokens_seen": 28628736, + "step": 9090 + }, + { + "epoch": 0.5822290506369631, + "grad_norm": 54.52938461303711, + "learning_rate": 8.880513859466974e-07, + "loss": 0.3704, + "num_input_tokens_seen": 28644928, + "step": 9095 + }, + { + "epoch": 0.5825491325779399, + "grad_norm": 17.885007858276367, + "learning_rate": 8.869411115635645e-07, + "loss": 0.278, + "num_input_tokens_seen": 28661184, + "step": 9100 + }, + { + "epoch": 0.5828692145189168, + "grad_norm": 17.88958740234375, + "learning_rate": 8.858309783357816e-07, + "loss": 0.2772, + "num_input_tokens_seen": 28675776, + "step": 9105 + }, + { + "epoch": 0.5831892964598937, + "grad_norm": 53.37077713012695, + "learning_rate": 8.847209876493629e-07, + "loss": 0.4318, + "num_input_tokens_seen": 28692160, + "step": 9110 + }, + { + "epoch": 0.5835093784008706, + "grad_norm": 30.646394729614258, + "learning_rate": 8.836111408901441e-07, + "loss": 0.2576, + "num_input_tokens_seen": 28707328, + "step": 9115 + }, + { + "epoch": 0.5838294603418475, + "grad_norm": 43.16847610473633, + "learning_rate": 8.825014394437828e-07, + "loss": 0.4235, + "num_input_tokens_seen": 28722624, + "step": 9120 + }, + { + "epoch": 0.5841495422828245, + "grad_norm": 14.40605640411377, + "learning_rate": 8.813918846957542e-07, + "loss": 0.3748, + "num_input_tokens_seen": 28737856, + "step": 9125 + }, + { + "epoch": 0.5844696242238013, + "grad_norm": 20.49512481689453, + "learning_rate": 8.802824780313499e-07, + "loss": 0.4501, + "num_input_tokens_seen": 28752448, + "step": 9130 + }, + { + "epoch": 0.5847897061647782, + "grad_norm": 22.4967098236084, + "learning_rate": 8.791732208356771e-07, + "loss": 0.3958, + "num_input_tokens_seen": 28767616, + "step": 9135 + }, + { + "epoch": 0.5851097881057551, + "grad_norm": 15.978533744812012, + "learning_rate": 8.780641144936573e-07, + "loss": 0.4649, + "num_input_tokens_seen": 28782400, + "step": 9140 + }, + { + "epoch": 0.585429870046732, + "grad_norm": 48.71504211425781, + "learning_rate": 8.76955160390022e-07, + "loss": 0.4457, + "num_input_tokens_seen": 28798336, + "step": 9145 + }, + { + "epoch": 0.5857499519877089, + "grad_norm": 16.611661911010742, + "learning_rate": 8.758463599093136e-07, + "loss": 0.2868, + "num_input_tokens_seen": 28814336, + "step": 9150 + }, + { + "epoch": 0.5860700339286857, + "grad_norm": 39.33195495605469, + "learning_rate": 8.747377144358825e-07, + "loss": 0.5273, + "num_input_tokens_seen": 28830656, + "step": 9155 + }, + { + "epoch": 0.5863901158696626, + "grad_norm": 42.789817810058594, + "learning_rate": 8.736292253538861e-07, + "loss": 0.418, + "num_input_tokens_seen": 28846656, + "step": 9160 + }, + { + "epoch": 0.5867101978106395, + "grad_norm": 33.47774887084961, + "learning_rate": 8.725208940472851e-07, + "loss": 0.309, + "num_input_tokens_seen": 28862848, + "step": 9165 + }, + { + "epoch": 0.5870302797516164, + "grad_norm": 14.912242889404297, + "learning_rate": 8.714127218998448e-07, + "loss": 0.4083, + "num_input_tokens_seen": 28878400, + "step": 9170 + }, + { + "epoch": 0.5873503616925934, + "grad_norm": 67.51158905029297, + "learning_rate": 8.70304710295131e-07, + "loss": 0.5084, + "num_input_tokens_seen": 28893568, + "step": 9175 + }, + { + "epoch": 0.5876704436335702, + "grad_norm": 29.94365692138672, + "learning_rate": 8.691968606165092e-07, + "loss": 0.367, + "num_input_tokens_seen": 28909824, + "step": 9180 + }, + { + "epoch": 0.5879905255745471, + "grad_norm": 30.510108947753906, + "learning_rate": 8.680891742471429e-07, + "loss": 0.3078, + "num_input_tokens_seen": 28925568, + "step": 9185 + }, + { + "epoch": 0.588310607515524, + "grad_norm": 27.14842987060547, + "learning_rate": 8.669816525699912e-07, + "loss": 0.3272, + "num_input_tokens_seen": 28941056, + "step": 9190 + }, + { + "epoch": 0.5886306894565009, + "grad_norm": 36.03899002075195, + "learning_rate": 8.658742969678079e-07, + "loss": 0.4143, + "num_input_tokens_seen": 28955456, + "step": 9195 + }, + { + "epoch": 0.5889507713974778, + "grad_norm": 33.955684661865234, + "learning_rate": 8.647671088231398e-07, + "loss": 0.2927, + "num_input_tokens_seen": 28971136, + "step": 9200 + }, + { + "epoch": 0.5892708533384546, + "grad_norm": 57.654293060302734, + "learning_rate": 8.636600895183245e-07, + "loss": 0.4087, + "num_input_tokens_seen": 28988480, + "step": 9205 + }, + { + "epoch": 0.5895909352794315, + "grad_norm": 45.632225036621094, + "learning_rate": 8.625532404354877e-07, + "loss": 0.3669, + "num_input_tokens_seen": 29004544, + "step": 9210 + }, + { + "epoch": 0.5899110172204084, + "grad_norm": 14.44135570526123, + "learning_rate": 8.614465629565443e-07, + "loss": 0.3809, + "num_input_tokens_seen": 29019328, + "step": 9215 + }, + { + "epoch": 0.5902310991613853, + "grad_norm": 24.873798370361328, + "learning_rate": 8.603400584631939e-07, + "loss": 0.3336, + "num_input_tokens_seen": 29034752, + "step": 9220 + }, + { + "epoch": 0.5905511811023622, + "grad_norm": 34.6170654296875, + "learning_rate": 8.592337283369198e-07, + "loss": 0.4422, + "num_input_tokens_seen": 29050816, + "step": 9225 + }, + { + "epoch": 0.5908712630433391, + "grad_norm": 26.38481903076172, + "learning_rate": 8.581275739589893e-07, + "loss": 0.2752, + "num_input_tokens_seen": 29065920, + "step": 9230 + }, + { + "epoch": 0.591191344984316, + "grad_norm": 36.17750549316406, + "learning_rate": 8.570215967104481e-07, + "loss": 0.483, + "num_input_tokens_seen": 29080960, + "step": 9235 + }, + { + "epoch": 0.5915114269252929, + "grad_norm": 24.824047088623047, + "learning_rate": 8.559157979721225e-07, + "loss": 0.4786, + "num_input_tokens_seen": 29096768, + "step": 9240 + }, + { + "epoch": 0.5918315088662698, + "grad_norm": 35.19805908203125, + "learning_rate": 8.548101791246145e-07, + "loss": 0.5513, + "num_input_tokens_seen": 29112448, + "step": 9245 + }, + { + "epoch": 0.5921515908072467, + "grad_norm": 30.23106575012207, + "learning_rate": 8.537047415483028e-07, + "loss": 0.3392, + "num_input_tokens_seen": 29127808, + "step": 9250 + }, + { + "epoch": 0.5924716727482235, + "grad_norm": 13.602792739868164, + "learning_rate": 8.525994866233388e-07, + "loss": 0.2774, + "num_input_tokens_seen": 29142912, + "step": 9255 + }, + { + "epoch": 0.5927917546892004, + "grad_norm": 45.087398529052734, + "learning_rate": 8.514944157296464e-07, + "loss": 0.3847, + "num_input_tokens_seen": 29159168, + "step": 9260 + }, + { + "epoch": 0.5931118366301773, + "grad_norm": 38.43781280517578, + "learning_rate": 8.503895302469199e-07, + "loss": 0.3826, + "num_input_tokens_seen": 29175488, + "step": 9265 + }, + { + "epoch": 0.5934319185711542, + "grad_norm": 33.70762634277344, + "learning_rate": 8.492848315546214e-07, + "loss": 0.4143, + "num_input_tokens_seen": 29191104, + "step": 9270 + }, + { + "epoch": 0.5937520005121311, + "grad_norm": 17.961454391479492, + "learning_rate": 8.4818032103198e-07, + "loss": 0.4172, + "num_input_tokens_seen": 29206208, + "step": 9275 + }, + { + "epoch": 0.5940720824531079, + "grad_norm": 42.23419189453125, + "learning_rate": 8.470760000579906e-07, + "loss": 0.4169, + "num_input_tokens_seen": 29221312, + "step": 9280 + }, + { + "epoch": 0.5943921643940849, + "grad_norm": 46.78962707519531, + "learning_rate": 8.459718700114108e-07, + "loss": 0.4932, + "num_input_tokens_seen": 29236800, + "step": 9285 + }, + { + "epoch": 0.5947122463350618, + "grad_norm": 26.358369827270508, + "learning_rate": 8.448679322707595e-07, + "loss": 0.4521, + "num_input_tokens_seen": 29252480, + "step": 9290 + }, + { + "epoch": 0.5950323282760387, + "grad_norm": 41.36620330810547, + "learning_rate": 8.437641882143163e-07, + "loss": 0.5845, + "num_input_tokens_seen": 29266944, + "step": 9295 + }, + { + "epoch": 0.5953524102170156, + "grad_norm": 17.812028884887695, + "learning_rate": 8.426606392201185e-07, + "loss": 0.319, + "num_input_tokens_seen": 29282816, + "step": 9300 + }, + { + "epoch": 0.5956724921579925, + "grad_norm": 22.074562072753906, + "learning_rate": 8.415572866659599e-07, + "loss": 0.3009, + "num_input_tokens_seen": 29297984, + "step": 9305 + }, + { + "epoch": 0.5959925740989693, + "grad_norm": 24.042194366455078, + "learning_rate": 8.404541319293896e-07, + "loss": 0.376, + "num_input_tokens_seen": 29313664, + "step": 9310 + }, + { + "epoch": 0.5963126560399462, + "grad_norm": 20.160175323486328, + "learning_rate": 8.393511763877086e-07, + "loss": 0.5842, + "num_input_tokens_seen": 29329472, + "step": 9315 + }, + { + "epoch": 0.5966327379809231, + "grad_norm": 33.067359924316406, + "learning_rate": 8.3824842141797e-07, + "loss": 0.4463, + "num_input_tokens_seen": 29346048, + "step": 9320 + }, + { + "epoch": 0.5969528199219, + "grad_norm": 27.763477325439453, + "learning_rate": 8.371458683969765e-07, + "loss": 0.3801, + "num_input_tokens_seen": 29361664, + "step": 9325 + }, + { + "epoch": 0.5972729018628768, + "grad_norm": 23.89577865600586, + "learning_rate": 8.360435187012787e-07, + "loss": 0.3887, + "num_input_tokens_seen": 29376896, + "step": 9330 + }, + { + "epoch": 0.5975929838038538, + "grad_norm": 36.93418502807617, + "learning_rate": 8.349413737071725e-07, + "loss": 0.3767, + "num_input_tokens_seen": 29392640, + "step": 9335 + }, + { + "epoch": 0.5979130657448307, + "grad_norm": 29.668235778808594, + "learning_rate": 8.338394347906994e-07, + "loss": 0.4399, + "num_input_tokens_seen": 29407808, + "step": 9340 + }, + { + "epoch": 0.5982331476858076, + "grad_norm": 36.61244201660156, + "learning_rate": 8.327377033276431e-07, + "loss": 0.2995, + "num_input_tokens_seen": 29422528, + "step": 9345 + }, + { + "epoch": 0.5985532296267845, + "grad_norm": 25.591800689697266, + "learning_rate": 8.316361806935279e-07, + "loss": 0.3481, + "num_input_tokens_seen": 29438272, + "step": 9350 + }, + { + "epoch": 0.5988733115677614, + "grad_norm": 30.289875030517578, + "learning_rate": 8.305348682636177e-07, + "loss": 0.4557, + "num_input_tokens_seen": 29453376, + "step": 9355 + }, + { + "epoch": 0.5991933935087382, + "grad_norm": 33.169734954833984, + "learning_rate": 8.294337674129144e-07, + "loss": 0.4204, + "num_input_tokens_seen": 29469248, + "step": 9360 + }, + { + "epoch": 0.5995134754497151, + "grad_norm": 35.08827209472656, + "learning_rate": 8.283328795161554e-07, + "loss": 0.2783, + "num_input_tokens_seen": 29485888, + "step": 9365 + }, + { + "epoch": 0.599833557390692, + "grad_norm": 28.095083236694336, + "learning_rate": 8.272322059478114e-07, + "loss": 0.3194, + "num_input_tokens_seen": 29500864, + "step": 9370 + }, + { + "epoch": 0.6001536393316689, + "grad_norm": 18.85226821899414, + "learning_rate": 8.261317480820871e-07, + "loss": 0.2312, + "num_input_tokens_seen": 29516288, + "step": 9375 + }, + { + "epoch": 0.6004737212726458, + "grad_norm": 34.60100173950195, + "learning_rate": 8.250315072929168e-07, + "loss": 0.4, + "num_input_tokens_seen": 29530880, + "step": 9380 + }, + { + "epoch": 0.6007297868254273, + "eval_loss": 0.3916759490966797, + "eval_runtime": 49.1281, + "eval_samples_per_second": 282.649, + "eval_steps_per_second": 35.336, + "num_input_tokens_seen": 29544576, + "step": 9384 + }, + { + "epoch": 0.6007938032136226, + "grad_norm": 20.751314163208008, + "learning_rate": 8.239314849539637e-07, + "loss": 0.3513, + "num_input_tokens_seen": 29547840, + "step": 9385 + }, + { + "epoch": 0.6011138851545996, + "grad_norm": 31.6501522064209, + "learning_rate": 8.228316824386193e-07, + "loss": 0.4204, + "num_input_tokens_seen": 29564096, + "step": 9390 + }, + { + "epoch": 0.6014339670955765, + "grad_norm": 33.23552322387695, + "learning_rate": 8.217321011199995e-07, + "loss": 0.3633, + "num_input_tokens_seen": 29579520, + "step": 9395 + }, + { + "epoch": 0.6017540490365534, + "grad_norm": 49.13716125488281, + "learning_rate": 8.206327423709441e-07, + "loss": 0.4256, + "num_input_tokens_seen": 29594048, + "step": 9400 + }, + { + "epoch": 0.6020741309775303, + "grad_norm": 23.02613067626953, + "learning_rate": 8.195336075640163e-07, + "loss": 0.3871, + "num_input_tokens_seen": 29610368, + "step": 9405 + }, + { + "epoch": 0.6023942129185071, + "grad_norm": 32.443267822265625, + "learning_rate": 8.184346980714984e-07, + "loss": 0.4232, + "num_input_tokens_seen": 29625792, + "step": 9410 + }, + { + "epoch": 0.602714294859484, + "grad_norm": 40.73899459838867, + "learning_rate": 8.173360152653914e-07, + "loss": 0.3399, + "num_input_tokens_seen": 29642240, + "step": 9415 + }, + { + "epoch": 0.6030343768004609, + "grad_norm": 28.00251007080078, + "learning_rate": 8.162375605174143e-07, + "loss": 0.293, + "num_input_tokens_seen": 29658176, + "step": 9420 + }, + { + "epoch": 0.6033544587414378, + "grad_norm": 26.76416778564453, + "learning_rate": 8.151393351990005e-07, + "loss": 0.3118, + "num_input_tokens_seen": 29675392, + "step": 9425 + }, + { + "epoch": 0.6036745406824147, + "grad_norm": 29.030107498168945, + "learning_rate": 8.140413406812971e-07, + "loss": 0.4241, + "num_input_tokens_seen": 29690048, + "step": 9430 + }, + { + "epoch": 0.6039946226233915, + "grad_norm": 33.374656677246094, + "learning_rate": 8.129435783351635e-07, + "loss": 0.3052, + "num_input_tokens_seen": 29705088, + "step": 9435 + }, + { + "epoch": 0.6043147045643685, + "grad_norm": 29.674457550048828, + "learning_rate": 8.118460495311685e-07, + "loss": 0.4482, + "num_input_tokens_seen": 29720576, + "step": 9440 + }, + { + "epoch": 0.6046347865053454, + "grad_norm": 30.353450775146484, + "learning_rate": 8.107487556395901e-07, + "loss": 0.4204, + "num_input_tokens_seen": 29736896, + "step": 9445 + }, + { + "epoch": 0.6049548684463223, + "grad_norm": 29.06775665283203, + "learning_rate": 8.096516980304115e-07, + "loss": 0.3567, + "num_input_tokens_seen": 29752768, + "step": 9450 + }, + { + "epoch": 0.6052749503872992, + "grad_norm": 50.72957229614258, + "learning_rate": 8.085548780733238e-07, + "loss": 0.3355, + "num_input_tokens_seen": 29768640, + "step": 9455 + }, + { + "epoch": 0.605595032328276, + "grad_norm": 32.87676239013672, + "learning_rate": 8.074582971377182e-07, + "loss": 0.338, + "num_input_tokens_seen": 29786240, + "step": 9460 + }, + { + "epoch": 0.6059151142692529, + "grad_norm": 40.09199142456055, + "learning_rate": 8.063619565926892e-07, + "loss": 0.4356, + "num_input_tokens_seen": 29802176, + "step": 9465 + }, + { + "epoch": 0.6062351962102298, + "grad_norm": 16.3148250579834, + "learning_rate": 8.052658578070313e-07, + "loss": 0.3912, + "num_input_tokens_seen": 29817600, + "step": 9470 + }, + { + "epoch": 0.6065552781512067, + "grad_norm": 13.280025482177734, + "learning_rate": 8.041700021492362e-07, + "loss": 0.3313, + "num_input_tokens_seen": 29832960, + "step": 9475 + }, + { + "epoch": 0.6068753600921836, + "grad_norm": 23.65538215637207, + "learning_rate": 8.030743909874924e-07, + "loss": 0.2888, + "num_input_tokens_seen": 29848448, + "step": 9480 + }, + { + "epoch": 0.6071954420331604, + "grad_norm": 16.695858001708984, + "learning_rate": 8.019790256896839e-07, + "loss": 0.3247, + "num_input_tokens_seen": 29863296, + "step": 9485 + }, + { + "epoch": 0.6075155239741373, + "grad_norm": 45.717647552490234, + "learning_rate": 8.008839076233871e-07, + "loss": 0.3806, + "num_input_tokens_seen": 29880128, + "step": 9490 + }, + { + "epoch": 0.6078356059151143, + "grad_norm": 24.243160247802734, + "learning_rate": 7.997890381558691e-07, + "loss": 0.3618, + "num_input_tokens_seen": 29895296, + "step": 9495 + }, + { + "epoch": 0.6081556878560912, + "grad_norm": 33.516685485839844, + "learning_rate": 7.986944186540878e-07, + "loss": 0.4291, + "num_input_tokens_seen": 29911296, + "step": 9500 + }, + { + "epoch": 0.6084757697970681, + "grad_norm": 45.87578582763672, + "learning_rate": 7.976000504846885e-07, + "loss": 0.4594, + "num_input_tokens_seen": 29926912, + "step": 9505 + }, + { + "epoch": 0.608795851738045, + "grad_norm": 104.76370239257812, + "learning_rate": 7.965059350140024e-07, + "loss": 0.4726, + "num_input_tokens_seen": 29942272, + "step": 9510 + }, + { + "epoch": 0.6091159336790218, + "grad_norm": 38.258480072021484, + "learning_rate": 7.954120736080461e-07, + "loss": 0.4037, + "num_input_tokens_seen": 29958016, + "step": 9515 + }, + { + "epoch": 0.6094360156199987, + "grad_norm": 24.145002365112305, + "learning_rate": 7.943184676325178e-07, + "loss": 0.5797, + "num_input_tokens_seen": 29974720, + "step": 9520 + }, + { + "epoch": 0.6097560975609756, + "grad_norm": 27.14354133605957, + "learning_rate": 7.932251184527974e-07, + "loss": 0.4342, + "num_input_tokens_seen": 29991680, + "step": 9525 + }, + { + "epoch": 0.6100761795019525, + "grad_norm": 27.287010192871094, + "learning_rate": 7.921320274339446e-07, + "loss": 0.2753, + "num_input_tokens_seen": 30007168, + "step": 9530 + }, + { + "epoch": 0.6103962614429294, + "grad_norm": 39.53981018066406, + "learning_rate": 7.910391959406966e-07, + "loss": 0.3337, + "num_input_tokens_seen": 30022656, + "step": 9535 + }, + { + "epoch": 0.6107163433839062, + "grad_norm": 33.61812210083008, + "learning_rate": 7.899466253374653e-07, + "loss": 0.3943, + "num_input_tokens_seen": 30038144, + "step": 9540 + }, + { + "epoch": 0.6110364253248832, + "grad_norm": 34.27006149291992, + "learning_rate": 7.88854316988339e-07, + "loss": 0.3347, + "num_input_tokens_seen": 30055488, + "step": 9545 + }, + { + "epoch": 0.6113565072658601, + "grad_norm": 39.317073822021484, + "learning_rate": 7.877622722570771e-07, + "loss": 0.3016, + "num_input_tokens_seen": 30071040, + "step": 9550 + }, + { + "epoch": 0.611676589206837, + "grad_norm": 23.81880760192871, + "learning_rate": 7.866704925071101e-07, + "loss": 0.4185, + "num_input_tokens_seen": 30088000, + "step": 9555 + }, + { + "epoch": 0.6119966711478139, + "grad_norm": 24.980806350708008, + "learning_rate": 7.855789791015377e-07, + "loss": 0.422, + "num_input_tokens_seen": 30103040, + "step": 9560 + }, + { + "epoch": 0.6123167530887907, + "grad_norm": 42.49583053588867, + "learning_rate": 7.844877334031277e-07, + "loss": 0.3946, + "num_input_tokens_seen": 30117760, + "step": 9565 + }, + { + "epoch": 0.6126368350297676, + "grad_norm": 32.370361328125, + "learning_rate": 7.833967567743131e-07, + "loss": 0.4797, + "num_input_tokens_seen": 30133888, + "step": 9570 + }, + { + "epoch": 0.6129569169707445, + "grad_norm": 30.043428421020508, + "learning_rate": 7.823060505771903e-07, + "loss": 0.3747, + "num_input_tokens_seen": 30149312, + "step": 9575 + }, + { + "epoch": 0.6132769989117214, + "grad_norm": 39.43803787231445, + "learning_rate": 7.812156161735199e-07, + "loss": 0.3944, + "num_input_tokens_seen": 30163840, + "step": 9580 + }, + { + "epoch": 0.6135970808526983, + "grad_norm": 69.42517852783203, + "learning_rate": 7.801254549247215e-07, + "loss": 0.5462, + "num_input_tokens_seen": 30180544, + "step": 9585 + }, + { + "epoch": 0.6139171627936751, + "grad_norm": 18.023378372192383, + "learning_rate": 7.790355681918739e-07, + "loss": 0.3212, + "num_input_tokens_seen": 30197120, + "step": 9590 + }, + { + "epoch": 0.614237244734652, + "grad_norm": 52.89658737182617, + "learning_rate": 7.779459573357144e-07, + "loss": 0.421, + "num_input_tokens_seen": 30213376, + "step": 9595 + }, + { + "epoch": 0.614557326675629, + "grad_norm": 20.749906539916992, + "learning_rate": 7.768566237166338e-07, + "loss": 0.4225, + "num_input_tokens_seen": 30229120, + "step": 9600 + }, + { + "epoch": 0.6148774086166059, + "grad_norm": 45.14435958862305, + "learning_rate": 7.757675686946786e-07, + "loss": 0.5064, + "num_input_tokens_seen": 30244544, + "step": 9605 + }, + { + "epoch": 0.6151974905575828, + "grad_norm": 31.990671157836914, + "learning_rate": 7.746787936295468e-07, + "loss": 0.4207, + "num_input_tokens_seen": 30260864, + "step": 9610 + }, + { + "epoch": 0.6155175724985597, + "grad_norm": 42.7758674621582, + "learning_rate": 7.735902998805868e-07, + "loss": 0.3739, + "num_input_tokens_seen": 30275456, + "step": 9615 + }, + { + "epoch": 0.6158376544395365, + "grad_norm": 42.92548751831055, + "learning_rate": 7.725020888067955e-07, + "loss": 0.4195, + "num_input_tokens_seen": 30291008, + "step": 9620 + }, + { + "epoch": 0.6161577363805134, + "grad_norm": 18.282148361206055, + "learning_rate": 7.714141617668176e-07, + "loss": 0.4814, + "num_input_tokens_seen": 30306816, + "step": 9625 + }, + { + "epoch": 0.6164778183214903, + "grad_norm": 25.10959815979004, + "learning_rate": 7.703265201189426e-07, + "loss": 0.3298, + "num_input_tokens_seen": 30322240, + "step": 9630 + }, + { + "epoch": 0.6167979002624672, + "grad_norm": 17.638351440429688, + "learning_rate": 7.692391652211036e-07, + "loss": 0.3357, + "num_input_tokens_seen": 30338048, + "step": 9635 + }, + { + "epoch": 0.617117982203444, + "grad_norm": 40.34111404418945, + "learning_rate": 7.681520984308769e-07, + "loss": 0.3313, + "num_input_tokens_seen": 30353984, + "step": 9640 + }, + { + "epoch": 0.6174380641444209, + "grad_norm": 39.976497650146484, + "learning_rate": 7.670653211054772e-07, + "loss": 0.4902, + "num_input_tokens_seen": 30370048, + "step": 9645 + }, + { + "epoch": 0.6177581460853978, + "grad_norm": 35.88365936279297, + "learning_rate": 7.659788346017591e-07, + "loss": 0.413, + "num_input_tokens_seen": 30385344, + "step": 9650 + }, + { + "epoch": 0.6180782280263748, + "grad_norm": 35.93766784667969, + "learning_rate": 7.648926402762133e-07, + "loss": 0.3813, + "num_input_tokens_seen": 30400576, + "step": 9655 + }, + { + "epoch": 0.6183983099673517, + "grad_norm": 38.066795349121094, + "learning_rate": 7.638067394849671e-07, + "loss": 0.3867, + "num_input_tokens_seen": 30415424, + "step": 9660 + }, + { + "epoch": 0.6187183919083286, + "grad_norm": 44.817840576171875, + "learning_rate": 7.627211335837797e-07, + "loss": 0.4056, + "num_input_tokens_seen": 30430592, + "step": 9665 + }, + { + "epoch": 0.6190384738493054, + "grad_norm": 21.97688865661621, + "learning_rate": 7.616358239280427e-07, + "loss": 0.4352, + "num_input_tokens_seen": 30445952, + "step": 9670 + }, + { + "epoch": 0.6193585557902823, + "grad_norm": 30.693403244018555, + "learning_rate": 7.605508118727787e-07, + "loss": 0.3274, + "num_input_tokens_seen": 30461568, + "step": 9675 + }, + { + "epoch": 0.6196786377312592, + "grad_norm": 25.588163375854492, + "learning_rate": 7.594660987726373e-07, + "loss": 0.3611, + "num_input_tokens_seen": 30476672, + "step": 9680 + }, + { + "epoch": 0.6199987196722361, + "grad_norm": 42.19605255126953, + "learning_rate": 7.583816859818956e-07, + "loss": 0.4013, + "num_input_tokens_seen": 30492672, + "step": 9685 + }, + { + "epoch": 0.620318801613213, + "grad_norm": 23.23065948486328, + "learning_rate": 7.57297574854456e-07, + "loss": 0.3785, + "num_input_tokens_seen": 30507712, + "step": 9690 + }, + { + "epoch": 0.6206388835541898, + "grad_norm": 72.38654327392578, + "learning_rate": 7.56213766743844e-07, + "loss": 0.4395, + "num_input_tokens_seen": 30524032, + "step": 9695 + }, + { + "epoch": 0.6209589654951667, + "grad_norm": 16.888713836669922, + "learning_rate": 7.551302630032064e-07, + "loss": 0.333, + "num_input_tokens_seen": 30539776, + "step": 9700 + }, + { + "epoch": 0.6212790474361437, + "grad_norm": 18.87851905822754, + "learning_rate": 7.540470649853106e-07, + "loss": 0.3693, + "num_input_tokens_seen": 30554752, + "step": 9705 + }, + { + "epoch": 0.6215991293771206, + "grad_norm": 25.751543045043945, + "learning_rate": 7.529641740425419e-07, + "loss": 0.4034, + "num_input_tokens_seen": 30571968, + "step": 9710 + }, + { + "epoch": 0.6219192113180975, + "grad_norm": 30.196582794189453, + "learning_rate": 7.518815915269023e-07, + "loss": 0.4351, + "num_input_tokens_seen": 30587264, + "step": 9715 + }, + { + "epoch": 0.6222392932590743, + "grad_norm": 18.58189582824707, + "learning_rate": 7.507993187900092e-07, + "loss": 0.3948, + "num_input_tokens_seen": 30603200, + "step": 9720 + }, + { + "epoch": 0.6225593752000512, + "grad_norm": 29.565282821655273, + "learning_rate": 7.497173571830926e-07, + "loss": 0.4253, + "num_input_tokens_seen": 30617856, + "step": 9725 + }, + { + "epoch": 0.6228794571410281, + "grad_norm": 43.09429168701172, + "learning_rate": 7.486357080569938e-07, + "loss": 0.4732, + "num_input_tokens_seen": 30632448, + "step": 9730 + }, + { + "epoch": 0.623199539082005, + "grad_norm": 23.361135482788086, + "learning_rate": 7.47554372762165e-07, + "loss": 0.3747, + "num_input_tokens_seen": 30647680, + "step": 9735 + }, + { + "epoch": 0.6235196210229819, + "grad_norm": 59.268245697021484, + "learning_rate": 7.464733526486662e-07, + "loss": 0.4905, + "num_input_tokens_seen": 30663616, + "step": 9740 + }, + { + "epoch": 0.6238397029639587, + "grad_norm": 40.878173828125, + "learning_rate": 7.453926490661628e-07, + "loss": 0.3424, + "num_input_tokens_seen": 30682496, + "step": 9745 + }, + { + "epoch": 0.6241597849049356, + "grad_norm": 51.57231521606445, + "learning_rate": 7.443122633639267e-07, + "loss": 0.3639, + "num_input_tokens_seen": 30697664, + "step": 9750 + }, + { + "epoch": 0.6244798668459125, + "grad_norm": 61.236114501953125, + "learning_rate": 7.432321968908319e-07, + "loss": 0.3835, + "num_input_tokens_seen": 30713408, + "step": 9755 + }, + { + "epoch": 0.6247999487868895, + "grad_norm": 22.234743118286133, + "learning_rate": 7.421524509953543e-07, + "loss": 0.3173, + "num_input_tokens_seen": 30730496, + "step": 9760 + }, + { + "epoch": 0.6251200307278664, + "grad_norm": 29.365135192871094, + "learning_rate": 7.410730270255687e-07, + "loss": 0.4158, + "num_input_tokens_seen": 30745664, + "step": 9765 + }, + { + "epoch": 0.6254401126688433, + "grad_norm": 32.928707122802734, + "learning_rate": 7.399939263291493e-07, + "loss": 0.3655, + "num_input_tokens_seen": 30760960, + "step": 9770 + }, + { + "epoch": 0.6257601946098201, + "grad_norm": 33.555416107177734, + "learning_rate": 7.389151502533657e-07, + "loss": 0.4854, + "num_input_tokens_seen": 30775872, + "step": 9775 + }, + { + "epoch": 0.626080276550797, + "grad_norm": 17.007144927978516, + "learning_rate": 7.378367001450819e-07, + "loss": 0.3683, + "num_input_tokens_seen": 30791424, + "step": 9780 + }, + { + "epoch": 0.6264003584917739, + "grad_norm": 55.41214370727539, + "learning_rate": 7.367585773507567e-07, + "loss": 0.4317, + "num_input_tokens_seen": 30807680, + "step": 9785 + }, + { + "epoch": 0.6267204404327508, + "grad_norm": 41.18684387207031, + "learning_rate": 7.356807832164385e-07, + "loss": 0.4428, + "num_input_tokens_seen": 30823680, + "step": 9790 + }, + { + "epoch": 0.6270405223737276, + "grad_norm": 18.37259292602539, + "learning_rate": 7.346033190877654e-07, + "loss": 0.4404, + "num_input_tokens_seen": 30839360, + "step": 9795 + }, + { + "epoch": 0.6273606043147045, + "grad_norm": 36.56877136230469, + "learning_rate": 7.335261863099651e-07, + "loss": 0.3596, + "num_input_tokens_seen": 30854784, + "step": 9800 + }, + { + "epoch": 0.6276806862556814, + "grad_norm": 31.16109275817871, + "learning_rate": 7.324493862278498e-07, + "loss": 0.3969, + "num_input_tokens_seen": 30870592, + "step": 9805 + }, + { + "epoch": 0.6280007681966584, + "grad_norm": 39.3582649230957, + "learning_rate": 7.313729201858167e-07, + "loss": 0.4546, + "num_input_tokens_seen": 30885952, + "step": 9810 + }, + { + "epoch": 0.6283208501376353, + "grad_norm": 21.64111328125, + "learning_rate": 7.302967895278473e-07, + "loss": 0.3285, + "num_input_tokens_seen": 30902080, + "step": 9815 + }, + { + "epoch": 0.6286409320786122, + "grad_norm": 29.953590393066406, + "learning_rate": 7.292209955975028e-07, + "loss": 0.4045, + "num_input_tokens_seen": 30919232, + "step": 9820 + }, + { + "epoch": 0.628961014019589, + "grad_norm": 37.44114685058594, + "learning_rate": 7.281455397379244e-07, + "loss": 0.4068, + "num_input_tokens_seen": 30936448, + "step": 9825 + }, + { + "epoch": 0.6292810959605659, + "grad_norm": 37.291465759277344, + "learning_rate": 7.270704232918316e-07, + "loss": 0.3249, + "num_input_tokens_seen": 30952256, + "step": 9830 + }, + { + "epoch": 0.6296011779015428, + "grad_norm": 53.09471130371094, + "learning_rate": 7.2599564760152e-07, + "loss": 0.401, + "num_input_tokens_seen": 30967360, + "step": 9835 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 31.090974807739258, + "learning_rate": 7.249212140088592e-07, + "loss": 0.3851, + "num_input_tokens_seen": 30982016, + "step": 9840 + }, + { + "epoch": 0.6302413417834966, + "grad_norm": 19.057065963745117, + "learning_rate": 7.23847123855293e-07, + "loss": 0.3347, + "num_input_tokens_seen": 30998080, + "step": 9845 + }, + { + "epoch": 0.6305614237244734, + "grad_norm": 22.847869873046875, + "learning_rate": 7.227733784818349e-07, + "loss": 0.274, + "num_input_tokens_seen": 31013184, + "step": 9850 + }, + { + "epoch": 0.6308815056654503, + "grad_norm": 10.46581745147705, + "learning_rate": 7.216999792290683e-07, + "loss": 0.3758, + "num_input_tokens_seen": 31028800, + "step": 9855 + }, + { + "epoch": 0.6312015876064272, + "grad_norm": 32.733524322509766, + "learning_rate": 7.206269274371457e-07, + "loss": 0.4837, + "num_input_tokens_seen": 31044736, + "step": 9860 + }, + { + "epoch": 0.6315216695474042, + "grad_norm": 16.939966201782227, + "learning_rate": 7.195542244457845e-07, + "loss": 0.3489, + "num_input_tokens_seen": 31059968, + "step": 9865 + }, + { + "epoch": 0.6318417514883811, + "grad_norm": 21.10120391845703, + "learning_rate": 7.184818715942666e-07, + "loss": 0.3215, + "num_input_tokens_seen": 31074880, + "step": 9870 + }, + { + "epoch": 0.6321618334293579, + "grad_norm": 32.21525573730469, + "learning_rate": 7.174098702214374e-07, + "loss": 0.3499, + "num_input_tokens_seen": 31090432, + "step": 9875 + }, + { + "epoch": 0.6324819153703348, + "grad_norm": 27.200403213500977, + "learning_rate": 7.163382216657033e-07, + "loss": 0.372, + "num_input_tokens_seen": 31107264, + "step": 9880 + }, + { + "epoch": 0.6328019973113117, + "grad_norm": 50.323707580566406, + "learning_rate": 7.152669272650302e-07, + "loss": 0.3531, + "num_input_tokens_seen": 31124096, + "step": 9885 + }, + { + "epoch": 0.6331220792522886, + "grad_norm": 51.96805191040039, + "learning_rate": 7.141959883569411e-07, + "loss": 0.3881, + "num_input_tokens_seen": 31138752, + "step": 9890 + }, + { + "epoch": 0.6334421611932655, + "grad_norm": 28.28093147277832, + "learning_rate": 7.131254062785165e-07, + "loss": 0.4624, + "num_input_tokens_seen": 31154048, + "step": 9895 + }, + { + "epoch": 0.6337622431342423, + "grad_norm": 26.130292892456055, + "learning_rate": 7.120551823663907e-07, + "loss": 0.5159, + "num_input_tokens_seen": 31170304, + "step": 9900 + }, + { + "epoch": 0.6340823250752192, + "grad_norm": 13.736687660217285, + "learning_rate": 7.109853179567499e-07, + "loss": 0.2778, + "num_input_tokens_seen": 31186368, + "step": 9905 + }, + { + "epoch": 0.6344024070161961, + "grad_norm": 22.524595260620117, + "learning_rate": 7.099158143853337e-07, + "loss": 0.4266, + "num_input_tokens_seen": 31201664, + "step": 9910 + }, + { + "epoch": 0.634722488957173, + "grad_norm": 48.70823287963867, + "learning_rate": 7.088466729874289e-07, + "loss": 0.396, + "num_input_tokens_seen": 31217216, + "step": 9915 + }, + { + "epoch": 0.63504257089815, + "grad_norm": 29.29494285583496, + "learning_rate": 7.077778950978713e-07, + "loss": 0.3762, + "num_input_tokens_seen": 31233728, + "step": 9920 + }, + { + "epoch": 0.6353626528391269, + "grad_norm": 17.022003173828125, + "learning_rate": 7.06709482051043e-07, + "loss": 0.4657, + "num_input_tokens_seen": 31249664, + "step": 9925 + }, + { + "epoch": 0.6356827347801037, + "grad_norm": 18.935352325439453, + "learning_rate": 7.056414351808698e-07, + "loss": 0.2958, + "num_input_tokens_seen": 31265408, + "step": 9930 + }, + { + "epoch": 0.6360028167210806, + "grad_norm": 25.703018188476562, + "learning_rate": 7.045737558208206e-07, + "loss": 0.3557, + "num_input_tokens_seen": 31281088, + "step": 9935 + }, + { + "epoch": 0.6363228986620575, + "grad_norm": 28.873281478881836, + "learning_rate": 7.035064453039064e-07, + "loss": 0.4025, + "num_input_tokens_seen": 31296512, + "step": 9940 + }, + { + "epoch": 0.6366429806030344, + "grad_norm": 14.413522720336914, + "learning_rate": 7.024395049626766e-07, + "loss": 0.3796, + "num_input_tokens_seen": 31312000, + "step": 9945 + }, + { + "epoch": 0.6369630625440112, + "grad_norm": 42.59180450439453, + "learning_rate": 7.013729361292182e-07, + "loss": 0.3378, + "num_input_tokens_seen": 31327488, + "step": 9950 + }, + { + "epoch": 0.6372831444849881, + "grad_norm": 37.88176727294922, + "learning_rate": 7.003067401351554e-07, + "loss": 0.2992, + "num_input_tokens_seen": 31343936, + "step": 9955 + }, + { + "epoch": 0.637603226425965, + "grad_norm": 69.40776062011719, + "learning_rate": 6.992409183116465e-07, + "loss": 0.3971, + "num_input_tokens_seen": 31359232, + "step": 9960 + }, + { + "epoch": 0.6379233083669419, + "grad_norm": 18.821264266967773, + "learning_rate": 6.981754719893826e-07, + "loss": 0.3715, + "num_input_tokens_seen": 31375616, + "step": 9965 + }, + { + "epoch": 0.6382433903079189, + "grad_norm": 49.44694137573242, + "learning_rate": 6.971104024985852e-07, + "loss": 0.4687, + "num_input_tokens_seen": 31391680, + "step": 9970 + }, + { + "epoch": 0.6385634722488958, + "grad_norm": 28.005033493041992, + "learning_rate": 6.960457111690068e-07, + "loss": 0.3829, + "num_input_tokens_seen": 31407424, + "step": 9975 + }, + { + "epoch": 0.6388835541898726, + "grad_norm": 18.54348373413086, + "learning_rate": 6.94981399329927e-07, + "loss": 0.3854, + "num_input_tokens_seen": 31422912, + "step": 9980 + }, + { + "epoch": 0.6392036361308495, + "grad_norm": 53.93000030517578, + "learning_rate": 6.939174683101509e-07, + "loss": 0.3806, + "num_input_tokens_seen": 31438912, + "step": 9985 + }, + { + "epoch": 0.6395237180718264, + "grad_norm": 25.696611404418945, + "learning_rate": 6.9285391943801e-07, + "loss": 0.2888, + "num_input_tokens_seen": 31455168, + "step": 9990 + }, + { + "epoch": 0.6398438000128033, + "grad_norm": 32.05419158935547, + "learning_rate": 6.917907540413569e-07, + "loss": 0.32, + "num_input_tokens_seen": 31470592, + "step": 9995 + }, + { + "epoch": 0.6401638819537802, + "grad_norm": 31.22829818725586, + "learning_rate": 6.907279734475659e-07, + "loss": 0.3466, + "num_input_tokens_seen": 31485632, + "step": 10000 + }, + { + "epoch": 0.640483963894757, + "grad_norm": 47.022422790527344, + "learning_rate": 6.896655789835317e-07, + "loss": 0.353, + "num_input_tokens_seen": 31500352, + "step": 10005 + }, + { + "epoch": 0.6408040458357339, + "grad_norm": 38.05258560180664, + "learning_rate": 6.886035719756656e-07, + "loss": 0.365, + "num_input_tokens_seen": 31516928, + "step": 10010 + }, + { + "epoch": 0.6411241277767108, + "grad_norm": 19.052812576293945, + "learning_rate": 6.875419537498959e-07, + "loss": 0.272, + "num_input_tokens_seen": 31532608, + "step": 10015 + }, + { + "epoch": 0.6414442097176877, + "grad_norm": 54.83672332763672, + "learning_rate": 6.864807256316658e-07, + "loss": 0.5903, + "num_input_tokens_seen": 31548608, + "step": 10020 + }, + { + "epoch": 0.6417642916586647, + "grad_norm": 21.3775691986084, + "learning_rate": 6.854198889459311e-07, + "loss": 0.4124, + "num_input_tokens_seen": 31564224, + "step": 10025 + }, + { + "epoch": 0.6420843735996415, + "grad_norm": 8.760631561279297, + "learning_rate": 6.84359445017158e-07, + "loss": 0.2575, + "num_input_tokens_seen": 31579200, + "step": 10030 + }, + { + "epoch": 0.6424044555406184, + "grad_norm": 46.2386589050293, + "learning_rate": 6.832993951693244e-07, + "loss": 0.4146, + "num_input_tokens_seen": 31594816, + "step": 10035 + }, + { + "epoch": 0.6427245374815953, + "grad_norm": 14.027458190917969, + "learning_rate": 6.822397407259144e-07, + "loss": 0.3439, + "num_input_tokens_seen": 31610432, + "step": 10040 + }, + { + "epoch": 0.6430446194225722, + "grad_norm": 40.68043899536133, + "learning_rate": 6.811804830099186e-07, + "loss": 0.3688, + "num_input_tokens_seen": 31627520, + "step": 10045 + }, + { + "epoch": 0.6433647013635491, + "grad_norm": 46.507389068603516, + "learning_rate": 6.801216233438336e-07, + "loss": 0.3446, + "num_input_tokens_seen": 31644352, + "step": 10050 + }, + { + "epoch": 0.6436847833045259, + "grad_norm": 30.864545822143555, + "learning_rate": 6.790631630496575e-07, + "loss": 0.3831, + "num_input_tokens_seen": 31660160, + "step": 10055 + }, + { + "epoch": 0.6440048652455028, + "grad_norm": 34.4333610534668, + "learning_rate": 6.780051034488903e-07, + "loss": 0.4395, + "num_input_tokens_seen": 31676352, + "step": 10060 + }, + { + "epoch": 0.6443249471864797, + "grad_norm": 80.57857513427734, + "learning_rate": 6.769474458625323e-07, + "loss": 0.3439, + "num_input_tokens_seen": 31692160, + "step": 10065 + }, + { + "epoch": 0.6446450291274566, + "grad_norm": 17.465198516845703, + "learning_rate": 6.758901916110813e-07, + "loss": 0.3099, + "num_input_tokens_seen": 31707712, + "step": 10070 + }, + { + "epoch": 0.6449651110684336, + "grad_norm": 16.61797523498535, + "learning_rate": 6.748333420145315e-07, + "loss": 0.3246, + "num_input_tokens_seen": 31723776, + "step": 10075 + }, + { + "epoch": 0.6452851930094105, + "grad_norm": 21.947399139404297, + "learning_rate": 6.737768983923718e-07, + "loss": 0.3972, + "num_input_tokens_seen": 31740672, + "step": 10080 + }, + { + "epoch": 0.6456052749503873, + "grad_norm": 32.63840103149414, + "learning_rate": 6.727208620635849e-07, + "loss": 0.2989, + "num_input_tokens_seen": 31755648, + "step": 10085 + }, + { + "epoch": 0.6459253568913642, + "grad_norm": 37.15324783325195, + "learning_rate": 6.716652343466446e-07, + "loss": 0.4543, + "num_input_tokens_seen": 31770624, + "step": 10090 + }, + { + "epoch": 0.6462454388323411, + "grad_norm": 40.501869201660156, + "learning_rate": 6.706100165595139e-07, + "loss": 0.3094, + "num_input_tokens_seen": 31786816, + "step": 10095 + }, + { + "epoch": 0.646565520773318, + "grad_norm": 33.34444808959961, + "learning_rate": 6.695552100196452e-07, + "loss": 0.396, + "num_input_tokens_seen": 31801792, + "step": 10100 + }, + { + "epoch": 0.6468856027142948, + "grad_norm": 66.4857177734375, + "learning_rate": 6.685008160439769e-07, + "loss": 0.5142, + "num_input_tokens_seen": 31818944, + "step": 10105 + }, + { + "epoch": 0.6472056846552717, + "grad_norm": 35.997623443603516, + "learning_rate": 6.674468359489313e-07, + "loss": 0.4128, + "num_input_tokens_seen": 31834176, + "step": 10110 + }, + { + "epoch": 0.6475257665962486, + "grad_norm": 34.614864349365234, + "learning_rate": 6.663932710504163e-07, + "loss": 0.3496, + "num_input_tokens_seen": 31850176, + "step": 10115 + }, + { + "epoch": 0.6478458485372255, + "grad_norm": 44.908058166503906, + "learning_rate": 6.653401226638192e-07, + "loss": 0.3894, + "num_input_tokens_seen": 31865600, + "step": 10120 + }, + { + "epoch": 0.6481659304782024, + "grad_norm": 23.878267288208008, + "learning_rate": 6.64287392104008e-07, + "loss": 0.3921, + "num_input_tokens_seen": 31880512, + "step": 10125 + }, + { + "epoch": 0.6484860124191794, + "grad_norm": 23.294103622436523, + "learning_rate": 6.632350806853299e-07, + "loss": 0.4388, + "num_input_tokens_seen": 31896512, + "step": 10130 + }, + { + "epoch": 0.6488060943601562, + "grad_norm": 41.51081848144531, + "learning_rate": 6.621831897216074e-07, + "loss": 0.4029, + "num_input_tokens_seen": 31912768, + "step": 10135 + }, + { + "epoch": 0.6491261763011331, + "grad_norm": 166.3177032470703, + "learning_rate": 6.611317205261387e-07, + "loss": 0.4345, + "num_input_tokens_seen": 31927488, + "step": 10140 + }, + { + "epoch": 0.64944625824211, + "grad_norm": 28.243562698364258, + "learning_rate": 6.60080674411696e-07, + "loss": 0.3416, + "num_input_tokens_seen": 31942784, + "step": 10145 + }, + { + "epoch": 0.6497663401830869, + "grad_norm": 15.446354866027832, + "learning_rate": 6.590300526905225e-07, + "loss": 0.3172, + "num_input_tokens_seen": 31958528, + "step": 10150 + }, + { + "epoch": 0.6500864221240638, + "grad_norm": 35.624691009521484, + "learning_rate": 6.579798566743313e-07, + "loss": 0.4676, + "num_input_tokens_seen": 31974016, + "step": 10155 + }, + { + "epoch": 0.6504065040650406, + "grad_norm": 41.85055160522461, + "learning_rate": 6.569300876743049e-07, + "loss": 0.3143, + "num_input_tokens_seen": 31990720, + "step": 10160 + }, + { + "epoch": 0.6507265860060175, + "grad_norm": 31.454090118408203, + "learning_rate": 6.558807470010923e-07, + "loss": 0.3188, + "num_input_tokens_seen": 32007168, + "step": 10165 + }, + { + "epoch": 0.6507906023942129, + "eval_loss": 0.37842774391174316, + "eval_runtime": 49.1741, + "eval_samples_per_second": 282.384, + "eval_steps_per_second": 35.303, + "num_input_tokens_seen": 32010176, + "step": 10166 + }, + { + "epoch": 0.6510466679469944, + "grad_norm": 29.121973037719727, + "learning_rate": 6.548318359648071e-07, + "loss": 0.3642, + "num_input_tokens_seen": 32022208, + "step": 10170 + }, + { + "epoch": 0.6513667498879713, + "grad_norm": 41.79141616821289, + "learning_rate": 6.537833558750279e-07, + "loss": 0.3967, + "num_input_tokens_seen": 32037760, + "step": 10175 + }, + { + "epoch": 0.6516868318289483, + "grad_norm": 48.895450592041016, + "learning_rate": 6.527353080407938e-07, + "loss": 0.3055, + "num_input_tokens_seen": 32052800, + "step": 10180 + }, + { + "epoch": 0.6520069137699251, + "grad_norm": 25.074914932250977, + "learning_rate": 6.516876937706048e-07, + "loss": 0.3366, + "num_input_tokens_seen": 32068288, + "step": 10185 + }, + { + "epoch": 0.652326995710902, + "grad_norm": 24.659767150878906, + "learning_rate": 6.506405143724196e-07, + "loss": 0.3758, + "num_input_tokens_seen": 32083200, + "step": 10190 + }, + { + "epoch": 0.6526470776518789, + "grad_norm": 51.243431091308594, + "learning_rate": 6.495937711536546e-07, + "loss": 0.4635, + "num_input_tokens_seen": 32098432, + "step": 10195 + }, + { + "epoch": 0.6529671595928558, + "grad_norm": 37.73176574707031, + "learning_rate": 6.485474654211803e-07, + "loss": 0.4226, + "num_input_tokens_seen": 32114944, + "step": 10200 + }, + { + "epoch": 0.6532872415338327, + "grad_norm": 38.4589958190918, + "learning_rate": 6.475015984813217e-07, + "loss": 0.3044, + "num_input_tokens_seen": 32131520, + "step": 10205 + }, + { + "epoch": 0.6536073234748095, + "grad_norm": 12.53635311126709, + "learning_rate": 6.464561716398564e-07, + "loss": 0.3158, + "num_input_tokens_seen": 32147008, + "step": 10210 + }, + { + "epoch": 0.6539274054157864, + "grad_norm": 31.261737823486328, + "learning_rate": 6.454111862020122e-07, + "loss": 0.3734, + "num_input_tokens_seen": 32162560, + "step": 10215 + }, + { + "epoch": 0.6542474873567633, + "grad_norm": 27.614994049072266, + "learning_rate": 6.443666434724649e-07, + "loss": 0.3636, + "num_input_tokens_seen": 32177024, + "step": 10220 + }, + { + "epoch": 0.6545675692977402, + "grad_norm": 25.943843841552734, + "learning_rate": 6.43322544755339e-07, + "loss": 0.5155, + "num_input_tokens_seen": 32193024, + "step": 10225 + }, + { + "epoch": 0.6548876512387171, + "grad_norm": 29.868574142456055, + "learning_rate": 6.422788913542038e-07, + "loss": 0.3365, + "num_input_tokens_seen": 32208896, + "step": 10230 + }, + { + "epoch": 0.655207733179694, + "grad_norm": 15.805740356445312, + "learning_rate": 6.412356845720726e-07, + "loss": 0.3296, + "num_input_tokens_seen": 32225280, + "step": 10235 + }, + { + "epoch": 0.6555278151206709, + "grad_norm": 16.956401824951172, + "learning_rate": 6.40192925711402e-07, + "loss": 0.3605, + "num_input_tokens_seen": 32240768, + "step": 10240 + }, + { + "epoch": 0.6558478970616478, + "grad_norm": 27.354637145996094, + "learning_rate": 6.39150616074088e-07, + "loss": 0.3264, + "num_input_tokens_seen": 32255872, + "step": 10245 + }, + { + "epoch": 0.6561679790026247, + "grad_norm": 30.604806900024414, + "learning_rate": 6.381087569614668e-07, + "loss": 0.4193, + "num_input_tokens_seen": 32272512, + "step": 10250 + }, + { + "epoch": 0.6564880609436016, + "grad_norm": 13.617461204528809, + "learning_rate": 6.370673496743116e-07, + "loss": 0.3828, + "num_input_tokens_seen": 32286272, + "step": 10255 + }, + { + "epoch": 0.6568081428845784, + "grad_norm": 25.074222564697266, + "learning_rate": 6.360263955128315e-07, + "loss": 0.4331, + "num_input_tokens_seen": 32301952, + "step": 10260 + }, + { + "epoch": 0.6571282248255553, + "grad_norm": 16.422725677490234, + "learning_rate": 6.349858957766701e-07, + "loss": 0.3602, + "num_input_tokens_seen": 32318208, + "step": 10265 + }, + { + "epoch": 0.6574483067665322, + "grad_norm": 24.204320907592773, + "learning_rate": 6.339458517649036e-07, + "loss": 0.336, + "num_input_tokens_seen": 32333504, + "step": 10270 + }, + { + "epoch": 0.6577683887075091, + "grad_norm": 32.412906646728516, + "learning_rate": 6.329062647760395e-07, + "loss": 0.3626, + "num_input_tokens_seen": 32350208, + "step": 10275 + }, + { + "epoch": 0.658088470648486, + "grad_norm": 35.1417121887207, + "learning_rate": 6.318671361080137e-07, + "loss": 0.3351, + "num_input_tokens_seen": 32365376, + "step": 10280 + }, + { + "epoch": 0.6584085525894628, + "grad_norm": 16.784576416015625, + "learning_rate": 6.308284670581906e-07, + "loss": 0.3306, + "num_input_tokens_seen": 32381248, + "step": 10285 + }, + { + "epoch": 0.6587286345304398, + "grad_norm": 27.784454345703125, + "learning_rate": 6.297902589233612e-07, + "loss": 0.4558, + "num_input_tokens_seen": 32395968, + "step": 10290 + }, + { + "epoch": 0.6590487164714167, + "grad_norm": 32.86067581176758, + "learning_rate": 6.287525129997404e-07, + "loss": 0.3737, + "num_input_tokens_seen": 32411456, + "step": 10295 + }, + { + "epoch": 0.6593687984123936, + "grad_norm": 24.458457946777344, + "learning_rate": 6.277152305829656e-07, + "loss": 0.3865, + "num_input_tokens_seen": 32426880, + "step": 10300 + }, + { + "epoch": 0.6596888803533705, + "grad_norm": 30.00528907775879, + "learning_rate": 6.266784129680968e-07, + "loss": 0.3281, + "num_input_tokens_seen": 32442368, + "step": 10305 + }, + { + "epoch": 0.6600089622943474, + "grad_norm": 37.038761138916016, + "learning_rate": 6.256420614496129e-07, + "loss": 0.3781, + "num_input_tokens_seen": 32457920, + "step": 10310 + }, + { + "epoch": 0.6603290442353242, + "grad_norm": 34.41950988769531, + "learning_rate": 6.246061773214102e-07, + "loss": 0.4085, + "num_input_tokens_seen": 32473536, + "step": 10315 + }, + { + "epoch": 0.6606491261763011, + "grad_norm": 33.36504364013672, + "learning_rate": 6.235707618768032e-07, + "loss": 0.3956, + "num_input_tokens_seen": 32490240, + "step": 10320 + }, + { + "epoch": 0.660969208117278, + "grad_norm": 63.69960021972656, + "learning_rate": 6.225358164085196e-07, + "loss": 0.3506, + "num_input_tokens_seen": 32505728, + "step": 10325 + }, + { + "epoch": 0.6612892900582549, + "grad_norm": 47.17720031738281, + "learning_rate": 6.21501342208701e-07, + "loss": 0.3521, + "num_input_tokens_seen": 32520960, + "step": 10330 + }, + { + "epoch": 0.6616093719992318, + "grad_norm": 22.675373077392578, + "learning_rate": 6.204673405689007e-07, + "loss": 0.4036, + "num_input_tokens_seen": 32535872, + "step": 10335 + }, + { + "epoch": 0.6619294539402087, + "grad_norm": 21.25689697265625, + "learning_rate": 6.194338127800823e-07, + "loss": 0.3158, + "num_input_tokens_seen": 32552448, + "step": 10340 + }, + { + "epoch": 0.6622495358811856, + "grad_norm": 35.447052001953125, + "learning_rate": 6.184007601326165e-07, + "loss": 0.3866, + "num_input_tokens_seen": 32567232, + "step": 10345 + }, + { + "epoch": 0.6625696178221625, + "grad_norm": 30.18397331237793, + "learning_rate": 6.173681839162824e-07, + "loss": 0.3515, + "num_input_tokens_seen": 32583360, + "step": 10350 + }, + { + "epoch": 0.6628896997631394, + "grad_norm": 30.015911102294922, + "learning_rate": 6.163360854202635e-07, + "loss": 0.3336, + "num_input_tokens_seen": 32598656, + "step": 10355 + }, + { + "epoch": 0.6632097817041163, + "grad_norm": 19.04948616027832, + "learning_rate": 6.153044659331461e-07, + "loss": 0.306, + "num_input_tokens_seen": 32614144, + "step": 10360 + }, + { + "epoch": 0.6635298636450931, + "grad_norm": 30.63086700439453, + "learning_rate": 6.142733267429203e-07, + "loss": 0.3687, + "num_input_tokens_seen": 32629120, + "step": 10365 + }, + { + "epoch": 0.66384994558607, + "grad_norm": 25.801145553588867, + "learning_rate": 6.132426691369748e-07, + "loss": 0.4287, + "num_input_tokens_seen": 32645952, + "step": 10370 + }, + { + "epoch": 0.6641700275270469, + "grad_norm": 12.77051067352295, + "learning_rate": 6.122124944020977e-07, + "loss": 0.3988, + "num_input_tokens_seen": 32661696, + "step": 10375 + }, + { + "epoch": 0.6644901094680238, + "grad_norm": 23.705963134765625, + "learning_rate": 6.111828038244749e-07, + "loss": 0.3753, + "num_input_tokens_seen": 32677760, + "step": 10380 + }, + { + "epoch": 0.6648101914090007, + "grad_norm": 14.284012794494629, + "learning_rate": 6.101535986896866e-07, + "loss": 0.2948, + "num_input_tokens_seen": 32693568, + "step": 10385 + }, + { + "epoch": 0.6651302733499775, + "grad_norm": 15.676067352294922, + "learning_rate": 6.091248802827076e-07, + "loss": 0.2899, + "num_input_tokens_seen": 32708736, + "step": 10390 + }, + { + "epoch": 0.6654503552909545, + "grad_norm": 20.25788688659668, + "learning_rate": 6.080966498879048e-07, + "loss": 0.3218, + "num_input_tokens_seen": 32725440, + "step": 10395 + }, + { + "epoch": 0.6657704372319314, + "grad_norm": 40.33934020996094, + "learning_rate": 6.070689087890363e-07, + "loss": 0.2962, + "num_input_tokens_seen": 32740608, + "step": 10400 + }, + { + "epoch": 0.6660905191729083, + "grad_norm": 20.199983596801758, + "learning_rate": 6.060416582692487e-07, + "loss": 0.3974, + "num_input_tokens_seen": 32756416, + "step": 10405 + }, + { + "epoch": 0.6664106011138852, + "grad_norm": 28.62371253967285, + "learning_rate": 6.05014899611076e-07, + "loss": 0.3358, + "num_input_tokens_seen": 32771904, + "step": 10410 + }, + { + "epoch": 0.666730683054862, + "grad_norm": 53.33070755004883, + "learning_rate": 6.039886340964391e-07, + "loss": 0.3724, + "num_input_tokens_seen": 32787392, + "step": 10415 + }, + { + "epoch": 0.6670507649958389, + "grad_norm": 19.005868911743164, + "learning_rate": 6.029628630066423e-07, + "loss": 0.334, + "num_input_tokens_seen": 32803136, + "step": 10420 + }, + { + "epoch": 0.6673708469368158, + "grad_norm": 30.18621063232422, + "learning_rate": 6.019375876223724e-07, + "loss": 0.4173, + "num_input_tokens_seen": 32818624, + "step": 10425 + }, + { + "epoch": 0.6676909288777927, + "grad_norm": 26.04371452331543, + "learning_rate": 6.009128092236982e-07, + "loss": 0.4672, + "num_input_tokens_seen": 32833920, + "step": 10430 + }, + { + "epoch": 0.6680110108187696, + "grad_norm": 19.67214584350586, + "learning_rate": 5.998885290900679e-07, + "loss": 0.3859, + "num_input_tokens_seen": 32848512, + "step": 10435 + }, + { + "epoch": 0.6683310927597464, + "grad_norm": 26.48846435546875, + "learning_rate": 5.988647485003061e-07, + "loss": 0.3391, + "num_input_tokens_seen": 32865088, + "step": 10440 + }, + { + "epoch": 0.6686511747007234, + "grad_norm": 61.240257263183594, + "learning_rate": 5.978414687326164e-07, + "loss": 0.4559, + "num_input_tokens_seen": 32882048, + "step": 10445 + }, + { + "epoch": 0.6689712566417003, + "grad_norm": 28.02547836303711, + "learning_rate": 5.968186910645745e-07, + "loss": 0.365, + "num_input_tokens_seen": 32898624, + "step": 10450 + }, + { + "epoch": 0.6692913385826772, + "grad_norm": 33.85887145996094, + "learning_rate": 5.957964167731305e-07, + "loss": 0.505, + "num_input_tokens_seen": 32914176, + "step": 10455 + }, + { + "epoch": 0.6696114205236541, + "grad_norm": 40.76100158691406, + "learning_rate": 5.947746471346065e-07, + "loss": 0.4068, + "num_input_tokens_seen": 32931136, + "step": 10460 + }, + { + "epoch": 0.669931502464631, + "grad_norm": 47.47494888305664, + "learning_rate": 5.937533834246932e-07, + "loss": 0.3349, + "num_input_tokens_seen": 32947648, + "step": 10465 + }, + { + "epoch": 0.6702515844056078, + "grad_norm": 24.66529083251953, + "learning_rate": 5.927326269184504e-07, + "loss": 0.3745, + "num_input_tokens_seen": 32964224, + "step": 10470 + }, + { + "epoch": 0.6705716663465847, + "grad_norm": 45.57734680175781, + "learning_rate": 5.917123788903049e-07, + "loss": 0.4498, + "num_input_tokens_seen": 32982080, + "step": 10475 + }, + { + "epoch": 0.6708917482875616, + "grad_norm": 37.37847137451172, + "learning_rate": 5.906926406140484e-07, + "loss": 0.4642, + "num_input_tokens_seen": 32997440, + "step": 10480 + }, + { + "epoch": 0.6712118302285385, + "grad_norm": 37.47283935546875, + "learning_rate": 5.896734133628354e-07, + "loss": 0.4298, + "num_input_tokens_seen": 33013056, + "step": 10485 + }, + { + "epoch": 0.6715319121695154, + "grad_norm": 24.019437789916992, + "learning_rate": 5.886546984091838e-07, + "loss": 0.3736, + "num_input_tokens_seen": 33028416, + "step": 10490 + }, + { + "epoch": 0.6718519941104922, + "grad_norm": 29.65847396850586, + "learning_rate": 5.876364970249711e-07, + "loss": 0.3415, + "num_input_tokens_seen": 33042880, + "step": 10495 + }, + { + "epoch": 0.6721720760514692, + "grad_norm": 33.120933532714844, + "learning_rate": 5.866188104814336e-07, + "loss": 0.2735, + "num_input_tokens_seen": 33058240, + "step": 10500 + }, + { + "epoch": 0.6724921579924461, + "grad_norm": 16.05461883544922, + "learning_rate": 5.856016400491646e-07, + "loss": 0.3792, + "num_input_tokens_seen": 33073920, + "step": 10505 + }, + { + "epoch": 0.672812239933423, + "grad_norm": 8.638588905334473, + "learning_rate": 5.845849869981136e-07, + "loss": 0.3192, + "num_input_tokens_seen": 33089344, + "step": 10510 + }, + { + "epoch": 0.6731323218743999, + "grad_norm": 19.994060516357422, + "learning_rate": 5.835688525975842e-07, + "loss": 0.3458, + "num_input_tokens_seen": 33104384, + "step": 10515 + }, + { + "epoch": 0.6734524038153767, + "grad_norm": 22.6699275970459, + "learning_rate": 5.825532381162311e-07, + "loss": 0.3931, + "num_input_tokens_seen": 33120064, + "step": 10520 + }, + { + "epoch": 0.6737724857563536, + "grad_norm": 22.203550338745117, + "learning_rate": 5.815381448220619e-07, + "loss": 0.3866, + "num_input_tokens_seen": 33136128, + "step": 10525 + }, + { + "epoch": 0.6740925676973305, + "grad_norm": 22.488792419433594, + "learning_rate": 5.805235739824327e-07, + "loss": 0.3452, + "num_input_tokens_seen": 33154816, + "step": 10530 + }, + { + "epoch": 0.6744126496383074, + "grad_norm": 37.78539276123047, + "learning_rate": 5.795095268640458e-07, + "loss": 0.5023, + "num_input_tokens_seen": 33169920, + "step": 10535 + }, + { + "epoch": 0.6747327315792843, + "grad_norm": 35.91427230834961, + "learning_rate": 5.784960047329519e-07, + "loss": 0.541, + "num_input_tokens_seen": 33187712, + "step": 10540 + }, + { + "epoch": 0.6750528135202611, + "grad_norm": 14.399126052856445, + "learning_rate": 5.774830088545452e-07, + "loss": 0.3866, + "num_input_tokens_seen": 33202880, + "step": 10545 + }, + { + "epoch": 0.6753728954612381, + "grad_norm": 15.00992202758789, + "learning_rate": 5.76470540493563e-07, + "loss": 0.2997, + "num_input_tokens_seen": 33218944, + "step": 10550 + }, + { + "epoch": 0.675692977402215, + "grad_norm": 27.697614669799805, + "learning_rate": 5.754586009140836e-07, + "loss": 0.4652, + "num_input_tokens_seen": 33234688, + "step": 10555 + }, + { + "epoch": 0.6760130593431919, + "grad_norm": 48.48150634765625, + "learning_rate": 5.744471913795256e-07, + "loss": 0.3679, + "num_input_tokens_seen": 33249920, + "step": 10560 + }, + { + "epoch": 0.6763331412841688, + "grad_norm": 34.35981369018555, + "learning_rate": 5.734363131526459e-07, + "loss": 0.3365, + "num_input_tokens_seen": 33265792, + "step": 10565 + }, + { + "epoch": 0.6766532232251457, + "grad_norm": 37.82774353027344, + "learning_rate": 5.724259674955377e-07, + "loss": 0.3742, + "num_input_tokens_seen": 33280832, + "step": 10570 + }, + { + "epoch": 0.6769733051661225, + "grad_norm": 28.590476989746094, + "learning_rate": 5.714161556696291e-07, + "loss": 0.3888, + "num_input_tokens_seen": 33296576, + "step": 10575 + }, + { + "epoch": 0.6772933871070994, + "grad_norm": 42.37991714477539, + "learning_rate": 5.704068789356824e-07, + "loss": 0.3388, + "num_input_tokens_seen": 33316672, + "step": 10580 + }, + { + "epoch": 0.6776134690480763, + "grad_norm": 28.075489044189453, + "learning_rate": 5.693981385537912e-07, + "loss": 0.3496, + "num_input_tokens_seen": 33331456, + "step": 10585 + }, + { + "epoch": 0.6779335509890532, + "grad_norm": 24.502607345581055, + "learning_rate": 5.683899357833801e-07, + "loss": 0.3447, + "num_input_tokens_seen": 33346752, + "step": 10590 + }, + { + "epoch": 0.67825363293003, + "grad_norm": 34.75849151611328, + "learning_rate": 5.673822718832015e-07, + "loss": 0.455, + "num_input_tokens_seen": 33362688, + "step": 10595 + }, + { + "epoch": 0.6785737148710069, + "grad_norm": 40.48807144165039, + "learning_rate": 5.663751481113362e-07, + "loss": 0.3697, + "num_input_tokens_seen": 33377600, + "step": 10600 + }, + { + "epoch": 0.6788937968119839, + "grad_norm": 24.50969696044922, + "learning_rate": 5.653685657251896e-07, + "loss": 0.4282, + "num_input_tokens_seen": 33393280, + "step": 10605 + }, + { + "epoch": 0.6792138787529608, + "grad_norm": 41.67803192138672, + "learning_rate": 5.643625259814922e-07, + "loss": 0.3746, + "num_input_tokens_seen": 33410112, + "step": 10610 + }, + { + "epoch": 0.6795339606939377, + "grad_norm": 17.74659538269043, + "learning_rate": 5.633570301362953e-07, + "loss": 0.3664, + "num_input_tokens_seen": 33426624, + "step": 10615 + }, + { + "epoch": 0.6798540426349146, + "grad_norm": 37.33218002319336, + "learning_rate": 5.623520794449739e-07, + "loss": 0.36, + "num_input_tokens_seen": 33442240, + "step": 10620 + }, + { + "epoch": 0.6801741245758914, + "grad_norm": 36.54777908325195, + "learning_rate": 5.613476751622195e-07, + "loss": 0.4713, + "num_input_tokens_seen": 33458432, + "step": 10625 + }, + { + "epoch": 0.6804942065168683, + "grad_norm": 26.362565994262695, + "learning_rate": 5.603438185420426e-07, + "loss": 0.4368, + "num_input_tokens_seen": 33473856, + "step": 10630 + }, + { + "epoch": 0.6808142884578452, + "grad_norm": 58.49364471435547, + "learning_rate": 5.593405108377714e-07, + "loss": 0.4714, + "num_input_tokens_seen": 33489216, + "step": 10635 + }, + { + "epoch": 0.6811343703988221, + "grad_norm": 23.39803695678711, + "learning_rate": 5.583377533020457e-07, + "loss": 0.4586, + "num_input_tokens_seen": 33505280, + "step": 10640 + }, + { + "epoch": 0.681454452339799, + "grad_norm": 40.31536865234375, + "learning_rate": 5.573355471868201e-07, + "loss": 0.2834, + "num_input_tokens_seen": 33520512, + "step": 10645 + }, + { + "epoch": 0.6817745342807758, + "grad_norm": 24.481168746948242, + "learning_rate": 5.563338937433621e-07, + "loss": 0.3532, + "num_input_tokens_seen": 33537344, + "step": 10650 + }, + { + "epoch": 0.6820946162217527, + "grad_norm": 15.533621788024902, + "learning_rate": 5.553327942222472e-07, + "loss": 0.2438, + "num_input_tokens_seen": 33552128, + "step": 10655 + }, + { + "epoch": 0.6824146981627297, + "grad_norm": 26.63052749633789, + "learning_rate": 5.54332249873359e-07, + "loss": 0.3547, + "num_input_tokens_seen": 33566784, + "step": 10660 + }, + { + "epoch": 0.6827347801037066, + "grad_norm": 21.95829963684082, + "learning_rate": 5.533322619458896e-07, + "loss": 0.3052, + "num_input_tokens_seen": 33582080, + "step": 10665 + }, + { + "epoch": 0.6830548620446835, + "grad_norm": 45.98701477050781, + "learning_rate": 5.52332831688336e-07, + "loss": 0.4079, + "num_input_tokens_seen": 33596864, + "step": 10670 + }, + { + "epoch": 0.6833749439856603, + "grad_norm": 79.64530944824219, + "learning_rate": 5.513339603484981e-07, + "loss": 0.3454, + "num_input_tokens_seen": 33613056, + "step": 10675 + }, + { + "epoch": 0.6836950259266372, + "grad_norm": 69.9050064086914, + "learning_rate": 5.503356491734785e-07, + "loss": 0.5049, + "num_input_tokens_seen": 33628160, + "step": 10680 + }, + { + "epoch": 0.6840151078676141, + "grad_norm": 18.264413833618164, + "learning_rate": 5.493378994096806e-07, + "loss": 0.4346, + "num_input_tokens_seen": 33645184, + "step": 10685 + }, + { + "epoch": 0.684335189808591, + "grad_norm": 18.561819076538086, + "learning_rate": 5.483407123028067e-07, + "loss": 0.3909, + "num_input_tokens_seen": 33660800, + "step": 10690 + }, + { + "epoch": 0.6846552717495679, + "grad_norm": 38.80720138549805, + "learning_rate": 5.473440890978566e-07, + "loss": 0.4766, + "num_input_tokens_seen": 33676736, + "step": 10695 + }, + { + "epoch": 0.6849753536905447, + "grad_norm": 25.19498634338379, + "learning_rate": 5.463480310391261e-07, + "loss": 0.4079, + "num_input_tokens_seen": 33692928, + "step": 10700 + }, + { + "epoch": 0.6852954356315216, + "grad_norm": 23.25238800048828, + "learning_rate": 5.453525393702052e-07, + "loss": 0.3839, + "num_input_tokens_seen": 33708352, + "step": 10705 + }, + { + "epoch": 0.6856155175724986, + "grad_norm": 32.19915771484375, + "learning_rate": 5.443576153339771e-07, + "loss": 0.3644, + "num_input_tokens_seen": 33723968, + "step": 10710 + }, + { + "epoch": 0.6859355995134755, + "grad_norm": 46.10927963256836, + "learning_rate": 5.433632601726159e-07, + "loss": 0.3272, + "num_input_tokens_seen": 33739200, + "step": 10715 + }, + { + "epoch": 0.6862556814544524, + "grad_norm": 33.03512191772461, + "learning_rate": 5.42369475127586e-07, + "loss": 0.3404, + "num_input_tokens_seen": 33754944, + "step": 10720 + }, + { + "epoch": 0.6865757633954293, + "grad_norm": 60.13679504394531, + "learning_rate": 5.413762614396396e-07, + "loss": 0.4709, + "num_input_tokens_seen": 33769472, + "step": 10725 + }, + { + "epoch": 0.6868958453364061, + "grad_norm": 33.97296142578125, + "learning_rate": 5.403836203488157e-07, + "loss": 0.4262, + "num_input_tokens_seen": 33784896, + "step": 10730 + }, + { + "epoch": 0.687215927277383, + "grad_norm": 18.200382232666016, + "learning_rate": 5.393915530944382e-07, + "loss": 0.3638, + "num_input_tokens_seen": 33800320, + "step": 10735 + }, + { + "epoch": 0.6875360092183599, + "grad_norm": 24.23163414001465, + "learning_rate": 5.384000609151145e-07, + "loss": 0.3765, + "num_input_tokens_seen": 33816896, + "step": 10740 + }, + { + "epoch": 0.6878560911593368, + "grad_norm": 21.162240982055664, + "learning_rate": 5.374091450487353e-07, + "loss": 0.3763, + "num_input_tokens_seen": 33833344, + "step": 10745 + }, + { + "epoch": 0.6881761731003136, + "grad_norm": 29.74762535095215, + "learning_rate": 5.364188067324693e-07, + "loss": 0.3352, + "num_input_tokens_seen": 33849856, + "step": 10750 + }, + { + "epoch": 0.6884962550412905, + "grad_norm": 13.640717506408691, + "learning_rate": 5.354290472027659e-07, + "loss": 0.3441, + "num_input_tokens_seen": 33865344, + "step": 10755 + }, + { + "epoch": 0.6888163369822674, + "grad_norm": 71.6620864868164, + "learning_rate": 5.344398676953525e-07, + "loss": 0.4955, + "num_input_tokens_seen": 33881792, + "step": 10760 + }, + { + "epoch": 0.6891364189232444, + "grad_norm": 31.854103088378906, + "learning_rate": 5.334512694452303e-07, + "loss": 0.4902, + "num_input_tokens_seen": 33898368, + "step": 10765 + }, + { + "epoch": 0.6894565008642213, + "grad_norm": 22.185178756713867, + "learning_rate": 5.324632536866755e-07, + "loss": 0.3489, + "num_input_tokens_seen": 33914368, + "step": 10770 + }, + { + "epoch": 0.6897765828051982, + "grad_norm": 40.81916046142578, + "learning_rate": 5.314758216532386e-07, + "loss": 0.3526, + "num_input_tokens_seen": 33929728, + "step": 10775 + }, + { + "epoch": 0.690096664746175, + "grad_norm": 20.197229385375977, + "learning_rate": 5.304889745777396e-07, + "loss": 0.3743, + "num_input_tokens_seen": 33944704, + "step": 10780 + }, + { + "epoch": 0.6904167466871519, + "grad_norm": 31.70199203491211, + "learning_rate": 5.295027136922678e-07, + "loss": 0.6418, + "num_input_tokens_seen": 33960128, + "step": 10785 + }, + { + "epoch": 0.6907368286281288, + "grad_norm": 22.89275360107422, + "learning_rate": 5.285170402281827e-07, + "loss": 0.4207, + "num_input_tokens_seen": 33975104, + "step": 10790 + }, + { + "epoch": 0.6910569105691057, + "grad_norm": 33.831241607666016, + "learning_rate": 5.275319554161087e-07, + "loss": 0.4588, + "num_input_tokens_seen": 33990720, + "step": 10795 + }, + { + "epoch": 0.6913769925100826, + "grad_norm": 31.06147575378418, + "learning_rate": 5.265474604859356e-07, + "loss": 0.4123, + "num_input_tokens_seen": 34006272, + "step": 10800 + }, + { + "epoch": 0.6916970744510594, + "grad_norm": 26.169334411621094, + "learning_rate": 5.255635566668171e-07, + "loss": 0.3902, + "num_input_tokens_seen": 34022400, + "step": 10805 + }, + { + "epoch": 0.6920171563920363, + "grad_norm": 22.71941566467285, + "learning_rate": 5.245802451871686e-07, + "loss": 0.3704, + "num_input_tokens_seen": 34038720, + "step": 10810 + }, + { + "epoch": 0.6923372383330133, + "grad_norm": 23.15312957763672, + "learning_rate": 5.235975272746663e-07, + "loss": 0.4316, + "num_input_tokens_seen": 34053760, + "step": 10815 + }, + { + "epoch": 0.6926573202739902, + "grad_norm": 22.503173828125, + "learning_rate": 5.226154041562442e-07, + "loss": 0.3024, + "num_input_tokens_seen": 34069568, + "step": 10820 + }, + { + "epoch": 0.6929774022149671, + "grad_norm": 23.336326599121094, + "learning_rate": 5.216338770580953e-07, + "loss": 0.406, + "num_input_tokens_seen": 34086912, + "step": 10825 + }, + { + "epoch": 0.6932974841559439, + "grad_norm": 22.208585739135742, + "learning_rate": 5.206529472056678e-07, + "loss": 0.3649, + "num_input_tokens_seen": 34101696, + "step": 10830 + }, + { + "epoch": 0.6936175660969208, + "grad_norm": 15.775872230529785, + "learning_rate": 5.196726158236637e-07, + "loss": 0.3168, + "num_input_tokens_seen": 34115904, + "step": 10835 + }, + { + "epoch": 0.6939376480378977, + "grad_norm": 23.13541603088379, + "learning_rate": 5.186928841360384e-07, + "loss": 0.3372, + "num_input_tokens_seen": 34131328, + "step": 10840 + }, + { + "epoch": 0.6942577299788746, + "grad_norm": 29.86430549621582, + "learning_rate": 5.177137533659985e-07, + "loss": 0.4395, + "num_input_tokens_seen": 34148544, + "step": 10845 + }, + { + "epoch": 0.6945778119198515, + "grad_norm": 20.5509033203125, + "learning_rate": 5.167352247360002e-07, + "loss": 0.4564, + "num_input_tokens_seen": 34163520, + "step": 10850 + }, + { + "epoch": 0.6948978938608283, + "grad_norm": 27.466720581054688, + "learning_rate": 5.157572994677479e-07, + "loss": 0.3993, + "num_input_tokens_seen": 34178368, + "step": 10855 + }, + { + "epoch": 0.6952179758018052, + "grad_norm": 32.89216232299805, + "learning_rate": 5.147799787821929e-07, + "loss": 0.4055, + "num_input_tokens_seen": 34193920, + "step": 10860 + }, + { + "epoch": 0.6955380577427821, + "grad_norm": 37.79446792602539, + "learning_rate": 5.138032638995315e-07, + "loss": 0.485, + "num_input_tokens_seen": 34210176, + "step": 10865 + }, + { + "epoch": 0.6958581396837591, + "grad_norm": 53.44511032104492, + "learning_rate": 5.128271560392037e-07, + "loss": 0.3575, + "num_input_tokens_seen": 34227328, + "step": 10870 + }, + { + "epoch": 0.696178221624736, + "grad_norm": 32.73928451538086, + "learning_rate": 5.118516564198916e-07, + "loss": 0.3901, + "num_input_tokens_seen": 34241984, + "step": 10875 + }, + { + "epoch": 0.6964983035657129, + "grad_norm": 23.722578048706055, + "learning_rate": 5.108767662595175e-07, + "loss": 0.3371, + "num_input_tokens_seen": 34256896, + "step": 10880 + }, + { + "epoch": 0.6968183855066897, + "grad_norm": 20.10529899597168, + "learning_rate": 5.099024867752446e-07, + "loss": 0.3824, + "num_input_tokens_seen": 34273792, + "step": 10885 + }, + { + "epoch": 0.6971384674476666, + "grad_norm": 33.20995330810547, + "learning_rate": 5.089288191834709e-07, + "loss": 0.3219, + "num_input_tokens_seen": 34290752, + "step": 10890 + }, + { + "epoch": 0.6974585493886435, + "grad_norm": 32.262474060058594, + "learning_rate": 5.079557646998318e-07, + "loss": 0.3367, + "num_input_tokens_seen": 34308416, + "step": 10895 + }, + { + "epoch": 0.6977786313296204, + "grad_norm": 13.212915420532227, + "learning_rate": 5.069833245391981e-07, + "loss": 0.403, + "num_input_tokens_seen": 34323776, + "step": 10900 + }, + { + "epoch": 0.6980987132705972, + "grad_norm": 24.54563331604004, + "learning_rate": 5.060114999156728e-07, + "loss": 0.322, + "num_input_tokens_seen": 34338944, + "step": 10905 + }, + { + "epoch": 0.6984187952115741, + "grad_norm": 37.85472869873047, + "learning_rate": 5.050402920425895e-07, + "loss": 0.3462, + "num_input_tokens_seen": 34354432, + "step": 10910 + }, + { + "epoch": 0.698738877152551, + "grad_norm": 17.395889282226562, + "learning_rate": 5.040697021325128e-07, + "loss": 0.2526, + "num_input_tokens_seen": 34370432, + "step": 10915 + }, + { + "epoch": 0.699058959093528, + "grad_norm": 32.64187240600586, + "learning_rate": 5.030997313972361e-07, + "loss": 0.437, + "num_input_tokens_seen": 34386496, + "step": 10920 + }, + { + "epoch": 0.6993790410345049, + "grad_norm": 19.760494232177734, + "learning_rate": 5.021303810477795e-07, + "loss": 0.368, + "num_input_tokens_seen": 34402560, + "step": 10925 + }, + { + "epoch": 0.6996991229754818, + "grad_norm": 16.46942710876465, + "learning_rate": 5.011616522943869e-07, + "loss": 0.2859, + "num_input_tokens_seen": 34418496, + "step": 10930 + }, + { + "epoch": 0.7000192049164586, + "grad_norm": 50.63234329223633, + "learning_rate": 5.001935463465289e-07, + "loss": 0.2731, + "num_input_tokens_seen": 34434752, + "step": 10935 + }, + { + "epoch": 0.7003392868574355, + "grad_norm": 22.748510360717773, + "learning_rate": 4.99226064412897e-07, + "loss": 0.3965, + "num_input_tokens_seen": 34450176, + "step": 10940 + }, + { + "epoch": 0.7006593687984124, + "grad_norm": 18.267223358154297, + "learning_rate": 4.982592077014026e-07, + "loss": 0.4233, + "num_input_tokens_seen": 34465600, + "step": 10945 + }, + { + "epoch": 0.7008514179629985, + "eval_loss": 0.37222641706466675, + "eval_runtime": 49.2115, + "eval_samples_per_second": 282.17, + "eval_steps_per_second": 35.276, + "num_input_tokens_seen": 34475136, + "step": 10948 + }, + { + "epoch": 0.7009794507393893, + "grad_norm": 38.11653518676758, + "learning_rate": 4.97292977419179e-07, + "loss": 0.3026, + "num_input_tokens_seen": 34481600, + "step": 10950 + }, + { + "epoch": 0.7012995326803662, + "grad_norm": 19.48086166381836, + "learning_rate": 4.963273747725755e-07, + "loss": 0.2954, + "num_input_tokens_seen": 34498752, + "step": 10955 + }, + { + "epoch": 0.701619614621343, + "grad_norm": 26.763914108276367, + "learning_rate": 4.953624009671582e-07, + "loss": 0.4061, + "num_input_tokens_seen": 34514240, + "step": 10960 + }, + { + "epoch": 0.7019396965623199, + "grad_norm": 44.18442153930664, + "learning_rate": 4.943980572077086e-07, + "loss": 0.4161, + "num_input_tokens_seen": 34528704, + "step": 10965 + }, + { + "epoch": 0.7022597785032968, + "grad_norm": 38.56117630004883, + "learning_rate": 4.934343446982209e-07, + "loss": 0.3243, + "num_input_tokens_seen": 34544704, + "step": 10970 + }, + { + "epoch": 0.7025798604442738, + "grad_norm": 13.776517868041992, + "learning_rate": 4.924712646419016e-07, + "loss": 0.3698, + "num_input_tokens_seen": 34560000, + "step": 10975 + }, + { + "epoch": 0.7028999423852507, + "grad_norm": 70.76254272460938, + "learning_rate": 4.915088182411674e-07, + "loss": 0.3211, + "num_input_tokens_seen": 34575296, + "step": 10980 + }, + { + "epoch": 0.7032200243262275, + "grad_norm": 33.83591842651367, + "learning_rate": 4.905470066976439e-07, + "loss": 0.3715, + "num_input_tokens_seen": 34590528, + "step": 10985 + }, + { + "epoch": 0.7035401062672044, + "grad_norm": 37.384647369384766, + "learning_rate": 4.895858312121644e-07, + "loss": 0.4187, + "num_input_tokens_seen": 34605312, + "step": 10990 + }, + { + "epoch": 0.7038601882081813, + "grad_norm": 24.66256332397461, + "learning_rate": 4.886252929847674e-07, + "loss": 0.4337, + "num_input_tokens_seen": 34620736, + "step": 10995 + }, + { + "epoch": 0.7041802701491582, + "grad_norm": 42.17767333984375, + "learning_rate": 4.876653932146963e-07, + "loss": 0.4578, + "num_input_tokens_seen": 34636736, + "step": 11000 + }, + { + "epoch": 0.7045003520901351, + "grad_norm": 31.28046417236328, + "learning_rate": 4.86706133100397e-07, + "loss": 0.3782, + "num_input_tokens_seen": 34651776, + "step": 11005 + }, + { + "epoch": 0.7048204340311119, + "grad_norm": 52.68522644042969, + "learning_rate": 4.857475138395178e-07, + "loss": 0.2923, + "num_input_tokens_seen": 34666176, + "step": 11010 + }, + { + "epoch": 0.7051405159720888, + "grad_norm": 15.296350479125977, + "learning_rate": 4.847895366289054e-07, + "loss": 0.2529, + "num_input_tokens_seen": 34682112, + "step": 11015 + }, + { + "epoch": 0.7054605979130657, + "grad_norm": 32.735904693603516, + "learning_rate": 4.838322026646057e-07, + "loss": 0.3828, + "num_input_tokens_seen": 34697024, + "step": 11020 + }, + { + "epoch": 0.7057806798540426, + "grad_norm": 20.01278305053711, + "learning_rate": 4.82875513141861e-07, + "loss": 0.3577, + "num_input_tokens_seen": 34712704, + "step": 11025 + }, + { + "epoch": 0.7061007617950196, + "grad_norm": 29.205598831176758, + "learning_rate": 4.819194692551106e-07, + "loss": 0.3791, + "num_input_tokens_seen": 34728256, + "step": 11030 + }, + { + "epoch": 0.7064208437359965, + "grad_norm": 16.80168914794922, + "learning_rate": 4.809640721979855e-07, + "loss": 0.4268, + "num_input_tokens_seen": 34744512, + "step": 11035 + }, + { + "epoch": 0.7067409256769733, + "grad_norm": 47.780738830566406, + "learning_rate": 4.8000932316331e-07, + "loss": 0.4158, + "num_input_tokens_seen": 34758912, + "step": 11040 + }, + { + "epoch": 0.7070610076179502, + "grad_norm": 29.31734848022461, + "learning_rate": 4.790552233431002e-07, + "loss": 0.4037, + "num_input_tokens_seen": 34774848, + "step": 11045 + }, + { + "epoch": 0.7073810895589271, + "grad_norm": 34.01865005493164, + "learning_rate": 4.781017739285611e-07, + "loss": 0.4168, + "num_input_tokens_seen": 34790016, + "step": 11050 + }, + { + "epoch": 0.707701171499904, + "grad_norm": 13.347481727600098, + "learning_rate": 4.771489761100842e-07, + "loss": 0.3453, + "num_input_tokens_seen": 34804992, + "step": 11055 + }, + { + "epoch": 0.7080212534408808, + "grad_norm": 40.918357849121094, + "learning_rate": 4.761968310772501e-07, + "loss": 0.2687, + "num_input_tokens_seen": 34820288, + "step": 11060 + }, + { + "epoch": 0.7083413353818577, + "grad_norm": 40.15391540527344, + "learning_rate": 4.7524534001882267e-07, + "loss": 0.2718, + "num_input_tokens_seen": 34836096, + "step": 11065 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 28.034465789794922, + "learning_rate": 4.7429450412274897e-07, + "loss": 0.394, + "num_input_tokens_seen": 34851584, + "step": 11070 + }, + { + "epoch": 0.7089814992638115, + "grad_norm": 23.965686798095703, + "learning_rate": 4.733443245761596e-07, + "loss": 0.3458, + "num_input_tokens_seen": 34868032, + "step": 11075 + }, + { + "epoch": 0.7093015812047885, + "grad_norm": 25.30048179626465, + "learning_rate": 4.723948025653646e-07, + "loss": 0.3821, + "num_input_tokens_seen": 34884032, + "step": 11080 + }, + { + "epoch": 0.7096216631457654, + "grad_norm": 29.63812828063965, + "learning_rate": 4.714459392758534e-07, + "loss": 0.3254, + "num_input_tokens_seen": 34899456, + "step": 11085 + }, + { + "epoch": 0.7099417450867422, + "grad_norm": 51.972572326660156, + "learning_rate": 4.70497735892293e-07, + "loss": 0.3735, + "num_input_tokens_seen": 34915456, + "step": 11090 + }, + { + "epoch": 0.7102618270277191, + "grad_norm": 16.07594871520996, + "learning_rate": 4.695501935985263e-07, + "loss": 0.3331, + "num_input_tokens_seen": 34931328, + "step": 11095 + }, + { + "epoch": 0.710581908968696, + "grad_norm": 34.51850128173828, + "learning_rate": 4.686033135775711e-07, + "loss": 0.3999, + "num_input_tokens_seen": 34946816, + "step": 11100 + }, + { + "epoch": 0.7109019909096729, + "grad_norm": 25.590112686157227, + "learning_rate": 4.6765709701161817e-07, + "loss": 0.3245, + "num_input_tokens_seen": 34964544, + "step": 11105 + }, + { + "epoch": 0.7112220728506498, + "grad_norm": 66.03004455566406, + "learning_rate": 4.6671154508203003e-07, + "loss": 0.3996, + "num_input_tokens_seen": 34982208, + "step": 11110 + }, + { + "epoch": 0.7115421547916266, + "grad_norm": 42.14921188354492, + "learning_rate": 4.657666589693393e-07, + "loss": 0.3439, + "num_input_tokens_seen": 35000576, + "step": 11115 + }, + { + "epoch": 0.7118622367326035, + "grad_norm": 26.2552433013916, + "learning_rate": 4.6482243985324753e-07, + "loss": 0.3145, + "num_input_tokens_seen": 35014912, + "step": 11120 + }, + { + "epoch": 0.7121823186735804, + "grad_norm": 28.899272918701172, + "learning_rate": 4.638788889126232e-07, + "loss": 0.2914, + "num_input_tokens_seen": 35029632, + "step": 11125 + }, + { + "epoch": 0.7125024006145573, + "grad_norm": 27.084138870239258, + "learning_rate": 4.6293600732550085e-07, + "loss": 0.3239, + "num_input_tokens_seen": 35044992, + "step": 11130 + }, + { + "epoch": 0.7128224825555343, + "grad_norm": 16.42285919189453, + "learning_rate": 4.619937962690792e-07, + "loss": 0.4686, + "num_input_tokens_seen": 35060544, + "step": 11135 + }, + { + "epoch": 0.7131425644965111, + "grad_norm": 57.51594924926758, + "learning_rate": 4.610522569197197e-07, + "loss": 0.5105, + "num_input_tokens_seen": 35075648, + "step": 11140 + }, + { + "epoch": 0.713462646437488, + "grad_norm": 20.691587448120117, + "learning_rate": 4.6011139045294554e-07, + "loss": 0.3294, + "num_input_tokens_seen": 35090880, + "step": 11145 + }, + { + "epoch": 0.7137827283784649, + "grad_norm": 99.84747314453125, + "learning_rate": 4.59171198043439e-07, + "loss": 0.3904, + "num_input_tokens_seen": 35106432, + "step": 11150 + }, + { + "epoch": 0.7141028103194418, + "grad_norm": 28.633445739746094, + "learning_rate": 4.582316808650424e-07, + "loss": 0.4349, + "num_input_tokens_seen": 35121664, + "step": 11155 + }, + { + "epoch": 0.7144228922604187, + "grad_norm": 42.922950744628906, + "learning_rate": 4.572928400907529e-07, + "loss": 0.491, + "num_input_tokens_seen": 35137152, + "step": 11160 + }, + { + "epoch": 0.7147429742013955, + "grad_norm": 38.647911071777344, + "learning_rate": 4.5635467689272434e-07, + "loss": 0.3682, + "num_input_tokens_seen": 35153088, + "step": 11165 + }, + { + "epoch": 0.7150630561423724, + "grad_norm": 22.412986755371094, + "learning_rate": 4.554171924422655e-07, + "loss": 0.3654, + "num_input_tokens_seen": 35168192, + "step": 11170 + }, + { + "epoch": 0.7153831380833493, + "grad_norm": 20.65825653076172, + "learning_rate": 4.544803879098356e-07, + "loss": 0.3242, + "num_input_tokens_seen": 35184192, + "step": 11175 + }, + { + "epoch": 0.7157032200243262, + "grad_norm": 23.79654884338379, + "learning_rate": 4.535442644650462e-07, + "loss": 0.3848, + "num_input_tokens_seen": 35200256, + "step": 11180 + }, + { + "epoch": 0.7160233019653032, + "grad_norm": 24.546035766601562, + "learning_rate": 4.5260882327665906e-07, + "loss": 0.4889, + "num_input_tokens_seen": 35214720, + "step": 11185 + }, + { + "epoch": 0.71634338390628, + "grad_norm": 38.554954528808594, + "learning_rate": 4.5167406551258347e-07, + "loss": 0.5077, + "num_input_tokens_seen": 35230720, + "step": 11190 + }, + { + "epoch": 0.7166634658472569, + "grad_norm": 29.644372940063477, + "learning_rate": 4.5073999233987445e-07, + "loss": 0.3948, + "num_input_tokens_seen": 35246400, + "step": 11195 + }, + { + "epoch": 0.7169835477882338, + "grad_norm": 30.197397232055664, + "learning_rate": 4.4980660492473434e-07, + "loss": 0.4854, + "num_input_tokens_seen": 35262784, + "step": 11200 + }, + { + "epoch": 0.7173036297292107, + "grad_norm": 14.077301025390625, + "learning_rate": 4.4887390443250804e-07, + "loss": 0.2735, + "num_input_tokens_seen": 35277632, + "step": 11205 + }, + { + "epoch": 0.7176237116701876, + "grad_norm": 18.285058975219727, + "learning_rate": 4.4794189202768295e-07, + "loss": 0.2981, + "num_input_tokens_seen": 35292544, + "step": 11210 + }, + { + "epoch": 0.7179437936111644, + "grad_norm": 32.815086364746094, + "learning_rate": 4.4701056887388757e-07, + "loss": 0.3816, + "num_input_tokens_seen": 35308352, + "step": 11215 + }, + { + "epoch": 0.7182638755521413, + "grad_norm": 31.119327545166016, + "learning_rate": 4.460799361338897e-07, + "loss": 0.3307, + "num_input_tokens_seen": 35323904, + "step": 11220 + }, + { + "epoch": 0.7185839574931182, + "grad_norm": 18.72206687927246, + "learning_rate": 4.451499949695954e-07, + "loss": 0.4203, + "num_input_tokens_seen": 35340224, + "step": 11225 + }, + { + "epoch": 0.7189040394340951, + "grad_norm": 17.376712799072266, + "learning_rate": 4.44220746542047e-07, + "loss": 0.375, + "num_input_tokens_seen": 35355776, + "step": 11230 + }, + { + "epoch": 0.719224121375072, + "grad_norm": 38.786521911621094, + "learning_rate": 4.432921920114221e-07, + "loss": 0.474, + "num_input_tokens_seen": 35371072, + "step": 11235 + }, + { + "epoch": 0.719544203316049, + "grad_norm": 36.586570739746094, + "learning_rate": 4.4236433253703185e-07, + "loss": 0.3144, + "num_input_tokens_seen": 35387520, + "step": 11240 + }, + { + "epoch": 0.7198642852570258, + "grad_norm": 36.61032485961914, + "learning_rate": 4.4143716927732e-07, + "loss": 0.4042, + "num_input_tokens_seen": 35403840, + "step": 11245 + }, + { + "epoch": 0.7201843671980027, + "grad_norm": 26.34575843811035, + "learning_rate": 4.405107033898604e-07, + "loss": 0.3767, + "num_input_tokens_seen": 35420032, + "step": 11250 + }, + { + "epoch": 0.7205044491389796, + "grad_norm": 33.59138107299805, + "learning_rate": 4.395849360313568e-07, + "loss": 0.2887, + "num_input_tokens_seen": 35436032, + "step": 11255 + }, + { + "epoch": 0.7208245310799565, + "grad_norm": 44.58377456665039, + "learning_rate": 4.386598683576406e-07, + "loss": 0.3505, + "num_input_tokens_seen": 35451136, + "step": 11260 + }, + { + "epoch": 0.7211446130209334, + "grad_norm": 17.373126983642578, + "learning_rate": 4.377355015236696e-07, + "loss": 0.4744, + "num_input_tokens_seen": 35466816, + "step": 11265 + }, + { + "epoch": 0.7214646949619102, + "grad_norm": 33.182308197021484, + "learning_rate": 4.368118366835266e-07, + "loss": 0.3588, + "num_input_tokens_seen": 35483456, + "step": 11270 + }, + { + "epoch": 0.7217847769028871, + "grad_norm": 40.823421478271484, + "learning_rate": 4.358888749904177e-07, + "loss": 0.4691, + "num_input_tokens_seen": 35499584, + "step": 11275 + }, + { + "epoch": 0.722104858843864, + "grad_norm": 24.432401657104492, + "learning_rate": 4.349666175966725e-07, + "loss": 0.3521, + "num_input_tokens_seen": 35515328, + "step": 11280 + }, + { + "epoch": 0.7224249407848409, + "grad_norm": 18.420427322387695, + "learning_rate": 4.340450656537392e-07, + "loss": 0.4721, + "num_input_tokens_seen": 35530048, + "step": 11285 + }, + { + "epoch": 0.7227450227258178, + "grad_norm": 31.080825805664062, + "learning_rate": 4.331242203121861e-07, + "loss": 0.2995, + "num_input_tokens_seen": 35545792, + "step": 11290 + }, + { + "epoch": 0.7230651046667947, + "grad_norm": 43.900115966796875, + "learning_rate": 4.322040827217004e-07, + "loss": 0.3775, + "num_input_tokens_seen": 35561344, + "step": 11295 + }, + { + "epoch": 0.7233851866077716, + "grad_norm": 42.54143142700195, + "learning_rate": 4.312846540310838e-07, + "loss": 0.4064, + "num_input_tokens_seen": 35577024, + "step": 11300 + }, + { + "epoch": 0.7237052685487485, + "grad_norm": 25.552127838134766, + "learning_rate": 4.3036593538825373e-07, + "loss": 0.3527, + "num_input_tokens_seen": 35592192, + "step": 11305 + }, + { + "epoch": 0.7240253504897254, + "grad_norm": 15.031996726989746, + "learning_rate": 4.2944792794024196e-07, + "loss": 0.3375, + "num_input_tokens_seen": 35607872, + "step": 11310 + }, + { + "epoch": 0.7243454324307023, + "grad_norm": 23.10059928894043, + "learning_rate": 4.285306328331915e-07, + "loss": 0.3015, + "num_input_tokens_seen": 35623872, + "step": 11315 + }, + { + "epoch": 0.7246655143716791, + "grad_norm": 27.68567657470703, + "learning_rate": 4.2761405121235506e-07, + "loss": 0.3168, + "num_input_tokens_seen": 35638720, + "step": 11320 + }, + { + "epoch": 0.724985596312656, + "grad_norm": 21.363649368286133, + "learning_rate": 4.266981842220965e-07, + "loss": 0.538, + "num_input_tokens_seen": 35655680, + "step": 11325 + }, + { + "epoch": 0.7253056782536329, + "grad_norm": 25.60169219970703, + "learning_rate": 4.257830330058864e-07, + "loss": 0.2663, + "num_input_tokens_seen": 35671168, + "step": 11330 + }, + { + "epoch": 0.7256257601946098, + "grad_norm": 28.766132354736328, + "learning_rate": 4.248685987063019e-07, + "loss": 0.4085, + "num_input_tokens_seen": 35686848, + "step": 11335 + }, + { + "epoch": 0.7259458421355867, + "grad_norm": 25.852869033813477, + "learning_rate": 4.2395488246502396e-07, + "loss": 0.3486, + "num_input_tokens_seen": 35702720, + "step": 11340 + }, + { + "epoch": 0.7262659240765637, + "grad_norm": 35.1387939453125, + "learning_rate": 4.2304188542283913e-07, + "loss": 0.4532, + "num_input_tokens_seen": 35720640, + "step": 11345 + }, + { + "epoch": 0.7265860060175405, + "grad_norm": 63.59513854980469, + "learning_rate": 4.221296087196347e-07, + "loss": 0.3855, + "num_input_tokens_seen": 35735424, + "step": 11350 + }, + { + "epoch": 0.7269060879585174, + "grad_norm": 22.047700881958008, + "learning_rate": 4.2121805349439867e-07, + "loss": 0.46, + "num_input_tokens_seen": 35751168, + "step": 11355 + }, + { + "epoch": 0.7272261698994943, + "grad_norm": 29.550992965698242, + "learning_rate": 4.203072208852184e-07, + "loss": 0.3829, + "num_input_tokens_seen": 35767168, + "step": 11360 + }, + { + "epoch": 0.7275462518404712, + "grad_norm": 47.271080017089844, + "learning_rate": 4.193971120292793e-07, + "loss": 0.447, + "num_input_tokens_seen": 35782464, + "step": 11365 + }, + { + "epoch": 0.727866333781448, + "grad_norm": 18.53926658630371, + "learning_rate": 4.184877280628629e-07, + "loss": 0.4004, + "num_input_tokens_seen": 35798592, + "step": 11370 + }, + { + "epoch": 0.7281864157224249, + "grad_norm": 35.71843719482422, + "learning_rate": 4.1757907012134565e-07, + "loss": 0.3955, + "num_input_tokens_seen": 35814720, + "step": 11375 + }, + { + "epoch": 0.7285064976634018, + "grad_norm": 32.7597770690918, + "learning_rate": 4.166711393391978e-07, + "loss": 0.2807, + "num_input_tokens_seen": 35830016, + "step": 11380 + }, + { + "epoch": 0.7288265796043787, + "grad_norm": 17.185914993286133, + "learning_rate": 4.1576393684998146e-07, + "loss": 0.3365, + "num_input_tokens_seen": 35845632, + "step": 11385 + }, + { + "epoch": 0.7291466615453556, + "grad_norm": 23.883012771606445, + "learning_rate": 4.1485746378634966e-07, + "loss": 0.3505, + "num_input_tokens_seen": 35861184, + "step": 11390 + }, + { + "epoch": 0.7294667434863324, + "grad_norm": 36.17485046386719, + "learning_rate": 4.1395172128004473e-07, + "loss": 0.4186, + "num_input_tokens_seen": 35876864, + "step": 11395 + }, + { + "epoch": 0.7297868254273094, + "grad_norm": 23.241865158081055, + "learning_rate": 4.130467104618963e-07, + "loss": 0.3272, + "num_input_tokens_seen": 35893568, + "step": 11400 + }, + { + "epoch": 0.7301069073682863, + "grad_norm": 27.966672897338867, + "learning_rate": 4.1214243246182223e-07, + "loss": 0.3336, + "num_input_tokens_seen": 35909696, + "step": 11405 + }, + { + "epoch": 0.7304269893092632, + "grad_norm": 27.110546112060547, + "learning_rate": 4.1123888840882306e-07, + "loss": 0.465, + "num_input_tokens_seen": 35925120, + "step": 11410 + }, + { + "epoch": 0.7307470712502401, + "grad_norm": 27.762094497680664, + "learning_rate": 4.1033607943098415e-07, + "loss": 0.3184, + "num_input_tokens_seen": 35940800, + "step": 11415 + }, + { + "epoch": 0.731067153191217, + "grad_norm": 13.206759452819824, + "learning_rate": 4.0943400665547423e-07, + "loss": 0.3461, + "num_input_tokens_seen": 35955968, + "step": 11420 + }, + { + "epoch": 0.7313872351321938, + "grad_norm": 45.63411331176758, + "learning_rate": 4.0853267120854064e-07, + "loss": 0.3261, + "num_input_tokens_seen": 35972096, + "step": 11425 + }, + { + "epoch": 0.7317073170731707, + "grad_norm": 22.173538208007812, + "learning_rate": 4.076320742155117e-07, + "loss": 0.3358, + "num_input_tokens_seen": 35986624, + "step": 11430 + }, + { + "epoch": 0.7320273990141476, + "grad_norm": 12.858855247497559, + "learning_rate": 4.067322168007928e-07, + "loss": 0.3546, + "num_input_tokens_seen": 36003008, + "step": 11435 + }, + { + "epoch": 0.7323474809551245, + "grad_norm": 24.93453598022461, + "learning_rate": 4.0583310008786775e-07, + "loss": 0.3539, + "num_input_tokens_seen": 36017152, + "step": 11440 + }, + { + "epoch": 0.7326675628961014, + "grad_norm": 42.7269287109375, + "learning_rate": 4.049347251992932e-07, + "loss": 0.2777, + "num_input_tokens_seen": 36031936, + "step": 11445 + }, + { + "epoch": 0.7329876448370783, + "grad_norm": 27.098237991333008, + "learning_rate": 4.0403709325670064e-07, + "loss": 0.3461, + "num_input_tokens_seen": 36048064, + "step": 11450 + }, + { + "epoch": 0.7333077267780552, + "grad_norm": 55.99066162109375, + "learning_rate": 4.03140205380795e-07, + "loss": 0.4433, + "num_input_tokens_seen": 36064256, + "step": 11455 + }, + { + "epoch": 0.7336278087190321, + "grad_norm": 65.33406066894531, + "learning_rate": 4.0224406269135115e-07, + "loss": 0.6545, + "num_input_tokens_seen": 36079424, + "step": 11460 + }, + { + "epoch": 0.733947890660009, + "grad_norm": 45.920005798339844, + "learning_rate": 4.0134866630721266e-07, + "loss": 0.3062, + "num_input_tokens_seen": 36095424, + "step": 11465 + }, + { + "epoch": 0.7342679726009859, + "grad_norm": 24.08492660522461, + "learning_rate": 4.0045401734629367e-07, + "loss": 0.3666, + "num_input_tokens_seen": 36111360, + "step": 11470 + }, + { + "epoch": 0.7345880545419627, + "grad_norm": 25.49542236328125, + "learning_rate": 3.9956011692557377e-07, + "loss": 0.3819, + "num_input_tokens_seen": 36127232, + "step": 11475 + }, + { + "epoch": 0.7349081364829396, + "grad_norm": 50.120731353759766, + "learning_rate": 3.986669661610972e-07, + "loss": 0.3447, + "num_input_tokens_seen": 36143168, + "step": 11480 + }, + { + "epoch": 0.7352282184239165, + "grad_norm": 34.99326705932617, + "learning_rate": 3.9777456616797414e-07, + "loss": 0.329, + "num_input_tokens_seen": 36158272, + "step": 11485 + }, + { + "epoch": 0.7355483003648934, + "grad_norm": 53.85727310180664, + "learning_rate": 3.968829180603761e-07, + "loss": 0.3544, + "num_input_tokens_seen": 36173056, + "step": 11490 + }, + { + "epoch": 0.7358683823058703, + "grad_norm": 48.56296920776367, + "learning_rate": 3.9599202295153624e-07, + "loss": 0.4025, + "num_input_tokens_seen": 36187904, + "step": 11495 + }, + { + "epoch": 0.7361884642468471, + "grad_norm": 70.58976745605469, + "learning_rate": 3.951018819537476e-07, + "loss": 0.3587, + "num_input_tokens_seen": 36205632, + "step": 11500 + }, + { + "epoch": 0.7365085461878241, + "grad_norm": 33.671356201171875, + "learning_rate": 3.942124961783616e-07, + "loss": 0.3492, + "num_input_tokens_seen": 36220160, + "step": 11505 + }, + { + "epoch": 0.736828628128801, + "grad_norm": 27.56850242614746, + "learning_rate": 3.933238667357869e-07, + "loss": 0.3096, + "num_input_tokens_seen": 36236416, + "step": 11510 + }, + { + "epoch": 0.7371487100697779, + "grad_norm": 41.809757232666016, + "learning_rate": 3.924359947354876e-07, + "loss": 0.3546, + "num_input_tokens_seen": 36251584, + "step": 11515 + }, + { + "epoch": 0.7374687920107548, + "grad_norm": 13.83644962310791, + "learning_rate": 3.915488812859826e-07, + "loss": 0.3261, + "num_input_tokens_seen": 36265856, + "step": 11520 + }, + { + "epoch": 0.7377888739517316, + "grad_norm": 58.69389724731445, + "learning_rate": 3.90662527494843e-07, + "loss": 0.3797, + "num_input_tokens_seen": 36283904, + "step": 11525 + }, + { + "epoch": 0.7381089558927085, + "grad_norm": 34.365379333496094, + "learning_rate": 3.8977693446869285e-07, + "loss": 0.3638, + "num_input_tokens_seen": 36298432, + "step": 11530 + }, + { + "epoch": 0.7384290378336854, + "grad_norm": 28.92525291442871, + "learning_rate": 3.8889210331320445e-07, + "loss": 0.3298, + "num_input_tokens_seen": 36313728, + "step": 11535 + }, + { + "epoch": 0.7387491197746623, + "grad_norm": 20.923290252685547, + "learning_rate": 3.8800803513310033e-07, + "loss": 0.3795, + "num_input_tokens_seen": 36329088, + "step": 11540 + }, + { + "epoch": 0.7390692017156392, + "grad_norm": 37.18941116333008, + "learning_rate": 3.8712473103214993e-07, + "loss": 0.4125, + "num_input_tokens_seen": 36345024, + "step": 11545 + }, + { + "epoch": 0.739389283656616, + "grad_norm": 21.793880462646484, + "learning_rate": 3.862421921131688e-07, + "loss": 0.3077, + "num_input_tokens_seen": 36361792, + "step": 11550 + }, + { + "epoch": 0.739709365597593, + "grad_norm": 39.02511978149414, + "learning_rate": 3.85360419478017e-07, + "loss": 0.2844, + "num_input_tokens_seen": 36377152, + "step": 11555 + }, + { + "epoch": 0.7400294475385699, + "grad_norm": 22.608049392700195, + "learning_rate": 3.8447941422759786e-07, + "loss": 0.346, + "num_input_tokens_seen": 36394048, + "step": 11560 + }, + { + "epoch": 0.7403495294795468, + "grad_norm": 31.372352600097656, + "learning_rate": 3.835991774618579e-07, + "loss": 0.3546, + "num_input_tokens_seen": 36409152, + "step": 11565 + }, + { + "epoch": 0.7406696114205237, + "grad_norm": 89.33686065673828, + "learning_rate": 3.827197102797818e-07, + "loss": 0.3882, + "num_input_tokens_seen": 36427072, + "step": 11570 + }, + { + "epoch": 0.7409896933615006, + "grad_norm": 60.6555290222168, + "learning_rate": 3.818410137793947e-07, + "loss": 0.4667, + "num_input_tokens_seen": 36444288, + "step": 11575 + }, + { + "epoch": 0.7413097753024774, + "grad_norm": 17.558565139770508, + "learning_rate": 3.809630890577602e-07, + "loss": 0.4323, + "num_input_tokens_seen": 36460096, + "step": 11580 + }, + { + "epoch": 0.7416298572434543, + "grad_norm": 123.81878662109375, + "learning_rate": 3.800859372109777e-07, + "loss": 0.3414, + "num_input_tokens_seen": 36475264, + "step": 11585 + }, + { + "epoch": 0.7419499391844312, + "grad_norm": 19.393999099731445, + "learning_rate": 3.7920955933418055e-07, + "loss": 0.3205, + "num_input_tokens_seen": 36491264, + "step": 11590 + }, + { + "epoch": 0.7422700211254081, + "grad_norm": 45.717002868652344, + "learning_rate": 3.7833395652153775e-07, + "loss": 0.3158, + "num_input_tokens_seen": 36506368, + "step": 11595 + }, + { + "epoch": 0.742590103066385, + "grad_norm": 36.06786346435547, + "learning_rate": 3.774591298662497e-07, + "loss": 0.2953, + "num_input_tokens_seen": 36522432, + "step": 11600 + }, + { + "epoch": 0.7429101850073618, + "grad_norm": 68.29784393310547, + "learning_rate": 3.765850804605468e-07, + "loss": 0.4255, + "num_input_tokens_seen": 36539008, + "step": 11605 + }, + { + "epoch": 0.7432302669483388, + "grad_norm": 23.243270874023438, + "learning_rate": 3.7571180939569104e-07, + "loss": 0.2863, + "num_input_tokens_seen": 36554240, + "step": 11610 + }, + { + "epoch": 0.7435503488893157, + "grad_norm": 33.98516082763672, + "learning_rate": 3.748393177619711e-07, + "loss": 0.308, + "num_input_tokens_seen": 36569920, + "step": 11615 + }, + { + "epoch": 0.7438704308302926, + "grad_norm": 32.500240325927734, + "learning_rate": 3.739676066487032e-07, + "loss": 0.3273, + "num_input_tokens_seen": 36585792, + "step": 11620 + }, + { + "epoch": 0.7441905127712695, + "grad_norm": 21.474756240844727, + "learning_rate": 3.730966771442289e-07, + "loss": 0.2906, + "num_input_tokens_seen": 36601280, + "step": 11625 + }, + { + "epoch": 0.7445105947122463, + "grad_norm": 34.38766860961914, + "learning_rate": 3.722265303359137e-07, + "loss": 0.5193, + "num_input_tokens_seen": 36617152, + "step": 11630 + }, + { + "epoch": 0.7448306766532232, + "grad_norm": 57.639156341552734, + "learning_rate": 3.713571673101463e-07, + "loss": 0.4, + "num_input_tokens_seen": 36632512, + "step": 11635 + }, + { + "epoch": 0.7451507585942001, + "grad_norm": 13.589853286743164, + "learning_rate": 3.704885891523366e-07, + "loss": 0.3338, + "num_input_tokens_seen": 36647744, + "step": 11640 + }, + { + "epoch": 0.745470840535177, + "grad_norm": 36.2945442199707, + "learning_rate": 3.696207969469146e-07, + "loss": 0.3878, + "num_input_tokens_seen": 36663360, + "step": 11645 + }, + { + "epoch": 0.7457909224761539, + "grad_norm": 28.11053466796875, + "learning_rate": 3.6875379177732913e-07, + "loss": 0.3571, + "num_input_tokens_seen": 36678656, + "step": 11650 + }, + { + "epoch": 0.7461110044171307, + "grad_norm": 71.32048034667969, + "learning_rate": 3.6788757472604634e-07, + "loss": 0.4971, + "num_input_tokens_seen": 36693952, + "step": 11655 + }, + { + "epoch": 0.7464310863581076, + "grad_norm": 34.13432312011719, + "learning_rate": 3.6702214687454825e-07, + "loss": 0.3139, + "num_input_tokens_seen": 36709888, + "step": 11660 + }, + { + "epoch": 0.7467511682990846, + "grad_norm": 26.685256958007812, + "learning_rate": 3.6615750930333177e-07, + "loss": 0.3103, + "num_input_tokens_seen": 36725504, + "step": 11665 + }, + { + "epoch": 0.7470712502400615, + "grad_norm": 11.093647956848145, + "learning_rate": 3.65293663091907e-07, + "loss": 0.3055, + "num_input_tokens_seen": 36741376, + "step": 11670 + }, + { + "epoch": 0.7473913321810384, + "grad_norm": 34.28535461425781, + "learning_rate": 3.6443060931879623e-07, + "loss": 0.4277, + "num_input_tokens_seen": 36756864, + "step": 11675 + }, + { + "epoch": 0.7477114141220152, + "grad_norm": 28.244558334350586, + "learning_rate": 3.635683490615321e-07, + "loss": 0.4503, + "num_input_tokens_seen": 36772608, + "step": 11680 + }, + { + "epoch": 0.7480314960629921, + "grad_norm": 76.50003051757812, + "learning_rate": 3.6270688339665634e-07, + "loss": 0.2975, + "num_input_tokens_seen": 36788352, + "step": 11685 + }, + { + "epoch": 0.748351578003969, + "grad_norm": 38.5800666809082, + "learning_rate": 3.6184621339972e-07, + "loss": 0.3444, + "num_input_tokens_seen": 36804096, + "step": 11690 + }, + { + "epoch": 0.7486716599449459, + "grad_norm": 42.00413131713867, + "learning_rate": 3.609863401452786e-07, + "loss": 0.3568, + "num_input_tokens_seen": 36819776, + "step": 11695 + }, + { + "epoch": 0.7489917418859228, + "grad_norm": 37.22871017456055, + "learning_rate": 3.6012726470689416e-07, + "loss": 0.4084, + "num_input_tokens_seen": 36835072, + "step": 11700 + }, + { + "epoch": 0.7493118238268996, + "grad_norm": 25.7962646484375, + "learning_rate": 3.592689881571329e-07, + "loss": 0.3318, + "num_input_tokens_seen": 36850816, + "step": 11705 + }, + { + "epoch": 0.7496319057678765, + "grad_norm": 36.8912467956543, + "learning_rate": 3.5841151156756334e-07, + "loss": 0.4348, + "num_input_tokens_seen": 36866368, + "step": 11710 + }, + { + "epoch": 0.7499519877088535, + "grad_norm": 39.81080627441406, + "learning_rate": 3.575548360087539e-07, + "loss": 0.3994, + "num_input_tokens_seen": 36885376, + "step": 11715 + }, + { + "epoch": 0.7502720696498304, + "grad_norm": 17.17061996459961, + "learning_rate": 3.5669896255027533e-07, + "loss": 0.3173, + "num_input_tokens_seen": 36900288, + "step": 11720 + }, + { + "epoch": 0.7505921515908073, + "grad_norm": 16.3179988861084, + "learning_rate": 3.5584389226069543e-07, + "loss": 0.4035, + "num_input_tokens_seen": 36916224, + "step": 11725 + }, + { + "epoch": 0.7509122335317842, + "grad_norm": 18.672239303588867, + "learning_rate": 3.5498962620757866e-07, + "loss": 0.2995, + "num_input_tokens_seen": 36931648, + "step": 11730 + }, + { + "epoch": 0.7509122335317842, + "eval_loss": 0.3647865653038025, + "eval_runtime": 49.1603, + "eval_samples_per_second": 282.464, + "eval_steps_per_second": 35.313, + "num_input_tokens_seen": 36931648, + "step": 11730 + }, + { + "epoch": 0.751232315472761, + "grad_norm": 94.76543426513672, + "learning_rate": 3.5413616545748713e-07, + "loss": 0.4327, + "num_input_tokens_seen": 36945856, + "step": 11735 + }, + { + "epoch": 0.7515523974137379, + "grad_norm": 24.62285804748535, + "learning_rate": 3.532835110759763e-07, + "loss": 0.5026, + "num_input_tokens_seen": 36961792, + "step": 11740 + }, + { + "epoch": 0.7518724793547148, + "grad_norm": 24.09138298034668, + "learning_rate": 3.524316641275955e-07, + "loss": 0.3038, + "num_input_tokens_seen": 36977152, + "step": 11745 + }, + { + "epoch": 0.7521925612956917, + "grad_norm": 19.018442153930664, + "learning_rate": 3.5158062567588467e-07, + "loss": 0.4152, + "num_input_tokens_seen": 36991936, + "step": 11750 + }, + { + "epoch": 0.7525126432366686, + "grad_norm": 64.08114624023438, + "learning_rate": 3.5073039678337633e-07, + "loss": 0.3924, + "num_input_tokens_seen": 37006784, + "step": 11755 + }, + { + "epoch": 0.7528327251776454, + "grad_norm": 36.50153732299805, + "learning_rate": 3.498809785115908e-07, + "loss": 0.348, + "num_input_tokens_seen": 37022208, + "step": 11760 + }, + { + "epoch": 0.7531528071186223, + "grad_norm": 12.052895545959473, + "learning_rate": 3.4903237192103697e-07, + "loss": 0.3504, + "num_input_tokens_seen": 37039488, + "step": 11765 + }, + { + "epoch": 0.7534728890595993, + "grad_norm": 34.9785270690918, + "learning_rate": 3.481845780712099e-07, + "loss": 0.3372, + "num_input_tokens_seen": 37056064, + "step": 11770 + }, + { + "epoch": 0.7537929710005762, + "grad_norm": 47.886329650878906, + "learning_rate": 3.4733759802059037e-07, + "loss": 0.3354, + "num_input_tokens_seen": 37072256, + "step": 11775 + }, + { + "epoch": 0.7541130529415531, + "grad_norm": 68.2834701538086, + "learning_rate": 3.4649143282664273e-07, + "loss": 0.4239, + "num_input_tokens_seen": 37087360, + "step": 11780 + }, + { + "epoch": 0.7544331348825299, + "grad_norm": 27.56783676147461, + "learning_rate": 3.456460835458143e-07, + "loss": 0.2992, + "num_input_tokens_seen": 37102144, + "step": 11785 + }, + { + "epoch": 0.7547532168235068, + "grad_norm": 42.50265121459961, + "learning_rate": 3.4480155123353337e-07, + "loss": 0.3172, + "num_input_tokens_seen": 37117568, + "step": 11790 + }, + { + "epoch": 0.7550732987644837, + "grad_norm": 35.93981170654297, + "learning_rate": 3.4395783694420875e-07, + "loss": 0.4541, + "num_input_tokens_seen": 37132800, + "step": 11795 + }, + { + "epoch": 0.7553933807054606, + "grad_norm": 20.164365768432617, + "learning_rate": 3.4311494173122743e-07, + "loss": 0.4009, + "num_input_tokens_seen": 37147776, + "step": 11800 + }, + { + "epoch": 0.7557134626464375, + "grad_norm": 26.284648895263672, + "learning_rate": 3.422728666469541e-07, + "loss": 0.387, + "num_input_tokens_seen": 37163904, + "step": 11805 + }, + { + "epoch": 0.7560335445874143, + "grad_norm": 42.91219711303711, + "learning_rate": 3.41431612742729e-07, + "loss": 0.4272, + "num_input_tokens_seen": 37180416, + "step": 11810 + }, + { + "epoch": 0.7563536265283912, + "grad_norm": 23.454986572265625, + "learning_rate": 3.4059118106886855e-07, + "loss": 0.4243, + "num_input_tokens_seen": 37196480, + "step": 11815 + }, + { + "epoch": 0.7566737084693682, + "grad_norm": 74.44619750976562, + "learning_rate": 3.3975157267466036e-07, + "loss": 0.5118, + "num_input_tokens_seen": 37211648, + "step": 11820 + }, + { + "epoch": 0.7569937904103451, + "grad_norm": 25.939687728881836, + "learning_rate": 3.389127886083656e-07, + "loss": 0.29, + "num_input_tokens_seen": 37227072, + "step": 11825 + }, + { + "epoch": 0.757313872351322, + "grad_norm": 24.652931213378906, + "learning_rate": 3.3807482991721667e-07, + "loss": 0.3415, + "num_input_tokens_seen": 37243968, + "step": 11830 + }, + { + "epoch": 0.7576339542922989, + "grad_norm": 20.54140853881836, + "learning_rate": 3.3723769764741474e-07, + "loss": 0.3219, + "num_input_tokens_seen": 37259200, + "step": 11835 + }, + { + "epoch": 0.7579540362332757, + "grad_norm": 15.427878379821777, + "learning_rate": 3.3640139284412825e-07, + "loss": 0.2948, + "num_input_tokens_seen": 37275072, + "step": 11840 + }, + { + "epoch": 0.7582741181742526, + "grad_norm": 42.64249038696289, + "learning_rate": 3.355659165514948e-07, + "loss": 0.399, + "num_input_tokens_seen": 37291392, + "step": 11845 + }, + { + "epoch": 0.7585942001152295, + "grad_norm": 16.320554733276367, + "learning_rate": 3.347312698126161e-07, + "loss": 0.2714, + "num_input_tokens_seen": 37307648, + "step": 11850 + }, + { + "epoch": 0.7589142820562064, + "grad_norm": 13.9678316116333, + "learning_rate": 3.338974536695578e-07, + "loss": 0.2191, + "num_input_tokens_seen": 37323136, + "step": 11855 + }, + { + "epoch": 0.7592343639971832, + "grad_norm": 21.917150497436523, + "learning_rate": 3.330644691633492e-07, + "loss": 0.3183, + "num_input_tokens_seen": 37338496, + "step": 11860 + }, + { + "epoch": 0.7595544459381601, + "grad_norm": 10.6149320602417, + "learning_rate": 3.322323173339818e-07, + "loss": 0.2783, + "num_input_tokens_seen": 37356800, + "step": 11865 + }, + { + "epoch": 0.759874527879137, + "grad_norm": 25.766250610351562, + "learning_rate": 3.314009992204071e-07, + "loss": 0.4264, + "num_input_tokens_seen": 37372800, + "step": 11870 + }, + { + "epoch": 0.760194609820114, + "grad_norm": 66.81485748291016, + "learning_rate": 3.3057051586053443e-07, + "loss": 0.3269, + "num_input_tokens_seen": 37388608, + "step": 11875 + }, + { + "epoch": 0.7605146917610909, + "grad_norm": 35.36101150512695, + "learning_rate": 3.297408682912329e-07, + "loss": 0.4584, + "num_input_tokens_seen": 37405184, + "step": 11880 + }, + { + "epoch": 0.7608347737020678, + "grad_norm": 21.154664993286133, + "learning_rate": 3.289120575483271e-07, + "loss": 0.2741, + "num_input_tokens_seen": 37420096, + "step": 11885 + }, + { + "epoch": 0.7611548556430446, + "grad_norm": 31.978300094604492, + "learning_rate": 3.280840846665969e-07, + "loss": 0.4214, + "num_input_tokens_seen": 37434368, + "step": 11890 + }, + { + "epoch": 0.7614749375840215, + "grad_norm": 29.54779052734375, + "learning_rate": 3.272569506797761e-07, + "loss": 0.3005, + "num_input_tokens_seen": 37449344, + "step": 11895 + }, + { + "epoch": 0.7617950195249984, + "grad_norm": 28.143238067626953, + "learning_rate": 3.2643065662055136e-07, + "loss": 0.3314, + "num_input_tokens_seen": 37464448, + "step": 11900 + }, + { + "epoch": 0.7621151014659753, + "grad_norm": 69.54246520996094, + "learning_rate": 3.2560520352056033e-07, + "loss": 0.2837, + "num_input_tokens_seen": 37481856, + "step": 11905 + }, + { + "epoch": 0.7624351834069522, + "grad_norm": 18.128210067749023, + "learning_rate": 3.24780592410391e-07, + "loss": 0.3985, + "num_input_tokens_seen": 37497856, + "step": 11910 + }, + { + "epoch": 0.762755265347929, + "grad_norm": 39.83074188232422, + "learning_rate": 3.2395682431957994e-07, + "loss": 0.4494, + "num_input_tokens_seen": 37513600, + "step": 11915 + }, + { + "epoch": 0.7630753472889059, + "grad_norm": 32.585750579833984, + "learning_rate": 3.231339002766115e-07, + "loss": 0.324, + "num_input_tokens_seen": 37529408, + "step": 11920 + }, + { + "epoch": 0.7633954292298829, + "grad_norm": 30.76116371154785, + "learning_rate": 3.2231182130891564e-07, + "loss": 0.3296, + "num_input_tokens_seen": 37545984, + "step": 11925 + }, + { + "epoch": 0.7637155111708598, + "grad_norm": 59.110801696777344, + "learning_rate": 3.214905884428679e-07, + "loss": 0.3405, + "num_input_tokens_seen": 37561856, + "step": 11930 + }, + { + "epoch": 0.7640355931118367, + "grad_norm": 29.65723991394043, + "learning_rate": 3.206702027037868e-07, + "loss": 0.3253, + "num_input_tokens_seen": 37578624, + "step": 11935 + }, + { + "epoch": 0.7643556750528135, + "grad_norm": 43.48826599121094, + "learning_rate": 3.198506651159344e-07, + "loss": 0.3882, + "num_input_tokens_seen": 37593920, + "step": 11940 + }, + { + "epoch": 0.7646757569937904, + "grad_norm": 23.43718147277832, + "learning_rate": 3.190319767025121e-07, + "loss": 0.38, + "num_input_tokens_seen": 37609664, + "step": 11945 + }, + { + "epoch": 0.7649958389347673, + "grad_norm": 59.76777267456055, + "learning_rate": 3.1821413848566213e-07, + "loss": 0.4989, + "num_input_tokens_seen": 37626048, + "step": 11950 + }, + { + "epoch": 0.7653159208757442, + "grad_norm": 17.83317756652832, + "learning_rate": 3.1739715148646564e-07, + "loss": 0.3798, + "num_input_tokens_seen": 37641792, + "step": 11955 + }, + { + "epoch": 0.7656360028167211, + "grad_norm": 51.09782409667969, + "learning_rate": 3.1658101672494043e-07, + "loss": 0.4583, + "num_input_tokens_seen": 37656512, + "step": 11960 + }, + { + "epoch": 0.7659560847576979, + "grad_norm": 46.76288604736328, + "learning_rate": 3.157657352200397e-07, + "loss": 0.3527, + "num_input_tokens_seen": 37672000, + "step": 11965 + }, + { + "epoch": 0.7662761666986748, + "grad_norm": 41.273860931396484, + "learning_rate": 3.149513079896521e-07, + "loss": 0.3362, + "num_input_tokens_seen": 37687232, + "step": 11970 + }, + { + "epoch": 0.7665962486396517, + "grad_norm": 19.319063186645508, + "learning_rate": 3.1413773605060034e-07, + "loss": 0.3244, + "num_input_tokens_seen": 37702656, + "step": 11975 + }, + { + "epoch": 0.7669163305806287, + "grad_norm": 65.8237533569336, + "learning_rate": 3.1332502041863783e-07, + "loss": 0.4343, + "num_input_tokens_seen": 37718080, + "step": 11980 + }, + { + "epoch": 0.7672364125216056, + "grad_norm": 29.25933837890625, + "learning_rate": 3.1251316210844946e-07, + "loss": 0.3141, + "num_input_tokens_seen": 37735680, + "step": 11985 + }, + { + "epoch": 0.7675564944625825, + "grad_norm": 51.57158660888672, + "learning_rate": 3.1170216213365055e-07, + "loss": 0.2871, + "num_input_tokens_seen": 37749952, + "step": 11990 + }, + { + "epoch": 0.7678765764035593, + "grad_norm": 34.71276092529297, + "learning_rate": 3.1089202150678397e-07, + "loss": 0.4607, + "num_input_tokens_seen": 37765312, + "step": 11995 + }, + { + "epoch": 0.7681966583445362, + "grad_norm": 49.59117126464844, + "learning_rate": 3.1008274123931886e-07, + "loss": 0.4695, + "num_input_tokens_seen": 37780160, + "step": 12000 + }, + { + "epoch": 0.7685167402855131, + "grad_norm": 25.49561309814453, + "learning_rate": 3.092743223416523e-07, + "loss": 0.2672, + "num_input_tokens_seen": 37796352, + "step": 12005 + }, + { + "epoch": 0.76883682222649, + "grad_norm": 59.26298522949219, + "learning_rate": 3.0846676582310413e-07, + "loss": 0.3499, + "num_input_tokens_seen": 37812864, + "step": 12010 + }, + { + "epoch": 0.7691569041674668, + "grad_norm": 43.80664825439453, + "learning_rate": 3.076600726919185e-07, + "loss": 0.3824, + "num_input_tokens_seen": 37827840, + "step": 12015 + }, + { + "epoch": 0.7694769861084437, + "grad_norm": 31.392080307006836, + "learning_rate": 3.0685424395526106e-07, + "loss": 0.3579, + "num_input_tokens_seen": 37847040, + "step": 12020 + }, + { + "epoch": 0.7697970680494206, + "grad_norm": 37.12458419799805, + "learning_rate": 3.060492806192184e-07, + "loss": 0.2819, + "num_input_tokens_seen": 37862464, + "step": 12025 + }, + { + "epoch": 0.7701171499903975, + "grad_norm": 36.16139221191406, + "learning_rate": 3.052451836887968e-07, + "loss": 0.377, + "num_input_tokens_seen": 37877760, + "step": 12030 + }, + { + "epoch": 0.7704372319313745, + "grad_norm": 28.426408767700195, + "learning_rate": 3.044419541679207e-07, + "loss": 0.2861, + "num_input_tokens_seen": 37892800, + "step": 12035 + }, + { + "epoch": 0.7707573138723514, + "grad_norm": 62.23591232299805, + "learning_rate": 3.0363959305943153e-07, + "loss": 0.4239, + "num_input_tokens_seen": 37909056, + "step": 12040 + }, + { + "epoch": 0.7710773958133282, + "grad_norm": 42.053489685058594, + "learning_rate": 3.028381013650867e-07, + "loss": 0.348, + "num_input_tokens_seen": 37925376, + "step": 12045 + }, + { + "epoch": 0.7713974777543051, + "grad_norm": 37.59280014038086, + "learning_rate": 3.0203748008555783e-07, + "loss": 0.3716, + "num_input_tokens_seen": 37941632, + "step": 12050 + }, + { + "epoch": 0.771717559695282, + "grad_norm": 41.64907455444336, + "learning_rate": 3.012377302204301e-07, + "loss": 0.3805, + "num_input_tokens_seen": 37957056, + "step": 12055 + }, + { + "epoch": 0.7720376416362589, + "grad_norm": 46.065406799316406, + "learning_rate": 3.0043885276820046e-07, + "loss": 0.3916, + "num_input_tokens_seen": 37973184, + "step": 12060 + }, + { + "epoch": 0.7723577235772358, + "grad_norm": 24.32598304748535, + "learning_rate": 2.99640848726277e-07, + "loss": 0.3087, + "num_input_tokens_seen": 37988288, + "step": 12065 + }, + { + "epoch": 0.7726778055182126, + "grad_norm": 23.855104446411133, + "learning_rate": 2.9884371909097704e-07, + "loss": 0.3812, + "num_input_tokens_seen": 38004224, + "step": 12070 + }, + { + "epoch": 0.7729978874591895, + "grad_norm": 22.65608787536621, + "learning_rate": 2.9804746485752616e-07, + "loss": 0.3711, + "num_input_tokens_seen": 38019456, + "step": 12075 + }, + { + "epoch": 0.7733179694001664, + "grad_norm": 25.479469299316406, + "learning_rate": 2.972520870200573e-07, + "loss": 0.4058, + "num_input_tokens_seen": 38035264, + "step": 12080 + }, + { + "epoch": 0.7736380513411434, + "grad_norm": 31.957597732543945, + "learning_rate": 2.9645758657160904e-07, + "loss": 0.4045, + "num_input_tokens_seen": 38051072, + "step": 12085 + }, + { + "epoch": 0.7739581332821203, + "grad_norm": 16.966663360595703, + "learning_rate": 2.9566396450412444e-07, + "loss": 0.3538, + "num_input_tokens_seen": 38066688, + "step": 12090 + }, + { + "epoch": 0.7742782152230971, + "grad_norm": 22.1097354888916, + "learning_rate": 2.9487122180844957e-07, + "loss": 0.3193, + "num_input_tokens_seen": 38082048, + "step": 12095 + }, + { + "epoch": 0.774598297164074, + "grad_norm": 58.488800048828125, + "learning_rate": 2.9407935947433406e-07, + "loss": 0.2996, + "num_input_tokens_seen": 38097344, + "step": 12100 + }, + { + "epoch": 0.7749183791050509, + "grad_norm": 48.314144134521484, + "learning_rate": 2.932883784904264e-07, + "loss": 0.446, + "num_input_tokens_seen": 38112320, + "step": 12105 + }, + { + "epoch": 0.7752384610460278, + "grad_norm": 19.5347843170166, + "learning_rate": 2.9249827984427555e-07, + "loss": 0.2475, + "num_input_tokens_seen": 38128000, + "step": 12110 + }, + { + "epoch": 0.7755585429870047, + "grad_norm": 29.27086639404297, + "learning_rate": 2.917090645223297e-07, + "loss": 0.3015, + "num_input_tokens_seen": 38143168, + "step": 12115 + }, + { + "epoch": 0.7758786249279815, + "grad_norm": 27.007768630981445, + "learning_rate": 2.909207335099332e-07, + "loss": 0.2912, + "num_input_tokens_seen": 38157824, + "step": 12120 + }, + { + "epoch": 0.7761987068689584, + "grad_norm": 31.75836181640625, + "learning_rate": 2.9013328779132595e-07, + "loss": 0.3332, + "num_input_tokens_seen": 38172864, + "step": 12125 + }, + { + "epoch": 0.7765187888099353, + "grad_norm": 115.25257110595703, + "learning_rate": 2.893467283496439e-07, + "loss": 0.425, + "num_input_tokens_seen": 38187264, + "step": 12130 + }, + { + "epoch": 0.7768388707509122, + "grad_norm": 19.299240112304688, + "learning_rate": 2.885610561669155e-07, + "loss": 0.3551, + "num_input_tokens_seen": 38204288, + "step": 12135 + }, + { + "epoch": 0.7771589526918892, + "grad_norm": 28.34507179260254, + "learning_rate": 2.8777627222406163e-07, + "loss": 0.3462, + "num_input_tokens_seen": 38219264, + "step": 12140 + }, + { + "epoch": 0.777479034632866, + "grad_norm": 40.6217041015625, + "learning_rate": 2.869923775008943e-07, + "loss": 0.3863, + "num_input_tokens_seen": 38234496, + "step": 12145 + }, + { + "epoch": 0.7777991165738429, + "grad_norm": 37.57053756713867, + "learning_rate": 2.862093729761155e-07, + "loss": 0.2559, + "num_input_tokens_seen": 38251072, + "step": 12150 + }, + { + "epoch": 0.7781191985148198, + "grad_norm": 28.284217834472656, + "learning_rate": 2.854272596273152e-07, + "loss": 0.4049, + "num_input_tokens_seen": 38266560, + "step": 12155 + }, + { + "epoch": 0.7784392804557967, + "grad_norm": 43.39320373535156, + "learning_rate": 2.8464603843097134e-07, + "loss": 0.3287, + "num_input_tokens_seen": 38282944, + "step": 12160 + }, + { + "epoch": 0.7787593623967736, + "grad_norm": 32.42449951171875, + "learning_rate": 2.8386571036244764e-07, + "loss": 0.3291, + "num_input_tokens_seen": 38299264, + "step": 12165 + }, + { + "epoch": 0.7790794443377504, + "grad_norm": 51.791812896728516, + "learning_rate": 2.830862763959929e-07, + "loss": 0.39, + "num_input_tokens_seen": 38314368, + "step": 12170 + }, + { + "epoch": 0.7793995262787273, + "grad_norm": 10.4609956741333, + "learning_rate": 2.8230773750473956e-07, + "loss": 0.3154, + "num_input_tokens_seen": 38329664, + "step": 12175 + }, + { + "epoch": 0.7797196082197042, + "grad_norm": 27.046852111816406, + "learning_rate": 2.8153009466070267e-07, + "loss": 0.3072, + "num_input_tokens_seen": 38345408, + "step": 12180 + }, + { + "epoch": 0.7800396901606811, + "grad_norm": 32.581607818603516, + "learning_rate": 2.807533488347783e-07, + "loss": 0.2878, + "num_input_tokens_seen": 38362688, + "step": 12185 + }, + { + "epoch": 0.7803597721016581, + "grad_norm": 23.63336944580078, + "learning_rate": 2.7997750099674277e-07, + "loss": 0.2548, + "num_input_tokens_seen": 38377600, + "step": 12190 + }, + { + "epoch": 0.780679854042635, + "grad_norm": 36.57121276855469, + "learning_rate": 2.792025521152512e-07, + "loss": 0.5286, + "num_input_tokens_seen": 38392640, + "step": 12195 + }, + { + "epoch": 0.7809999359836118, + "grad_norm": 34.91606521606445, + "learning_rate": 2.784285031578365e-07, + "loss": 0.4496, + "num_input_tokens_seen": 38408448, + "step": 12200 + }, + { + "epoch": 0.7813200179245887, + "grad_norm": 26.795875549316406, + "learning_rate": 2.7765535509090786e-07, + "loss": 0.3629, + "num_input_tokens_seen": 38424512, + "step": 12205 + }, + { + "epoch": 0.7816400998655656, + "grad_norm": 29.603397369384766, + "learning_rate": 2.768831088797495e-07, + "loss": 0.4739, + "num_input_tokens_seen": 38439296, + "step": 12210 + }, + { + "epoch": 0.7819601818065425, + "grad_norm": 15.58344554901123, + "learning_rate": 2.761117654885201e-07, + "loss": 0.2482, + "num_input_tokens_seen": 38455424, + "step": 12215 + }, + { + "epoch": 0.7822802637475194, + "grad_norm": 24.491289138793945, + "learning_rate": 2.7534132588025063e-07, + "loss": 0.3265, + "num_input_tokens_seen": 38470976, + "step": 12220 + }, + { + "epoch": 0.7826003456884962, + "grad_norm": 27.425262451171875, + "learning_rate": 2.7457179101684483e-07, + "loss": 0.5075, + "num_input_tokens_seen": 38486016, + "step": 12225 + }, + { + "epoch": 0.7829204276294731, + "grad_norm": 22.376157760620117, + "learning_rate": 2.7380316185907506e-07, + "loss": 0.298, + "num_input_tokens_seen": 38501248, + "step": 12230 + }, + { + "epoch": 0.78324050957045, + "grad_norm": 19.046939849853516, + "learning_rate": 2.730354393665839e-07, + "loss": 0.3503, + "num_input_tokens_seen": 38516992, + "step": 12235 + }, + { + "epoch": 0.7835605915114269, + "grad_norm": 27.88618278503418, + "learning_rate": 2.7226862449788245e-07, + "loss": 0.3702, + "num_input_tokens_seen": 38531456, + "step": 12240 + }, + { + "epoch": 0.7838806734524039, + "grad_norm": 34.346378326416016, + "learning_rate": 2.715027182103482e-07, + "loss": 0.3264, + "num_input_tokens_seen": 38546880, + "step": 12245 + }, + { + "epoch": 0.7842007553933807, + "grad_norm": 20.54593276977539, + "learning_rate": 2.707377214602232e-07, + "loss": 0.3039, + "num_input_tokens_seen": 38562176, + "step": 12250 + }, + { + "epoch": 0.7845208373343576, + "grad_norm": 37.601043701171875, + "learning_rate": 2.699736352026157e-07, + "loss": 0.4366, + "num_input_tokens_seen": 38577472, + "step": 12255 + }, + { + "epoch": 0.7848409192753345, + "grad_norm": 22.17053985595703, + "learning_rate": 2.6921046039149645e-07, + "loss": 0.3297, + "num_input_tokens_seen": 38593088, + "step": 12260 + }, + { + "epoch": 0.7851610012163114, + "grad_norm": 31.56439208984375, + "learning_rate": 2.6844819797969744e-07, + "loss": 0.3408, + "num_input_tokens_seen": 38607936, + "step": 12265 + }, + { + "epoch": 0.7854810831572883, + "grad_norm": 40.473628997802734, + "learning_rate": 2.6768684891891236e-07, + "loss": 0.2481, + "num_input_tokens_seen": 38625024, + "step": 12270 + }, + { + "epoch": 0.7858011650982651, + "grad_norm": 30.89264678955078, + "learning_rate": 2.6692641415969497e-07, + "loss": 0.3321, + "num_input_tokens_seen": 38641792, + "step": 12275 + }, + { + "epoch": 0.786121247039242, + "grad_norm": 47.64722442626953, + "learning_rate": 2.66166894651457e-07, + "loss": 0.395, + "num_input_tokens_seen": 38656896, + "step": 12280 + }, + { + "epoch": 0.7864413289802189, + "grad_norm": 43.44092559814453, + "learning_rate": 2.654082913424668e-07, + "loss": 0.3426, + "num_input_tokens_seen": 38672448, + "step": 12285 + }, + { + "epoch": 0.7867614109211958, + "grad_norm": 27.422563552856445, + "learning_rate": 2.6465060517985003e-07, + "loss": 0.3016, + "num_input_tokens_seen": 38688576, + "step": 12290 + }, + { + "epoch": 0.7870814928621728, + "grad_norm": 44.733848571777344, + "learning_rate": 2.638938371095867e-07, + "loss": 0.5123, + "num_input_tokens_seen": 38704064, + "step": 12295 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 20.204547882080078, + "learning_rate": 2.6313798807651065e-07, + "loss": 0.381, + "num_input_tokens_seen": 38718976, + "step": 12300 + }, + { + "epoch": 0.7877216567441265, + "grad_norm": 17.738218307495117, + "learning_rate": 2.6238305902430813e-07, + "loss": 0.3529, + "num_input_tokens_seen": 38734272, + "step": 12305 + }, + { + "epoch": 0.7880417386851034, + "grad_norm": 14.163119316101074, + "learning_rate": 2.61629050895517e-07, + "loss": 0.307, + "num_input_tokens_seen": 38749504, + "step": 12310 + }, + { + "epoch": 0.7883618206260803, + "grad_norm": 27.0414981842041, + "learning_rate": 2.608759646315253e-07, + "loss": 0.3171, + "num_input_tokens_seen": 38764352, + "step": 12315 + }, + { + "epoch": 0.7886819025670572, + "grad_norm": 18.21839714050293, + "learning_rate": 2.6012380117257005e-07, + "loss": 0.3637, + "num_input_tokens_seen": 38780096, + "step": 12320 + }, + { + "epoch": 0.789001984508034, + "grad_norm": 33.14684295654297, + "learning_rate": 2.5937256145773613e-07, + "loss": 0.3902, + "num_input_tokens_seen": 38795712, + "step": 12325 + }, + { + "epoch": 0.7893220664490109, + "grad_norm": 39.35667037963867, + "learning_rate": 2.586222464249551e-07, + "loss": 0.3264, + "num_input_tokens_seen": 38811328, + "step": 12330 + }, + { + "epoch": 0.7896421483899878, + "grad_norm": 27.116695404052734, + "learning_rate": 2.5787285701100413e-07, + "loss": 0.2022, + "num_input_tokens_seen": 38826240, + "step": 12335 + }, + { + "epoch": 0.7899622303309647, + "grad_norm": 37.62165832519531, + "learning_rate": 2.571243941515048e-07, + "loss": 0.3672, + "num_input_tokens_seen": 38842624, + "step": 12340 + }, + { + "epoch": 0.7902823122719416, + "grad_norm": 22.701847076416016, + "learning_rate": 2.563768587809213e-07, + "loss": 0.2672, + "num_input_tokens_seen": 38857472, + "step": 12345 + }, + { + "epoch": 0.7906023942129186, + "grad_norm": 60.98664855957031, + "learning_rate": 2.5563025183256137e-07, + "loss": 0.4118, + "num_input_tokens_seen": 38872256, + "step": 12350 + }, + { + "epoch": 0.7909224761538954, + "grad_norm": 38.52484893798828, + "learning_rate": 2.548845742385717e-07, + "loss": 0.552, + "num_input_tokens_seen": 38890048, + "step": 12355 + }, + { + "epoch": 0.7912425580948723, + "grad_norm": 38.10274887084961, + "learning_rate": 2.541398269299393e-07, + "loss": 0.2356, + "num_input_tokens_seen": 38905664, + "step": 12360 + }, + { + "epoch": 0.7915626400358492, + "grad_norm": 12.663208961486816, + "learning_rate": 2.5339601083649063e-07, + "loss": 0.2978, + "num_input_tokens_seen": 38926144, + "step": 12365 + }, + { + "epoch": 0.7918827219768261, + "grad_norm": 34.63762283325195, + "learning_rate": 2.526531268868889e-07, + "loss": 0.4751, + "num_input_tokens_seen": 38942720, + "step": 12370 + }, + { + "epoch": 0.792202803917803, + "grad_norm": 25.847164154052734, + "learning_rate": 2.5191117600863266e-07, + "loss": 0.3397, + "num_input_tokens_seen": 38958144, + "step": 12375 + }, + { + "epoch": 0.7925228858587798, + "grad_norm": 20.030961990356445, + "learning_rate": 2.511701591280565e-07, + "loss": 0.2568, + "num_input_tokens_seen": 38973376, + "step": 12380 + }, + { + "epoch": 0.7928429677997567, + "grad_norm": 37.385189056396484, + "learning_rate": 2.504300771703295e-07, + "loss": 0.346, + "num_input_tokens_seen": 38989504, + "step": 12385 + }, + { + "epoch": 0.7931630497407336, + "grad_norm": 72.55767822265625, + "learning_rate": 2.496909310594517e-07, + "loss": 0.3626, + "num_input_tokens_seen": 39005056, + "step": 12390 + }, + { + "epoch": 0.7934831316817105, + "grad_norm": 40.421688079833984, + "learning_rate": 2.4895272171825587e-07, + "loss": 0.4459, + "num_input_tokens_seen": 39020608, + "step": 12395 + }, + { + "epoch": 0.7938032136226874, + "grad_norm": 32.116249084472656, + "learning_rate": 2.482154500684055e-07, + "loss": 0.443, + "num_input_tokens_seen": 39035712, + "step": 12400 + }, + { + "epoch": 0.7941232955636643, + "grad_norm": 25.23982048034668, + "learning_rate": 2.4747911703039293e-07, + "loss": 0.3361, + "num_input_tokens_seen": 39050880, + "step": 12405 + }, + { + "epoch": 0.7944433775046412, + "grad_norm": 35.13556671142578, + "learning_rate": 2.467437235235378e-07, + "loss": 0.3689, + "num_input_tokens_seen": 39065792, + "step": 12410 + }, + { + "epoch": 0.7947634594456181, + "grad_norm": 31.368885040283203, + "learning_rate": 2.460092704659883e-07, + "loss": 0.3418, + "num_input_tokens_seen": 39080960, + "step": 12415 + }, + { + "epoch": 0.795083541386595, + "grad_norm": 16.847009658813477, + "learning_rate": 2.452757587747174e-07, + "loss": 0.2604, + "num_input_tokens_seen": 39097216, + "step": 12420 + }, + { + "epoch": 0.7954036233275719, + "grad_norm": 23.280132293701172, + "learning_rate": 2.445431893655232e-07, + "loss": 0.1771, + "num_input_tokens_seen": 39113152, + "step": 12425 + }, + { + "epoch": 0.7957237052685487, + "grad_norm": 35.485782623291016, + "learning_rate": 2.438115631530271e-07, + "loss": 0.3722, + "num_input_tokens_seen": 39130176, + "step": 12430 + }, + { + "epoch": 0.7960437872095256, + "grad_norm": 28.096521377563477, + "learning_rate": 2.4308088105067305e-07, + "loss": 0.2283, + "num_input_tokens_seen": 39145792, + "step": 12435 + }, + { + "epoch": 0.7963638691505025, + "grad_norm": 67.06790924072266, + "learning_rate": 2.423511439707262e-07, + "loss": 0.4201, + "num_input_tokens_seen": 39161280, + "step": 12440 + }, + { + "epoch": 0.7966839510914794, + "grad_norm": 23.704147338867188, + "learning_rate": 2.4162235282427177e-07, + "loss": 0.2784, + "num_input_tokens_seen": 39176512, + "step": 12445 + }, + { + "epoch": 0.7970040330324563, + "grad_norm": 42.61015319824219, + "learning_rate": 2.408945085212144e-07, + "loss": 0.3621, + "num_input_tokens_seen": 39191808, + "step": 12450 + }, + { + "epoch": 0.7973241149734333, + "grad_norm": 33.03046417236328, + "learning_rate": 2.401676119702759e-07, + "loss": 0.2479, + "num_input_tokens_seen": 39208640, + "step": 12455 + }, + { + "epoch": 0.7976441969144101, + "grad_norm": 19.37267303466797, + "learning_rate": 2.394416640789952e-07, + "loss": 0.3438, + "num_input_tokens_seen": 39223232, + "step": 12460 + }, + { + "epoch": 0.797964278855387, + "grad_norm": 40.43623352050781, + "learning_rate": 2.3871666575372696e-07, + "loss": 0.3098, + "num_input_tokens_seen": 39238656, + "step": 12465 + }, + { + "epoch": 0.7982843607963639, + "grad_norm": 54.1468505859375, + "learning_rate": 2.3799261789963964e-07, + "loss": 0.532, + "num_input_tokens_seen": 39255872, + "step": 12470 + }, + { + "epoch": 0.7986044427373408, + "grad_norm": 21.15880584716797, + "learning_rate": 2.3726952142071644e-07, + "loss": 0.2708, + "num_input_tokens_seen": 39270784, + "step": 12475 + }, + { + "epoch": 0.7989245246783176, + "grad_norm": 41.602508544921875, + "learning_rate": 2.365473772197508e-07, + "loss": 0.3462, + "num_input_tokens_seen": 39286080, + "step": 12480 + }, + { + "epoch": 0.7992446066192945, + "grad_norm": 33.63953399658203, + "learning_rate": 2.3582618619834883e-07, + "loss": 0.356, + "num_input_tokens_seen": 39301312, + "step": 12485 + }, + { + "epoch": 0.7995646885602714, + "grad_norm": 16.34864616394043, + "learning_rate": 2.3510594925692528e-07, + "loss": 0.2216, + "num_input_tokens_seen": 39316736, + "step": 12490 + }, + { + "epoch": 0.7998847705012483, + "grad_norm": 28.48493194580078, + "learning_rate": 2.343866672947057e-07, + "loss": 0.3493, + "num_input_tokens_seen": 39331264, + "step": 12495 + }, + { + "epoch": 0.8002048524422252, + "grad_norm": 26.142616271972656, + "learning_rate": 2.336683412097209e-07, + "loss": 0.2587, + "num_input_tokens_seen": 39345856, + "step": 12500 + }, + { + "epoch": 0.800524934383202, + "grad_norm": 23.281526565551758, + "learning_rate": 2.329509718988095e-07, + "loss": 0.3645, + "num_input_tokens_seen": 39361280, + "step": 12505 + }, + { + "epoch": 0.800845016324179, + "grad_norm": 34.25197982788086, + "learning_rate": 2.3223456025761645e-07, + "loss": 0.3367, + "num_input_tokens_seen": 39375872, + "step": 12510 + }, + { + "epoch": 0.8009730491005698, + "eval_loss": 0.36358681321144104, + "eval_runtime": 49.1621, + "eval_samples_per_second": 282.453, + "eval_steps_per_second": 35.312, + "num_input_tokens_seen": 39382144, + "step": 12512 + }, + { + "epoch": 0.8011650982651559, + "grad_norm": 20.024723052978516, + "learning_rate": 2.315191071805892e-07, + "loss": 0.2866, + "num_input_tokens_seen": 39392320, + "step": 12515 + }, + { + "epoch": 0.8014851802061328, + "grad_norm": 63.86294937133789, + "learning_rate": 2.3080461356097937e-07, + "loss": 0.3619, + "num_input_tokens_seen": 39407680, + "step": 12520 + }, + { + "epoch": 0.8018052621471097, + "grad_norm": 18.46623992919922, + "learning_rate": 2.30091080290841e-07, + "loss": 0.288, + "num_input_tokens_seen": 39424512, + "step": 12525 + }, + { + "epoch": 0.8021253440880866, + "grad_norm": 45.297523498535156, + "learning_rate": 2.29378508261029e-07, + "loss": 0.3463, + "num_input_tokens_seen": 39439296, + "step": 12530 + }, + { + "epoch": 0.8024454260290634, + "grad_norm": 53.35750198364258, + "learning_rate": 2.2866689836119702e-07, + "loss": 0.3707, + "num_input_tokens_seen": 39456576, + "step": 12535 + }, + { + "epoch": 0.8027655079700403, + "grad_norm": 62.54146957397461, + "learning_rate": 2.2795625147979913e-07, + "loss": 0.3536, + "num_input_tokens_seen": 39472512, + "step": 12540 + }, + { + "epoch": 0.8030855899110172, + "grad_norm": 22.177854537963867, + "learning_rate": 2.2724656850408597e-07, + "loss": 0.2332, + "num_input_tokens_seen": 39488192, + "step": 12545 + }, + { + "epoch": 0.8034056718519941, + "grad_norm": 42.50724411010742, + "learning_rate": 2.2653785032010532e-07, + "loss": 0.3855, + "num_input_tokens_seen": 39503552, + "step": 12550 + }, + { + "epoch": 0.803725753792971, + "grad_norm": 38.946964263916016, + "learning_rate": 2.258300978126999e-07, + "loss": 0.3363, + "num_input_tokens_seen": 39519744, + "step": 12555 + }, + { + "epoch": 0.804045835733948, + "grad_norm": 22.364994049072266, + "learning_rate": 2.2512331186550715e-07, + "loss": 0.4753, + "num_input_tokens_seen": 39535232, + "step": 12560 + }, + { + "epoch": 0.8043659176749248, + "grad_norm": 44.744346618652344, + "learning_rate": 2.244174933609575e-07, + "loss": 0.3878, + "num_input_tokens_seen": 39549568, + "step": 12565 + }, + { + "epoch": 0.8046859996159017, + "grad_norm": 27.26950454711914, + "learning_rate": 2.2371264318027383e-07, + "loss": 0.2764, + "num_input_tokens_seen": 39566016, + "step": 12570 + }, + { + "epoch": 0.8050060815568786, + "grad_norm": 31.31670570373535, + "learning_rate": 2.2300876220346975e-07, + "loss": 0.2308, + "num_input_tokens_seen": 39581760, + "step": 12575 + }, + { + "epoch": 0.8053261634978555, + "grad_norm": 39.95564651489258, + "learning_rate": 2.2230585130934897e-07, + "loss": 0.2785, + "num_input_tokens_seen": 39597888, + "step": 12580 + }, + { + "epoch": 0.8056462454388323, + "grad_norm": 23.922866821289062, + "learning_rate": 2.2160391137550394e-07, + "loss": 0.4454, + "num_input_tokens_seen": 39613568, + "step": 12585 + }, + { + "epoch": 0.8059663273798092, + "grad_norm": 60.24818420410156, + "learning_rate": 2.2090294327831494e-07, + "loss": 0.4314, + "num_input_tokens_seen": 39628096, + "step": 12590 + }, + { + "epoch": 0.8062864093207861, + "grad_norm": 40.70429992675781, + "learning_rate": 2.202029478929488e-07, + "loss": 0.2695, + "num_input_tokens_seen": 39642560, + "step": 12595 + }, + { + "epoch": 0.806606491261763, + "grad_norm": 24.328882217407227, + "learning_rate": 2.195039260933581e-07, + "loss": 0.2967, + "num_input_tokens_seen": 39658112, + "step": 12600 + }, + { + "epoch": 0.8069265732027399, + "grad_norm": 33.61399841308594, + "learning_rate": 2.1880587875227973e-07, + "loss": 0.2657, + "num_input_tokens_seen": 39674112, + "step": 12605 + }, + { + "epoch": 0.8072466551437167, + "grad_norm": 27.520858764648438, + "learning_rate": 2.18108806741234e-07, + "loss": 0.3313, + "num_input_tokens_seen": 39690432, + "step": 12610 + }, + { + "epoch": 0.8075667370846937, + "grad_norm": 21.497695922851562, + "learning_rate": 2.1741271093052315e-07, + "loss": 0.3512, + "num_input_tokens_seen": 39705792, + "step": 12615 + }, + { + "epoch": 0.8078868190256706, + "grad_norm": 50.78917694091797, + "learning_rate": 2.167175921892318e-07, + "loss": 0.4692, + "num_input_tokens_seen": 39722048, + "step": 12620 + }, + { + "epoch": 0.8082069009666475, + "grad_norm": 26.748119354248047, + "learning_rate": 2.1602345138522314e-07, + "loss": 0.4239, + "num_input_tokens_seen": 39738304, + "step": 12625 + }, + { + "epoch": 0.8085269829076244, + "grad_norm": 31.953128814697266, + "learning_rate": 2.1533028938514008e-07, + "loss": 0.3468, + "num_input_tokens_seen": 39753728, + "step": 12630 + }, + { + "epoch": 0.8088470648486012, + "grad_norm": 41.40265655517578, + "learning_rate": 2.1463810705440433e-07, + "loss": 0.3435, + "num_input_tokens_seen": 39769600, + "step": 12635 + }, + { + "epoch": 0.8091671467895781, + "grad_norm": 33.059566497802734, + "learning_rate": 2.139469052572127e-07, + "loss": 0.3519, + "num_input_tokens_seen": 39784000, + "step": 12640 + }, + { + "epoch": 0.809487228730555, + "grad_norm": 46.353363037109375, + "learning_rate": 2.1325668485653891e-07, + "loss": 0.344, + "num_input_tokens_seen": 39800320, + "step": 12645 + }, + { + "epoch": 0.8098073106715319, + "grad_norm": 27.811872482299805, + "learning_rate": 2.1256744671413173e-07, + "loss": 0.457, + "num_input_tokens_seen": 39815360, + "step": 12650 + }, + { + "epoch": 0.8101273926125088, + "grad_norm": 31.568683624267578, + "learning_rate": 2.1187919169051316e-07, + "loss": 0.3821, + "num_input_tokens_seen": 39829952, + "step": 12655 + }, + { + "epoch": 0.8104474745534856, + "grad_norm": 33.802940368652344, + "learning_rate": 2.111919206449767e-07, + "loss": 0.3528, + "num_input_tokens_seen": 39845376, + "step": 12660 + }, + { + "epoch": 0.8107675564944626, + "grad_norm": 27.218812942504883, + "learning_rate": 2.1050563443558922e-07, + "loss": 0.4858, + "num_input_tokens_seen": 39861696, + "step": 12665 + }, + { + "epoch": 0.8110876384354395, + "grad_norm": 37.33356475830078, + "learning_rate": 2.0982033391918697e-07, + "loss": 0.297, + "num_input_tokens_seen": 39877440, + "step": 12670 + }, + { + "epoch": 0.8114077203764164, + "grad_norm": 58.22770309448242, + "learning_rate": 2.0913601995137543e-07, + "loss": 0.334, + "num_input_tokens_seen": 39893760, + "step": 12675 + }, + { + "epoch": 0.8117278023173933, + "grad_norm": 15.805877685546875, + "learning_rate": 2.084526933865287e-07, + "loss": 0.2943, + "num_input_tokens_seen": 39909568, + "step": 12680 + }, + { + "epoch": 0.8120478842583702, + "grad_norm": 30.60896873474121, + "learning_rate": 2.0777035507778817e-07, + "loss": 0.4543, + "num_input_tokens_seen": 39923648, + "step": 12685 + }, + { + "epoch": 0.812367966199347, + "grad_norm": 17.86086654663086, + "learning_rate": 2.0708900587706135e-07, + "loss": 0.4299, + "num_input_tokens_seen": 39939008, + "step": 12690 + }, + { + "epoch": 0.8126880481403239, + "grad_norm": 45.35393142700195, + "learning_rate": 2.0640864663502e-07, + "loss": 0.3374, + "num_input_tokens_seen": 39955072, + "step": 12695 + }, + { + "epoch": 0.8130081300813008, + "grad_norm": 31.832155227661133, + "learning_rate": 2.057292782011013e-07, + "loss": 0.4545, + "num_input_tokens_seen": 39970880, + "step": 12700 + }, + { + "epoch": 0.8133282120222777, + "grad_norm": 22.989181518554688, + "learning_rate": 2.0505090142350468e-07, + "loss": 0.2967, + "num_input_tokens_seen": 39986240, + "step": 12705 + }, + { + "epoch": 0.8136482939632546, + "grad_norm": 31.20648765563965, + "learning_rate": 2.0437351714919127e-07, + "loss": 0.3427, + "num_input_tokens_seen": 40001856, + "step": 12710 + }, + { + "epoch": 0.8139683759042314, + "grad_norm": 18.44768714904785, + "learning_rate": 2.0369712622388336e-07, + "loss": 0.309, + "num_input_tokens_seen": 40018112, + "step": 12715 + }, + { + "epoch": 0.8142884578452084, + "grad_norm": 37.2120475769043, + "learning_rate": 2.0302172949206298e-07, + "loss": 0.2879, + "num_input_tokens_seen": 40033664, + "step": 12720 + }, + { + "epoch": 0.8146085397861853, + "grad_norm": 54.152069091796875, + "learning_rate": 2.0234732779697094e-07, + "loss": 0.2967, + "num_input_tokens_seen": 40048768, + "step": 12725 + }, + { + "epoch": 0.8149286217271622, + "grad_norm": 42.13416290283203, + "learning_rate": 2.016739219806056e-07, + "loss": 0.3229, + "num_input_tokens_seen": 40063232, + "step": 12730 + }, + { + "epoch": 0.8152487036681391, + "grad_norm": 19.65249252319336, + "learning_rate": 2.0100151288372215e-07, + "loss": 0.3904, + "num_input_tokens_seen": 40079296, + "step": 12735 + }, + { + "epoch": 0.8155687856091159, + "grad_norm": 59.13142013549805, + "learning_rate": 2.0033010134583084e-07, + "loss": 0.5554, + "num_input_tokens_seen": 40094976, + "step": 12740 + }, + { + "epoch": 0.8158888675500928, + "grad_norm": 32.4484977722168, + "learning_rate": 1.9965968820519763e-07, + "loss": 0.3218, + "num_input_tokens_seen": 40110464, + "step": 12745 + }, + { + "epoch": 0.8162089494910697, + "grad_norm": 48.04807662963867, + "learning_rate": 1.9899027429884042e-07, + "loss": 0.3981, + "num_input_tokens_seen": 40125568, + "step": 12750 + }, + { + "epoch": 0.8165290314320466, + "grad_norm": 37.24668502807617, + "learning_rate": 1.983218604625305e-07, + "loss": 0.4142, + "num_input_tokens_seen": 40141440, + "step": 12755 + }, + { + "epoch": 0.8168491133730235, + "grad_norm": 14.393180847167969, + "learning_rate": 1.9765444753079096e-07, + "loss": 0.3275, + "num_input_tokens_seen": 40156416, + "step": 12760 + }, + { + "epoch": 0.8171691953140003, + "grad_norm": 29.691728591918945, + "learning_rate": 1.9698803633689408e-07, + "loss": 0.3998, + "num_input_tokens_seen": 40172928, + "step": 12765 + }, + { + "epoch": 0.8174892772549772, + "grad_norm": 21.646751403808594, + "learning_rate": 1.963226277128619e-07, + "loss": 0.2336, + "num_input_tokens_seen": 40188096, + "step": 12770 + }, + { + "epoch": 0.8178093591959542, + "grad_norm": 29.038705825805664, + "learning_rate": 1.956582224894655e-07, + "loss": 0.3593, + "num_input_tokens_seen": 40204032, + "step": 12775 + }, + { + "epoch": 0.8181294411369311, + "grad_norm": 46.25074768066406, + "learning_rate": 1.949948214962227e-07, + "loss": 0.3646, + "num_input_tokens_seen": 40218944, + "step": 12780 + }, + { + "epoch": 0.818449523077908, + "grad_norm": 54.344844818115234, + "learning_rate": 1.943324255613964e-07, + "loss": 0.3731, + "num_input_tokens_seen": 40235456, + "step": 12785 + }, + { + "epoch": 0.8187696050188848, + "grad_norm": 24.159887313842773, + "learning_rate": 1.936710355119967e-07, + "loss": 0.4505, + "num_input_tokens_seen": 40250176, + "step": 12790 + }, + { + "epoch": 0.8190896869598617, + "grad_norm": 33.41341018676758, + "learning_rate": 1.9301065217377655e-07, + "loss": 0.3157, + "num_input_tokens_seen": 40265472, + "step": 12795 + }, + { + "epoch": 0.8194097689008386, + "grad_norm": 25.555482864379883, + "learning_rate": 1.9235127637123249e-07, + "loss": 0.3992, + "num_input_tokens_seen": 40281728, + "step": 12800 + }, + { + "epoch": 0.8197298508418155, + "grad_norm": 52.75870132446289, + "learning_rate": 1.9169290892760225e-07, + "loss": 0.3282, + "num_input_tokens_seen": 40296768, + "step": 12805 + }, + { + "epoch": 0.8200499327827924, + "grad_norm": 44.361934661865234, + "learning_rate": 1.91035550664866e-07, + "loss": 0.3201, + "num_input_tokens_seen": 40311488, + "step": 12810 + }, + { + "epoch": 0.8203700147237692, + "grad_norm": 54.147613525390625, + "learning_rate": 1.903792024037433e-07, + "loss": 0.314, + "num_input_tokens_seen": 40327232, + "step": 12815 + }, + { + "epoch": 0.8206900966647461, + "grad_norm": 33.24623489379883, + "learning_rate": 1.8972386496369185e-07, + "loss": 0.4472, + "num_input_tokens_seen": 40344064, + "step": 12820 + }, + { + "epoch": 0.8210101786057231, + "grad_norm": 41.800315856933594, + "learning_rate": 1.89069539162909e-07, + "loss": 0.3976, + "num_input_tokens_seen": 40359040, + "step": 12825 + }, + { + "epoch": 0.8213302605467, + "grad_norm": 19.14189338684082, + "learning_rate": 1.8841622581832783e-07, + "loss": 0.4066, + "num_input_tokens_seen": 40376384, + "step": 12830 + }, + { + "epoch": 0.8216503424876769, + "grad_norm": 28.32308578491211, + "learning_rate": 1.8776392574561783e-07, + "loss": 0.5901, + "num_input_tokens_seen": 40391936, + "step": 12835 + }, + { + "epoch": 0.8219704244286538, + "grad_norm": 23.97947883605957, + "learning_rate": 1.8711263975918322e-07, + "loss": 0.4831, + "num_input_tokens_seen": 40408832, + "step": 12840 + }, + { + "epoch": 0.8222905063696306, + "grad_norm": 35.37938690185547, + "learning_rate": 1.8646236867216215e-07, + "loss": 0.4603, + "num_input_tokens_seen": 40425280, + "step": 12845 + }, + { + "epoch": 0.8226105883106075, + "grad_norm": 34.26011657714844, + "learning_rate": 1.8581311329642591e-07, + "loss": 0.338, + "num_input_tokens_seen": 40440832, + "step": 12850 + }, + { + "epoch": 0.8229306702515844, + "grad_norm": 29.206497192382812, + "learning_rate": 1.8516487444257723e-07, + "loss": 0.2651, + "num_input_tokens_seen": 40458624, + "step": 12855 + }, + { + "epoch": 0.8232507521925613, + "grad_norm": 33.5301399230957, + "learning_rate": 1.8451765291995004e-07, + "loss": 0.4093, + "num_input_tokens_seen": 40474688, + "step": 12860 + }, + { + "epoch": 0.8235708341335382, + "grad_norm": 35.508880615234375, + "learning_rate": 1.8387144953660806e-07, + "loss": 0.3554, + "num_input_tokens_seen": 40490816, + "step": 12865 + }, + { + "epoch": 0.823890916074515, + "grad_norm": 39.21906280517578, + "learning_rate": 1.832262650993437e-07, + "loss": 0.4472, + "num_input_tokens_seen": 40506112, + "step": 12870 + }, + { + "epoch": 0.8242109980154919, + "grad_norm": 20.77424430847168, + "learning_rate": 1.825821004136774e-07, + "loss": 0.2954, + "num_input_tokens_seen": 40521344, + "step": 12875 + }, + { + "epoch": 0.8245310799564689, + "grad_norm": 29.856380462646484, + "learning_rate": 1.819389562838559e-07, + "loss": 0.2698, + "num_input_tokens_seen": 40537024, + "step": 12880 + }, + { + "epoch": 0.8248511618974458, + "grad_norm": 47.23398208618164, + "learning_rate": 1.8129683351285319e-07, + "loss": 0.3136, + "num_input_tokens_seen": 40552640, + "step": 12885 + }, + { + "epoch": 0.8251712438384227, + "grad_norm": 35.031856536865234, + "learning_rate": 1.8065573290236626e-07, + "loss": 0.3186, + "num_input_tokens_seen": 40568000, + "step": 12890 + }, + { + "epoch": 0.8254913257793995, + "grad_norm": 22.70587730407715, + "learning_rate": 1.8001565525281682e-07, + "loss": 0.3809, + "num_input_tokens_seen": 40584960, + "step": 12895 + }, + { + "epoch": 0.8258114077203764, + "grad_norm": 25.041950225830078, + "learning_rate": 1.793766013633493e-07, + "loss": 0.3665, + "num_input_tokens_seen": 40600704, + "step": 12900 + }, + { + "epoch": 0.8261314896613533, + "grad_norm": 27.236404418945312, + "learning_rate": 1.7873857203183074e-07, + "loss": 0.3693, + "num_input_tokens_seen": 40615872, + "step": 12905 + }, + { + "epoch": 0.8264515716023302, + "grad_norm": 54.097450256347656, + "learning_rate": 1.7810156805484733e-07, + "loss": 0.4563, + "num_input_tokens_seen": 40632640, + "step": 12910 + }, + { + "epoch": 0.8267716535433071, + "grad_norm": 25.137113571166992, + "learning_rate": 1.7746559022770612e-07, + "loss": 0.2995, + "num_input_tokens_seen": 40648064, + "step": 12915 + }, + { + "epoch": 0.8270917354842839, + "grad_norm": 29.874134063720703, + "learning_rate": 1.7683063934443342e-07, + "loss": 0.3663, + "num_input_tokens_seen": 40664704, + "step": 12920 + }, + { + "epoch": 0.8274118174252608, + "grad_norm": 40.31401824951172, + "learning_rate": 1.7619671619777277e-07, + "loss": 0.4004, + "num_input_tokens_seen": 40681024, + "step": 12925 + }, + { + "epoch": 0.8277318993662378, + "grad_norm": 31.526283264160156, + "learning_rate": 1.7556382157918404e-07, + "loss": 0.4101, + "num_input_tokens_seen": 40695936, + "step": 12930 + }, + { + "epoch": 0.8280519813072147, + "grad_norm": 27.806535720825195, + "learning_rate": 1.7493195627884427e-07, + "loss": 0.3185, + "num_input_tokens_seen": 40713472, + "step": 12935 + }, + { + "epoch": 0.8283720632481916, + "grad_norm": 42.26551055908203, + "learning_rate": 1.7430112108564465e-07, + "loss": 0.3141, + "num_input_tokens_seen": 40729344, + "step": 12940 + }, + { + "epoch": 0.8286921451891684, + "grad_norm": 35.58454895019531, + "learning_rate": 1.736713167871896e-07, + "loss": 0.3861, + "num_input_tokens_seen": 40745856, + "step": 12945 + }, + { + "epoch": 0.8290122271301453, + "grad_norm": 19.220375061035156, + "learning_rate": 1.7304254416979803e-07, + "loss": 0.2993, + "num_input_tokens_seen": 40761920, + "step": 12950 + }, + { + "epoch": 0.8293323090711222, + "grad_norm": 17.930898666381836, + "learning_rate": 1.7241480401849963e-07, + "loss": 0.2488, + "num_input_tokens_seen": 40776960, + "step": 12955 + }, + { + "epoch": 0.8296523910120991, + "grad_norm": 21.81646156311035, + "learning_rate": 1.7178809711703524e-07, + "loss": 0.3455, + "num_input_tokens_seen": 40792192, + "step": 12960 + }, + { + "epoch": 0.829972472953076, + "grad_norm": 34.8779296875, + "learning_rate": 1.7116242424785599e-07, + "loss": 0.3612, + "num_input_tokens_seen": 40808256, + "step": 12965 + }, + { + "epoch": 0.8302925548940528, + "grad_norm": 40.2933464050293, + "learning_rate": 1.7053778619212166e-07, + "loss": 0.4288, + "num_input_tokens_seen": 40823424, + "step": 12970 + }, + { + "epoch": 0.8306126368350297, + "grad_norm": 39.040504455566406, + "learning_rate": 1.6991418372970022e-07, + "loss": 0.4221, + "num_input_tokens_seen": 40840960, + "step": 12975 + }, + { + "epoch": 0.8309327187760066, + "grad_norm": 26.533519744873047, + "learning_rate": 1.6929161763916666e-07, + "loss": 0.3775, + "num_input_tokens_seen": 40857536, + "step": 12980 + }, + { + "epoch": 0.8312528007169836, + "grad_norm": 25.883270263671875, + "learning_rate": 1.686700886978021e-07, + "loss": 0.3597, + "num_input_tokens_seen": 40874240, + "step": 12985 + }, + { + "epoch": 0.8315728826579605, + "grad_norm": 37.27665710449219, + "learning_rate": 1.6804959768159266e-07, + "loss": 0.3573, + "num_input_tokens_seen": 40888960, + "step": 12990 + }, + { + "epoch": 0.8318929645989374, + "grad_norm": 53.164058685302734, + "learning_rate": 1.674301453652287e-07, + "loss": 0.5238, + "num_input_tokens_seen": 40904512, + "step": 12995 + }, + { + "epoch": 0.8322130465399142, + "grad_norm": 37.5425910949707, + "learning_rate": 1.6681173252210378e-07, + "loss": 0.2903, + "num_input_tokens_seen": 40921856, + "step": 13000 + }, + { + "epoch": 0.8325331284808911, + "grad_norm": 49.16252517700195, + "learning_rate": 1.6619435992431342e-07, + "loss": 0.3741, + "num_input_tokens_seen": 40938752, + "step": 13005 + }, + { + "epoch": 0.832853210421868, + "grad_norm": 43.46717071533203, + "learning_rate": 1.6557802834265466e-07, + "loss": 0.3033, + "num_input_tokens_seen": 40954048, + "step": 13010 + }, + { + "epoch": 0.8331732923628449, + "grad_norm": 24.154077529907227, + "learning_rate": 1.649627385466248e-07, + "loss": 0.3593, + "num_input_tokens_seen": 40972672, + "step": 13015 + }, + { + "epoch": 0.8334933743038218, + "grad_norm": 19.601119995117188, + "learning_rate": 1.643484913044202e-07, + "loss": 0.242, + "num_input_tokens_seen": 40987648, + "step": 13020 + }, + { + "epoch": 0.8338134562447986, + "grad_norm": 13.510409355163574, + "learning_rate": 1.6373528738293564e-07, + "loss": 0.3147, + "num_input_tokens_seen": 41003328, + "step": 13025 + }, + { + "epoch": 0.8341335381857755, + "grad_norm": 31.341299057006836, + "learning_rate": 1.6312312754776404e-07, + "loss": 0.2875, + "num_input_tokens_seen": 41018624, + "step": 13030 + }, + { + "epoch": 0.8344536201267524, + "grad_norm": 16.611501693725586, + "learning_rate": 1.6251201256319357e-07, + "loss": 0.3321, + "num_input_tokens_seen": 41034624, + "step": 13035 + }, + { + "epoch": 0.8347737020677294, + "grad_norm": 26.413938522338867, + "learning_rate": 1.619019431922083e-07, + "loss": 0.3821, + "num_input_tokens_seen": 41049664, + "step": 13040 + }, + { + "epoch": 0.8350937840087063, + "grad_norm": 33.03317642211914, + "learning_rate": 1.6129292019648754e-07, + "loss": 0.3454, + "num_input_tokens_seen": 41066368, + "step": 13045 + }, + { + "epoch": 0.8354138659496831, + "grad_norm": 25.02870750427246, + "learning_rate": 1.606849443364038e-07, + "loss": 0.2916, + "num_input_tokens_seen": 41082048, + "step": 13050 + }, + { + "epoch": 0.83573394789066, + "grad_norm": 16.02092170715332, + "learning_rate": 1.6007801637102104e-07, + "loss": 0.3422, + "num_input_tokens_seen": 41098048, + "step": 13055 + }, + { + "epoch": 0.8360540298316369, + "grad_norm": 20.10306167602539, + "learning_rate": 1.594721370580969e-07, + "loss": 0.3826, + "num_input_tokens_seen": 41112768, + "step": 13060 + }, + { + "epoch": 0.8363741117726138, + "grad_norm": 20.185379028320312, + "learning_rate": 1.588673071540788e-07, + "loss": 0.4512, + "num_input_tokens_seen": 41127488, + "step": 13065 + }, + { + "epoch": 0.8366941937135907, + "grad_norm": 37.06159591674805, + "learning_rate": 1.5826352741410332e-07, + "loss": 0.3295, + "num_input_tokens_seen": 41142272, + "step": 13070 + }, + { + "epoch": 0.8370142756545675, + "grad_norm": 52.25266647338867, + "learning_rate": 1.576607985919971e-07, + "loss": 0.2947, + "num_input_tokens_seen": 41157952, + "step": 13075 + }, + { + "epoch": 0.8373343575955444, + "grad_norm": 38.03484344482422, + "learning_rate": 1.57059121440274e-07, + "loss": 0.3595, + "num_input_tokens_seen": 41172992, + "step": 13080 + }, + { + "epoch": 0.8376544395365213, + "grad_norm": 47.07827377319336, + "learning_rate": 1.56458496710135e-07, + "loss": 0.3642, + "num_input_tokens_seen": 41187776, + "step": 13085 + }, + { + "epoch": 0.8379745214774983, + "grad_norm": 36.153099060058594, + "learning_rate": 1.5585892515146716e-07, + "loss": 0.3461, + "num_input_tokens_seen": 41204416, + "step": 13090 + }, + { + "epoch": 0.8382946034184752, + "grad_norm": 22.711284637451172, + "learning_rate": 1.5526040751284253e-07, + "loss": 0.4195, + "num_input_tokens_seen": 41220032, + "step": 13095 + }, + { + "epoch": 0.838614685359452, + "grad_norm": 35.58867263793945, + "learning_rate": 1.546629445415174e-07, + "loss": 0.3118, + "num_input_tokens_seen": 41235776, + "step": 13100 + }, + { + "epoch": 0.8389347673004289, + "grad_norm": 41.773040771484375, + "learning_rate": 1.5406653698343141e-07, + "loss": 0.3725, + "num_input_tokens_seen": 41252160, + "step": 13105 + }, + { + "epoch": 0.8392548492414058, + "grad_norm": 33.417354583740234, + "learning_rate": 1.5347118558320637e-07, + "loss": 0.3539, + "num_input_tokens_seen": 41269056, + "step": 13110 + }, + { + "epoch": 0.8395749311823827, + "grad_norm": 24.998620986938477, + "learning_rate": 1.5287689108414558e-07, + "loss": 0.3562, + "num_input_tokens_seen": 41285312, + "step": 13115 + }, + { + "epoch": 0.8398950131233596, + "grad_norm": 39.11224365234375, + "learning_rate": 1.5228365422823242e-07, + "loss": 0.3246, + "num_input_tokens_seen": 41300992, + "step": 13120 + }, + { + "epoch": 0.8402150950643364, + "grad_norm": 28.325523376464844, + "learning_rate": 1.5169147575613038e-07, + "loss": 0.2623, + "num_input_tokens_seen": 41317952, + "step": 13125 + }, + { + "epoch": 0.8405351770053133, + "grad_norm": 12.87824821472168, + "learning_rate": 1.5110035640718098e-07, + "loss": 0.2941, + "num_input_tokens_seen": 41333440, + "step": 13130 + }, + { + "epoch": 0.8408552589462902, + "grad_norm": 31.341796875, + "learning_rate": 1.5051029691940387e-07, + "loss": 0.3725, + "num_input_tokens_seen": 41349312, + "step": 13135 + }, + { + "epoch": 0.8411753408872671, + "grad_norm": 33.42830276489258, + "learning_rate": 1.4992129802949515e-07, + "loss": 0.3449, + "num_input_tokens_seen": 41364288, + "step": 13140 + }, + { + "epoch": 0.8414954228282441, + "grad_norm": 24.27691078186035, + "learning_rate": 1.4933336047282696e-07, + "loss": 0.2836, + "num_input_tokens_seen": 41379904, + "step": 13145 + }, + { + "epoch": 0.841815504769221, + "grad_norm": 34.65740203857422, + "learning_rate": 1.4874648498344579e-07, + "loss": 0.3199, + "num_input_tokens_seen": 41394432, + "step": 13150 + }, + { + "epoch": 0.8421355867101978, + "grad_norm": 53.11001205444336, + "learning_rate": 1.4816067229407348e-07, + "loss": 0.3419, + "num_input_tokens_seen": 41409984, + "step": 13155 + }, + { + "epoch": 0.8424556686511747, + "grad_norm": 18.456310272216797, + "learning_rate": 1.4757592313610322e-07, + "loss": 0.3038, + "num_input_tokens_seen": 41425984, + "step": 13160 + }, + { + "epoch": 0.8427757505921516, + "grad_norm": 17.635456085205078, + "learning_rate": 1.4699223823960128e-07, + "loss": 0.3293, + "num_input_tokens_seen": 41441920, + "step": 13165 + }, + { + "epoch": 0.8430958325331285, + "grad_norm": 38.752742767333984, + "learning_rate": 1.4640961833330579e-07, + "loss": 0.3392, + "num_input_tokens_seen": 41457664, + "step": 13170 + }, + { + "epoch": 0.8434159144741054, + "grad_norm": 16.197906494140625, + "learning_rate": 1.4582806414462378e-07, + "loss": 0.2544, + "num_input_tokens_seen": 41472832, + "step": 13175 + }, + { + "epoch": 0.8437359964150822, + "grad_norm": 24.1660213470459, + "learning_rate": 1.4524757639963258e-07, + "loss": 0.3411, + "num_input_tokens_seen": 41490368, + "step": 13180 + }, + { + "epoch": 0.8440560783560591, + "grad_norm": 44.753700256347656, + "learning_rate": 1.4466815582307845e-07, + "loss": 0.4458, + "num_input_tokens_seen": 41506624, + "step": 13185 + }, + { + "epoch": 0.844376160297036, + "grad_norm": 9.318314552307129, + "learning_rate": 1.440898031383746e-07, + "loss": 0.2433, + "num_input_tokens_seen": 41523264, + "step": 13190 + }, + { + "epoch": 0.844696242238013, + "grad_norm": 42.493797302246094, + "learning_rate": 1.4351251906760064e-07, + "loss": 0.3678, + "num_input_tokens_seen": 41538944, + "step": 13195 + }, + { + "epoch": 0.8450163241789899, + "grad_norm": 40.14229202270508, + "learning_rate": 1.4293630433150317e-07, + "loss": 0.3919, + "num_input_tokens_seen": 41554880, + "step": 13200 + }, + { + "epoch": 0.8453364061199667, + "grad_norm": 47.614463806152344, + "learning_rate": 1.423611596494927e-07, + "loss": 0.4473, + "num_input_tokens_seen": 41569280, + "step": 13205 + }, + { + "epoch": 0.8456564880609436, + "grad_norm": 18.392112731933594, + "learning_rate": 1.4178708573964438e-07, + "loss": 0.3541, + "num_input_tokens_seen": 41584576, + "step": 13210 + }, + { + "epoch": 0.8459765700019205, + "grad_norm": 19.08127212524414, + "learning_rate": 1.4121408331869566e-07, + "loss": 0.3483, + "num_input_tokens_seen": 41600000, + "step": 13215 + }, + { + "epoch": 0.8462966519428974, + "grad_norm": 37.911075592041016, + "learning_rate": 1.406421531020474e-07, + "loss": 0.3539, + "num_input_tokens_seen": 41615040, + "step": 13220 + }, + { + "epoch": 0.8466167338838743, + "grad_norm": 69.3670883178711, + "learning_rate": 1.4007129580376097e-07, + "loss": 0.3418, + "num_input_tokens_seen": 41630208, + "step": 13225 + }, + { + "epoch": 0.8469368158248511, + "grad_norm": 36.10555648803711, + "learning_rate": 1.3950151213655847e-07, + "loss": 0.354, + "num_input_tokens_seen": 41645440, + "step": 13230 + }, + { + "epoch": 0.847256897765828, + "grad_norm": 42.61678695678711, + "learning_rate": 1.389328028118214e-07, + "loss": 0.3286, + "num_input_tokens_seen": 41661184, + "step": 13235 + }, + { + "epoch": 0.8475769797068049, + "grad_norm": 27.363248825073242, + "learning_rate": 1.3836516853959e-07, + "loss": 0.3546, + "num_input_tokens_seen": 41676224, + "step": 13240 + }, + { + "epoch": 0.8478970616477818, + "grad_norm": 18.371397018432617, + "learning_rate": 1.3779861002856242e-07, + "loss": 0.3031, + "num_input_tokens_seen": 41690816, + "step": 13245 + }, + { + "epoch": 0.8482171435887588, + "grad_norm": 17.178085327148438, + "learning_rate": 1.3723312798609366e-07, + "loss": 0.3261, + "num_input_tokens_seen": 41706688, + "step": 13250 + }, + { + "epoch": 0.8485372255297357, + "grad_norm": 26.48369789123535, + "learning_rate": 1.3666872311819455e-07, + "loss": 0.3518, + "num_input_tokens_seen": 41721920, + "step": 13255 + }, + { + "epoch": 0.8488573074707125, + "grad_norm": 21.16022300720215, + "learning_rate": 1.361053961295312e-07, + "loss": 0.2742, + "num_input_tokens_seen": 41738112, + "step": 13260 + }, + { + "epoch": 0.8491773894116894, + "grad_norm": 50.990020751953125, + "learning_rate": 1.3554314772342412e-07, + "loss": 0.3463, + "num_input_tokens_seen": 41753792, + "step": 13265 + }, + { + "epoch": 0.8494974713526663, + "grad_norm": 20.54403305053711, + "learning_rate": 1.349819786018469e-07, + "loss": 0.3268, + "num_input_tokens_seen": 41771328, + "step": 13270 + }, + { + "epoch": 0.8498175532936432, + "grad_norm": 37.34607696533203, + "learning_rate": 1.3442188946542566e-07, + "loss": 0.375, + "num_input_tokens_seen": 41787712, + "step": 13275 + }, + { + "epoch": 0.85013763523462, + "grad_norm": 24.755434036254883, + "learning_rate": 1.338628810134388e-07, + "loss": 0.2995, + "num_input_tokens_seen": 41803072, + "step": 13280 + }, + { + "epoch": 0.8504577171755969, + "grad_norm": 36.77594757080078, + "learning_rate": 1.3330495394381435e-07, + "loss": 0.3636, + "num_input_tokens_seen": 41818688, + "step": 13285 + }, + { + "epoch": 0.8507777991165738, + "grad_norm": 15.947341918945312, + "learning_rate": 1.3274810895313083e-07, + "loss": 0.272, + "num_input_tokens_seen": 41833792, + "step": 13290 + }, + { + "epoch": 0.8510338646693554, + "eval_loss": 0.3570670485496521, + "eval_runtime": 49.1744, + "eval_samples_per_second": 282.383, + "eval_steps_per_second": 35.303, + "num_input_tokens_seen": 41847872, + "step": 13294 + }, + { + "epoch": 0.8510978810575507, + "grad_norm": 25.20223617553711, + "learning_rate": 1.321923467366164e-07, + "loss": 0.3708, + "num_input_tokens_seen": 41850880, + "step": 13295 + }, + { + "epoch": 0.8514179629985277, + "grad_norm": 14.625531196594238, + "learning_rate": 1.3163766798814603e-07, + "loss": 0.1815, + "num_input_tokens_seen": 41866560, + "step": 13300 + }, + { + "epoch": 0.8517380449395046, + "grad_norm": 49.65571594238281, + "learning_rate": 1.3108407340024264e-07, + "loss": 0.2872, + "num_input_tokens_seen": 41882240, + "step": 13305 + }, + { + "epoch": 0.8520581268804814, + "grad_norm": 37.89714813232422, + "learning_rate": 1.3053156366407613e-07, + "loss": 0.332, + "num_input_tokens_seen": 41898880, + "step": 13310 + }, + { + "epoch": 0.8523782088214583, + "grad_norm": 19.63136100769043, + "learning_rate": 1.2998013946946119e-07, + "loss": 0.2398, + "num_input_tokens_seen": 41915968, + "step": 13315 + }, + { + "epoch": 0.8526982907624352, + "grad_norm": 36.910030364990234, + "learning_rate": 1.2942980150485706e-07, + "loss": 0.3556, + "num_input_tokens_seen": 41930816, + "step": 13320 + }, + { + "epoch": 0.8530183727034121, + "grad_norm": 49.309322357177734, + "learning_rate": 1.2888055045736723e-07, + "loss": 0.3098, + "num_input_tokens_seen": 41947200, + "step": 13325 + }, + { + "epoch": 0.853338454644389, + "grad_norm": 19.818714141845703, + "learning_rate": 1.283323870127384e-07, + "loss": 0.3021, + "num_input_tokens_seen": 41962240, + "step": 13330 + }, + { + "epoch": 0.8536585365853658, + "grad_norm": 28.360517501831055, + "learning_rate": 1.2778531185535911e-07, + "loss": 0.3063, + "num_input_tokens_seen": 41978752, + "step": 13335 + }, + { + "epoch": 0.8539786185263427, + "grad_norm": 19.08763313293457, + "learning_rate": 1.2723932566825844e-07, + "loss": 0.324, + "num_input_tokens_seen": 41994112, + "step": 13340 + }, + { + "epoch": 0.8542987004673196, + "grad_norm": 16.557178497314453, + "learning_rate": 1.2669442913310723e-07, + "loss": 0.2986, + "num_input_tokens_seen": 42010432, + "step": 13345 + }, + { + "epoch": 0.8546187824082965, + "grad_norm": 27.915157318115234, + "learning_rate": 1.2615062293021506e-07, + "loss": 0.2722, + "num_input_tokens_seen": 42025984, + "step": 13350 + }, + { + "epoch": 0.8549388643492735, + "grad_norm": 43.59603500366211, + "learning_rate": 1.2560790773853025e-07, + "loss": 0.3185, + "num_input_tokens_seen": 42040832, + "step": 13355 + }, + { + "epoch": 0.8552589462902503, + "grad_norm": 25.36774253845215, + "learning_rate": 1.2506628423563915e-07, + "loss": 0.4035, + "num_input_tokens_seen": 42057536, + "step": 13360 + }, + { + "epoch": 0.8555790282312272, + "grad_norm": 31.750885009765625, + "learning_rate": 1.2452575309776493e-07, + "loss": 0.2863, + "num_input_tokens_seen": 42073152, + "step": 13365 + }, + { + "epoch": 0.8558991101722041, + "grad_norm": 45.091915130615234, + "learning_rate": 1.2398631499976732e-07, + "loss": 0.304, + "num_input_tokens_seen": 42088512, + "step": 13370 + }, + { + "epoch": 0.856219192113181, + "grad_norm": 22.48138999938965, + "learning_rate": 1.234479706151409e-07, + "loss": 0.4208, + "num_input_tokens_seen": 42103552, + "step": 13375 + }, + { + "epoch": 0.8565392740541579, + "grad_norm": 22.086090087890625, + "learning_rate": 1.2291072061601503e-07, + "loss": 0.3608, + "num_input_tokens_seen": 42119872, + "step": 13380 + }, + { + "epoch": 0.8568593559951347, + "grad_norm": 34.048282623291016, + "learning_rate": 1.2237456567315264e-07, + "loss": 0.4351, + "num_input_tokens_seen": 42136832, + "step": 13385 + }, + { + "epoch": 0.8571794379361116, + "grad_norm": 23.326128005981445, + "learning_rate": 1.2183950645594944e-07, + "loss": 0.2975, + "num_input_tokens_seen": 42152896, + "step": 13390 + }, + { + "epoch": 0.8574995198770885, + "grad_norm": 52.200294494628906, + "learning_rate": 1.2130554363243318e-07, + "loss": 0.3421, + "num_input_tokens_seen": 42168064, + "step": 13395 + }, + { + "epoch": 0.8578196018180654, + "grad_norm": 20.56406593322754, + "learning_rate": 1.207726778692625e-07, + "loss": 0.3703, + "num_input_tokens_seen": 42182784, + "step": 13400 + }, + { + "epoch": 0.8581396837590423, + "grad_norm": 23.129608154296875, + "learning_rate": 1.2024090983172718e-07, + "loss": 0.3271, + "num_input_tokens_seen": 42199744, + "step": 13405 + }, + { + "epoch": 0.8584597657000193, + "grad_norm": 40.9952507019043, + "learning_rate": 1.1971024018374532e-07, + "loss": 0.3625, + "num_input_tokens_seen": 42215040, + "step": 13410 + }, + { + "epoch": 0.8587798476409961, + "grad_norm": 35.23881149291992, + "learning_rate": 1.1918066958786432e-07, + "loss": 0.3091, + "num_input_tokens_seen": 42230144, + "step": 13415 + }, + { + "epoch": 0.859099929581973, + "grad_norm": 59.670223236083984, + "learning_rate": 1.1865219870525922e-07, + "loss": 0.3553, + "num_input_tokens_seen": 42246528, + "step": 13420 + }, + { + "epoch": 0.8594200115229499, + "grad_norm": 20.215394973754883, + "learning_rate": 1.1812482819573222e-07, + "loss": 0.4317, + "num_input_tokens_seen": 42263168, + "step": 13425 + }, + { + "epoch": 0.8597400934639268, + "grad_norm": 32.689353942871094, + "learning_rate": 1.1759855871771163e-07, + "loss": 0.3905, + "num_input_tokens_seen": 42278912, + "step": 13430 + }, + { + "epoch": 0.8600601754049036, + "grad_norm": 45.541587829589844, + "learning_rate": 1.1707339092825075e-07, + "loss": 0.3824, + "num_input_tokens_seen": 42294656, + "step": 13435 + }, + { + "epoch": 0.8603802573458805, + "grad_norm": 45.382381439208984, + "learning_rate": 1.1654932548302842e-07, + "loss": 0.3909, + "num_input_tokens_seen": 42311552, + "step": 13440 + }, + { + "epoch": 0.8607003392868574, + "grad_norm": 48.50038528442383, + "learning_rate": 1.1602636303634595e-07, + "loss": 0.3635, + "num_input_tokens_seen": 42327552, + "step": 13445 + }, + { + "epoch": 0.8610204212278343, + "grad_norm": 18.829587936401367, + "learning_rate": 1.1550450424112801e-07, + "loss": 0.3583, + "num_input_tokens_seen": 42343360, + "step": 13450 + }, + { + "epoch": 0.8613405031688112, + "grad_norm": 22.35457992553711, + "learning_rate": 1.1498374974892178e-07, + "loss": 0.3341, + "num_input_tokens_seen": 42360064, + "step": 13455 + }, + { + "epoch": 0.8616605851097882, + "grad_norm": 23.769941329956055, + "learning_rate": 1.144641002098955e-07, + "loss": 0.4371, + "num_input_tokens_seen": 42374976, + "step": 13460 + }, + { + "epoch": 0.861980667050765, + "grad_norm": 44.195152282714844, + "learning_rate": 1.1394555627283697e-07, + "loss": 0.3524, + "num_input_tokens_seen": 42391616, + "step": 13465 + }, + { + "epoch": 0.8623007489917419, + "grad_norm": 58.780975341796875, + "learning_rate": 1.134281185851551e-07, + "loss": 0.3095, + "num_input_tokens_seen": 42406528, + "step": 13470 + }, + { + "epoch": 0.8626208309327188, + "grad_norm": 29.023456573486328, + "learning_rate": 1.1291178779287691e-07, + "loss": 0.288, + "num_input_tokens_seen": 42424320, + "step": 13475 + }, + { + "epoch": 0.8629409128736957, + "grad_norm": 41.91423034667969, + "learning_rate": 1.1239656454064683e-07, + "loss": 0.3654, + "num_input_tokens_seen": 42440960, + "step": 13480 + }, + { + "epoch": 0.8632609948146726, + "grad_norm": 16.42652130126953, + "learning_rate": 1.1188244947172776e-07, + "loss": 0.2474, + "num_input_tokens_seen": 42456448, + "step": 13485 + }, + { + "epoch": 0.8635810767556494, + "grad_norm": 20.765544891357422, + "learning_rate": 1.1136944322799812e-07, + "loss": 0.3165, + "num_input_tokens_seen": 42472448, + "step": 13490 + }, + { + "epoch": 0.8639011586966263, + "grad_norm": 51.0446662902832, + "learning_rate": 1.1085754644995227e-07, + "loss": 0.3147, + "num_input_tokens_seen": 42487808, + "step": 13495 + }, + { + "epoch": 0.8642212406376032, + "grad_norm": 34.88838195800781, + "learning_rate": 1.1034675977669938e-07, + "loss": 0.3516, + "num_input_tokens_seen": 42503744, + "step": 13500 + }, + { + "epoch": 0.8645413225785801, + "grad_norm": 50.67732238769531, + "learning_rate": 1.0983708384596258e-07, + "loss": 0.5636, + "num_input_tokens_seen": 42520768, + "step": 13505 + }, + { + "epoch": 0.864861404519557, + "grad_norm": 17.03850555419922, + "learning_rate": 1.0932851929407827e-07, + "loss": 0.3664, + "num_input_tokens_seen": 42537408, + "step": 13510 + }, + { + "epoch": 0.8651814864605339, + "grad_norm": 45.833168029785156, + "learning_rate": 1.0882106675599534e-07, + "loss": 0.36, + "num_input_tokens_seen": 42553728, + "step": 13515 + }, + { + "epoch": 0.8655015684015108, + "grad_norm": 14.135661125183105, + "learning_rate": 1.0831472686527409e-07, + "loss": 0.3304, + "num_input_tokens_seen": 42568896, + "step": 13520 + }, + { + "epoch": 0.8658216503424877, + "grad_norm": 13.662610054016113, + "learning_rate": 1.0780950025408586e-07, + "loss": 0.2939, + "num_input_tokens_seen": 42584000, + "step": 13525 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 62.21460723876953, + "learning_rate": 1.0730538755321217e-07, + "loss": 0.3824, + "num_input_tokens_seen": 42600192, + "step": 13530 + }, + { + "epoch": 0.8664618142244415, + "grad_norm": 20.335872650146484, + "learning_rate": 1.0680238939204334e-07, + "loss": 0.304, + "num_input_tokens_seen": 42614656, + "step": 13535 + }, + { + "epoch": 0.8667818961654183, + "grad_norm": 42.727237701416016, + "learning_rate": 1.0630050639857879e-07, + "loss": 0.3989, + "num_input_tokens_seen": 42629504, + "step": 13540 + }, + { + "epoch": 0.8671019781063952, + "grad_norm": 20.651216506958008, + "learning_rate": 1.0579973919942508e-07, + "loss": 0.3036, + "num_input_tokens_seen": 42644224, + "step": 13545 + }, + { + "epoch": 0.8674220600473721, + "grad_norm": 21.302921295166016, + "learning_rate": 1.0530008841979621e-07, + "loss": 0.2417, + "num_input_tokens_seen": 42659584, + "step": 13550 + }, + { + "epoch": 0.867742141988349, + "grad_norm": 36.984397888183594, + "learning_rate": 1.048015546835117e-07, + "loss": 0.2756, + "num_input_tokens_seen": 42675776, + "step": 13555 + }, + { + "epoch": 0.8680622239293259, + "grad_norm": 23.602458953857422, + "learning_rate": 1.0430413861299691e-07, + "loss": 0.3976, + "num_input_tokens_seen": 42693184, + "step": 13560 + }, + { + "epoch": 0.8683823058703029, + "grad_norm": 45.383060455322266, + "learning_rate": 1.0380784082928196e-07, + "loss": 0.4533, + "num_input_tokens_seen": 42710784, + "step": 13565 + }, + { + "epoch": 0.8687023878112797, + "grad_norm": 40.113624572753906, + "learning_rate": 1.0331266195200006e-07, + "loss": 0.3903, + "num_input_tokens_seen": 42727040, + "step": 13570 + }, + { + "epoch": 0.8690224697522566, + "grad_norm": 18.091224670410156, + "learning_rate": 1.0281860259938779e-07, + "loss": 0.3126, + "num_input_tokens_seen": 42742208, + "step": 13575 + }, + { + "epoch": 0.8693425516932335, + "grad_norm": 19.732269287109375, + "learning_rate": 1.0232566338828452e-07, + "loss": 0.3673, + "num_input_tokens_seen": 42758464, + "step": 13580 + }, + { + "epoch": 0.8696626336342104, + "grad_norm": 47.176029205322266, + "learning_rate": 1.018338449341305e-07, + "loss": 0.4102, + "num_input_tokens_seen": 42774016, + "step": 13585 + }, + { + "epoch": 0.8699827155751872, + "grad_norm": 19.62028694152832, + "learning_rate": 1.0134314785096632e-07, + "loss": 0.3942, + "num_input_tokens_seen": 42789248, + "step": 13590 + }, + { + "epoch": 0.8703027975161641, + "grad_norm": 17.851299285888672, + "learning_rate": 1.0085357275143359e-07, + "loss": 0.342, + "num_input_tokens_seen": 42804608, + "step": 13595 + }, + { + "epoch": 0.870622879457141, + "grad_norm": 32.63302230834961, + "learning_rate": 1.0036512024677268e-07, + "loss": 0.4964, + "num_input_tokens_seen": 42819584, + "step": 13600 + }, + { + "epoch": 0.8709429613981179, + "grad_norm": 9.898176193237305, + "learning_rate": 9.98777909468217e-08, + "loss": 0.2733, + "num_input_tokens_seen": 42835200, + "step": 13605 + }, + { + "epoch": 0.8712630433390948, + "grad_norm": 48.42760467529297, + "learning_rate": 9.939158546001736e-08, + "loss": 0.406, + "num_input_tokens_seen": 42852672, + "step": 13610 + }, + { + "epoch": 0.8715831252800716, + "grad_norm": 19.67852020263672, + "learning_rate": 9.890650439339299e-08, + "loss": 0.3322, + "num_input_tokens_seen": 42868672, + "step": 13615 + }, + { + "epoch": 0.8719032072210486, + "grad_norm": 55.09160232543945, + "learning_rate": 9.842254835257791e-08, + "loss": 0.416, + "num_input_tokens_seen": 42884096, + "step": 13620 + }, + { + "epoch": 0.8722232891620255, + "grad_norm": 32.343929290771484, + "learning_rate": 9.793971794179679e-08, + "loss": 0.3767, + "num_input_tokens_seen": 42898752, + "step": 13625 + }, + { + "epoch": 0.8725433711030024, + "grad_norm": 27.15031623840332, + "learning_rate": 9.745801376386931e-08, + "loss": 0.3417, + "num_input_tokens_seen": 42914688, + "step": 13630 + }, + { + "epoch": 0.8728634530439793, + "grad_norm": 42.770503997802734, + "learning_rate": 9.697743642020861e-08, + "loss": 0.3211, + "num_input_tokens_seen": 42930688, + "step": 13635 + }, + { + "epoch": 0.8731835349849562, + "grad_norm": 37.78193664550781, + "learning_rate": 9.649798651082119e-08, + "loss": 0.3372, + "num_input_tokens_seen": 42947008, + "step": 13640 + }, + { + "epoch": 0.873503616925933, + "grad_norm": 17.573001861572266, + "learning_rate": 9.601966463430588e-08, + "loss": 0.3946, + "num_input_tokens_seen": 42962816, + "step": 13645 + }, + { + "epoch": 0.8738236988669099, + "grad_norm": 15.034274101257324, + "learning_rate": 9.554247138785321e-08, + "loss": 0.3405, + "num_input_tokens_seen": 42977664, + "step": 13650 + }, + { + "epoch": 0.8741437808078868, + "grad_norm": 74.6231460571289, + "learning_rate": 9.506640736724447e-08, + "loss": 0.4684, + "num_input_tokens_seen": 42993472, + "step": 13655 + }, + { + "epoch": 0.8744638627488637, + "grad_norm": 31.8859920501709, + "learning_rate": 9.459147316685123e-08, + "loss": 0.3895, + "num_input_tokens_seen": 43010688, + "step": 13660 + }, + { + "epoch": 0.8747839446898406, + "grad_norm": 41.20021438598633, + "learning_rate": 9.41176693796345e-08, + "loss": 0.3357, + "num_input_tokens_seen": 43027392, + "step": 13665 + }, + { + "epoch": 0.8751040266308175, + "grad_norm": 39.77818298339844, + "learning_rate": 9.364499659714364e-08, + "loss": 0.4172, + "num_input_tokens_seen": 43043008, + "step": 13670 + }, + { + "epoch": 0.8754241085717944, + "grad_norm": 36.9276123046875, + "learning_rate": 9.31734554095165e-08, + "loss": 0.342, + "num_input_tokens_seen": 43059072, + "step": 13675 + }, + { + "epoch": 0.8757441905127713, + "grad_norm": 35.170780181884766, + "learning_rate": 9.270304640547744e-08, + "loss": 0.3481, + "num_input_tokens_seen": 43074624, + "step": 13680 + }, + { + "epoch": 0.8760642724537482, + "grad_norm": 30.96558380126953, + "learning_rate": 9.223377017233768e-08, + "loss": 0.3952, + "num_input_tokens_seen": 43089536, + "step": 13685 + }, + { + "epoch": 0.8763843543947251, + "grad_norm": 28.36827850341797, + "learning_rate": 9.176562729599458e-08, + "loss": 0.3535, + "num_input_tokens_seen": 43104512, + "step": 13690 + }, + { + "epoch": 0.8767044363357019, + "grad_norm": 49.10908508300781, + "learning_rate": 9.129861836092944e-08, + "loss": 0.3463, + "num_input_tokens_seen": 43120640, + "step": 13695 + }, + { + "epoch": 0.8770245182766788, + "grad_norm": 21.713356018066406, + "learning_rate": 9.083274395020845e-08, + "loss": 0.4422, + "num_input_tokens_seen": 43136384, + "step": 13700 + }, + { + "epoch": 0.8773446002176557, + "grad_norm": 23.583024978637695, + "learning_rate": 9.036800464548156e-08, + "loss": 0.4045, + "num_input_tokens_seen": 43153216, + "step": 13705 + }, + { + "epoch": 0.8776646821586326, + "grad_norm": 22.666852951049805, + "learning_rate": 8.990440102698138e-08, + "loss": 0.3473, + "num_input_tokens_seen": 43167936, + "step": 13710 + }, + { + "epoch": 0.8779847640996095, + "grad_norm": 42.15274429321289, + "learning_rate": 8.944193367352182e-08, + "loss": 0.2767, + "num_input_tokens_seen": 43183872, + "step": 13715 + }, + { + "epoch": 0.8783048460405863, + "grad_norm": 28.620649337768555, + "learning_rate": 8.898060316249944e-08, + "loss": 0.4057, + "num_input_tokens_seen": 43200256, + "step": 13720 + }, + { + "epoch": 0.8786249279815633, + "grad_norm": 46.91181182861328, + "learning_rate": 8.852041006989064e-08, + "loss": 0.3563, + "num_input_tokens_seen": 43217600, + "step": 13725 + }, + { + "epoch": 0.8789450099225402, + "grad_norm": 48.15342712402344, + "learning_rate": 8.80613549702518e-08, + "loss": 0.3785, + "num_input_tokens_seen": 43233344, + "step": 13730 + }, + { + "epoch": 0.8792650918635171, + "grad_norm": 48.054359436035156, + "learning_rate": 8.760343843671824e-08, + "loss": 0.5423, + "num_input_tokens_seen": 43249280, + "step": 13735 + }, + { + "epoch": 0.879585173804494, + "grad_norm": 74.3794937133789, + "learning_rate": 8.714666104100487e-08, + "loss": 0.4461, + "num_input_tokens_seen": 43265024, + "step": 13740 + }, + { + "epoch": 0.8799052557454708, + "grad_norm": 75.1503677368164, + "learning_rate": 8.66910233534034e-08, + "loss": 0.3544, + "num_input_tokens_seen": 43280576, + "step": 13745 + }, + { + "epoch": 0.8802253376864477, + "grad_norm": 32.35490798950195, + "learning_rate": 8.62365259427823e-08, + "loss": 0.3156, + "num_input_tokens_seen": 43296064, + "step": 13750 + }, + { + "epoch": 0.8805454196274246, + "grad_norm": 29.028377532958984, + "learning_rate": 8.578316937658758e-08, + "loss": 0.2899, + "num_input_tokens_seen": 43311552, + "step": 13755 + }, + { + "epoch": 0.8808655015684015, + "grad_norm": 18.780216217041016, + "learning_rate": 8.533095422083992e-08, + "loss": 0.3116, + "num_input_tokens_seen": 43326272, + "step": 13760 + }, + { + "epoch": 0.8811855835093784, + "grad_norm": 26.572908401489258, + "learning_rate": 8.487988104013533e-08, + "loss": 0.2906, + "num_input_tokens_seen": 43342592, + "step": 13765 + }, + { + "epoch": 0.8815056654503552, + "grad_norm": 24.25293731689453, + "learning_rate": 8.4429950397644e-08, + "loss": 0.3188, + "num_input_tokens_seen": 43357888, + "step": 13770 + }, + { + "epoch": 0.8818257473913321, + "grad_norm": 20.96013832092285, + "learning_rate": 8.398116285510948e-08, + "loss": 0.2679, + "num_input_tokens_seen": 43374272, + "step": 13775 + }, + { + "epoch": 0.8821458293323091, + "grad_norm": 47.135711669921875, + "learning_rate": 8.353351897284844e-08, + "loss": 0.2698, + "num_input_tokens_seen": 43393280, + "step": 13780 + }, + { + "epoch": 0.882465911273286, + "grad_norm": 10.159743309020996, + "learning_rate": 8.308701930974949e-08, + "loss": 0.4762, + "num_input_tokens_seen": 43409600, + "step": 13785 + }, + { + "epoch": 0.8827859932142629, + "grad_norm": 27.35509490966797, + "learning_rate": 8.264166442327269e-08, + "loss": 0.4038, + "num_input_tokens_seen": 43424384, + "step": 13790 + }, + { + "epoch": 0.8831060751552398, + "grad_norm": 41.762332916259766, + "learning_rate": 8.219745486944885e-08, + "loss": 0.2533, + "num_input_tokens_seen": 43440128, + "step": 13795 + }, + { + "epoch": 0.8834261570962166, + "grad_norm": 78.77603912353516, + "learning_rate": 8.175439120287875e-08, + "loss": 0.4597, + "num_input_tokens_seen": 43455168, + "step": 13800 + }, + { + "epoch": 0.8837462390371935, + "grad_norm": 49.571353912353516, + "learning_rate": 8.131247397673269e-08, + "loss": 0.3494, + "num_input_tokens_seen": 43472064, + "step": 13805 + }, + { + "epoch": 0.8840663209781704, + "grad_norm": 118.99240112304688, + "learning_rate": 8.087170374274921e-08, + "loss": 0.4333, + "num_input_tokens_seen": 43488000, + "step": 13810 + }, + { + "epoch": 0.8843864029191473, + "grad_norm": 27.12523078918457, + "learning_rate": 8.043208105123578e-08, + "loss": 0.2981, + "num_input_tokens_seen": 43503488, + "step": 13815 + }, + { + "epoch": 0.8847064848601242, + "grad_norm": 42.8975830078125, + "learning_rate": 7.999360645106579e-08, + "loss": 0.335, + "num_input_tokens_seen": 43518336, + "step": 13820 + }, + { + "epoch": 0.885026566801101, + "grad_norm": 17.23529052734375, + "learning_rate": 7.955628048968011e-08, + "loss": 0.2651, + "num_input_tokens_seen": 43532800, + "step": 13825 + }, + { + "epoch": 0.885346648742078, + "grad_norm": 29.590059280395508, + "learning_rate": 7.912010371308564e-08, + "loss": 0.2627, + "num_input_tokens_seen": 43547648, + "step": 13830 + }, + { + "epoch": 0.8856667306830549, + "grad_norm": 27.454540252685547, + "learning_rate": 7.868507666585422e-08, + "loss": 0.2935, + "num_input_tokens_seen": 43562688, + "step": 13835 + }, + { + "epoch": 0.8859868126240318, + "grad_norm": 45.65460968017578, + "learning_rate": 7.825119989112172e-08, + "loss": 0.4137, + "num_input_tokens_seen": 43578176, + "step": 13840 + }, + { + "epoch": 0.8863068945650087, + "grad_norm": 30.539806365966797, + "learning_rate": 7.78184739305886e-08, + "loss": 0.2938, + "num_input_tokens_seen": 43593920, + "step": 13845 + }, + { + "epoch": 0.8866269765059855, + "grad_norm": 20.917694091796875, + "learning_rate": 7.73868993245187e-08, + "loss": 0.3491, + "num_input_tokens_seen": 43610944, + "step": 13850 + }, + { + "epoch": 0.8869470584469624, + "grad_norm": 18.05341911315918, + "learning_rate": 7.695647661173754e-08, + "loss": 0.3412, + "num_input_tokens_seen": 43627008, + "step": 13855 + }, + { + "epoch": 0.8872671403879393, + "grad_norm": 44.19736862182617, + "learning_rate": 7.652720632963284e-08, + "loss": 0.3785, + "num_input_tokens_seen": 43642752, + "step": 13860 + }, + { + "epoch": 0.8875872223289162, + "grad_norm": 49.171730041503906, + "learning_rate": 7.609908901415396e-08, + "loss": 0.3396, + "num_input_tokens_seen": 43658496, + "step": 13865 + }, + { + "epoch": 0.8879073042698931, + "grad_norm": 53.71741485595703, + "learning_rate": 7.567212519981047e-08, + "loss": 0.4018, + "num_input_tokens_seen": 43674304, + "step": 13870 + }, + { + "epoch": 0.8882273862108699, + "grad_norm": 18.578672409057617, + "learning_rate": 7.524631541967108e-08, + "loss": 0.3382, + "num_input_tokens_seen": 43689536, + "step": 13875 + }, + { + "epoch": 0.8885474681518468, + "grad_norm": 72.489501953125, + "learning_rate": 7.482166020536485e-08, + "loss": 0.2903, + "num_input_tokens_seen": 43706496, + "step": 13880 + }, + { + "epoch": 0.8888675500928238, + "grad_norm": 17.48689079284668, + "learning_rate": 7.439816008707877e-08, + "loss": 0.3108, + "num_input_tokens_seen": 43721408, + "step": 13885 + }, + { + "epoch": 0.8891876320338007, + "grad_norm": 17.783830642700195, + "learning_rate": 7.397581559355748e-08, + "loss": 0.3216, + "num_input_tokens_seen": 43737536, + "step": 13890 + }, + { + "epoch": 0.8895077139747776, + "grad_norm": 33.39737319946289, + "learning_rate": 7.355462725210315e-08, + "loss": 0.4116, + "num_input_tokens_seen": 43752640, + "step": 13895 + }, + { + "epoch": 0.8898277959157544, + "grad_norm": 30.600183486938477, + "learning_rate": 7.313459558857438e-08, + "loss": 0.4081, + "num_input_tokens_seen": 43768384, + "step": 13900 + }, + { + "epoch": 0.8901478778567313, + "grad_norm": 26.679346084594727, + "learning_rate": 7.271572112738566e-08, + "loss": 0.3108, + "num_input_tokens_seen": 43784320, + "step": 13905 + }, + { + "epoch": 0.8904679597977082, + "grad_norm": 32.508792877197266, + "learning_rate": 7.229800439150657e-08, + "loss": 0.3582, + "num_input_tokens_seen": 43799232, + "step": 13910 + }, + { + "epoch": 0.8907880417386851, + "grad_norm": 64.69635009765625, + "learning_rate": 7.188144590246148e-08, + "loss": 0.3721, + "num_input_tokens_seen": 43815360, + "step": 13915 + }, + { + "epoch": 0.891108123679662, + "grad_norm": 24.958736419677734, + "learning_rate": 7.146604618032848e-08, + "loss": 0.339, + "num_input_tokens_seen": 43830336, + "step": 13920 + }, + { + "epoch": 0.8914282056206388, + "grad_norm": 36.58753967285156, + "learning_rate": 7.105180574373904e-08, + "loss": 0.4065, + "num_input_tokens_seen": 43846656, + "step": 13925 + }, + { + "epoch": 0.8917482875616157, + "grad_norm": 19.49739646911621, + "learning_rate": 7.063872510987712e-08, + "loss": 0.3231, + "num_input_tokens_seen": 43862720, + "step": 13930 + }, + { + "epoch": 0.8920683695025927, + "grad_norm": 32.121185302734375, + "learning_rate": 7.022680479447874e-08, + "loss": 0.3558, + "num_input_tokens_seen": 43876800, + "step": 13935 + }, + { + "epoch": 0.8923884514435696, + "grad_norm": 22.010385513305664, + "learning_rate": 6.98160453118316e-08, + "loss": 0.2952, + "num_input_tokens_seen": 43892160, + "step": 13940 + }, + { + "epoch": 0.8927085333845465, + "grad_norm": 38.97593688964844, + "learning_rate": 6.940644717477328e-08, + "loss": 0.333, + "num_input_tokens_seen": 43908416, + "step": 13945 + }, + { + "epoch": 0.8930286153255234, + "grad_norm": 31.57818031311035, + "learning_rate": 6.899801089469204e-08, + "loss": 0.4213, + "num_input_tokens_seen": 43923712, + "step": 13950 + }, + { + "epoch": 0.8933486972665002, + "grad_norm": 20.735111236572266, + "learning_rate": 6.85907369815254e-08, + "loss": 0.3555, + "num_input_tokens_seen": 43939520, + "step": 13955 + }, + { + "epoch": 0.8936687792074771, + "grad_norm": 51.4113883972168, + "learning_rate": 6.81846259437595e-08, + "loss": 0.3895, + "num_input_tokens_seen": 43954688, + "step": 13960 + }, + { + "epoch": 0.893988861148454, + "grad_norm": 53.543155670166016, + "learning_rate": 6.77796782884289e-08, + "loss": 0.3146, + "num_input_tokens_seen": 43969600, + "step": 13965 + }, + { + "epoch": 0.8943089430894309, + "grad_norm": 46.502647399902344, + "learning_rate": 6.737589452111526e-08, + "loss": 0.3824, + "num_input_tokens_seen": 43985472, + "step": 13970 + }, + { + "epoch": 0.8946290250304078, + "grad_norm": 39.93029022216797, + "learning_rate": 6.697327514594786e-08, + "loss": 0.3916, + "num_input_tokens_seen": 44000768, + "step": 13975 + }, + { + "epoch": 0.8949491069713846, + "grad_norm": 41.46504592895508, + "learning_rate": 6.657182066560118e-08, + "loss": 0.4586, + "num_input_tokens_seen": 44017088, + "step": 13980 + }, + { + "epoch": 0.8952691889123615, + "grad_norm": 26.99639892578125, + "learning_rate": 6.617153158129596e-08, + "loss": 0.37, + "num_input_tokens_seen": 44031488, + "step": 13985 + }, + { + "epoch": 0.8955892708533385, + "grad_norm": 37.02708435058594, + "learning_rate": 6.577240839279807e-08, + "loss": 0.337, + "num_input_tokens_seen": 44047296, + "step": 13990 + }, + { + "epoch": 0.8959093527943154, + "grad_norm": 31.63517189025879, + "learning_rate": 6.537445159841748e-08, + "loss": 0.3143, + "num_input_tokens_seen": 44063744, + "step": 13995 + }, + { + "epoch": 0.8962294347352923, + "grad_norm": 34.43181610107422, + "learning_rate": 6.497766169500752e-08, + "loss": 0.3936, + "num_input_tokens_seen": 44079168, + "step": 14000 + }, + { + "epoch": 0.8965495166762691, + "grad_norm": 13.677638053894043, + "learning_rate": 6.458203917796546e-08, + "loss": 0.2643, + "num_input_tokens_seen": 44093824, + "step": 14005 + }, + { + "epoch": 0.896869598617246, + "grad_norm": 19.27773666381836, + "learning_rate": 6.418758454123041e-08, + "loss": 0.455, + "num_input_tokens_seen": 44111296, + "step": 14010 + }, + { + "epoch": 0.8971896805582229, + "grad_norm": 18.031564712524414, + "learning_rate": 6.379429827728377e-08, + "loss": 0.3905, + "num_input_tokens_seen": 44128000, + "step": 14015 + }, + { + "epoch": 0.8975097624991998, + "grad_norm": 17.980560302734375, + "learning_rate": 6.340218087714799e-08, + "loss": 0.3833, + "num_input_tokens_seen": 44143488, + "step": 14020 + }, + { + "epoch": 0.8978298444401767, + "grad_norm": 84.56553649902344, + "learning_rate": 6.301123283038634e-08, + "loss": 0.3567, + "num_input_tokens_seen": 44158976, + "step": 14025 + }, + { + "epoch": 0.8981499263811535, + "grad_norm": 20.843826293945312, + "learning_rate": 6.262145462510193e-08, + "loss": 0.319, + "num_input_tokens_seen": 44175808, + "step": 14030 + }, + { + "epoch": 0.8984700083221304, + "grad_norm": 44.17280578613281, + "learning_rate": 6.223284674793738e-08, + "loss": 0.2817, + "num_input_tokens_seen": 44190336, + "step": 14035 + }, + { + "epoch": 0.8987900902631074, + "grad_norm": 35.57537078857422, + "learning_rate": 6.184540968407437e-08, + "loss": 0.3835, + "num_input_tokens_seen": 44205696, + "step": 14040 + }, + { + "epoch": 0.8991101722040843, + "grad_norm": 26.58342742919922, + "learning_rate": 6.145914391723239e-08, + "loss": 0.3546, + "num_input_tokens_seen": 44222016, + "step": 14045 + }, + { + "epoch": 0.8994302541450612, + "grad_norm": 25.470823287963867, + "learning_rate": 6.107404992966902e-08, + "loss": 0.3285, + "num_input_tokens_seen": 44238592, + "step": 14050 + }, + { + "epoch": 0.899750336086038, + "grad_norm": 23.68887710571289, + "learning_rate": 6.069012820217856e-08, + "loss": 0.2517, + "num_input_tokens_seen": 44254016, + "step": 14055 + }, + { + "epoch": 0.9000704180270149, + "grad_norm": 28.1870059967041, + "learning_rate": 6.030737921409168e-08, + "loss": 0.3757, + "num_input_tokens_seen": 44269376, + "step": 14060 + }, + { + "epoch": 0.9003904999679918, + "grad_norm": 53.616127014160156, + "learning_rate": 5.992580344327503e-08, + "loss": 0.4646, + "num_input_tokens_seen": 44284672, + "step": 14065 + }, + { + "epoch": 0.9007105819089687, + "grad_norm": 33.5253791809082, + "learning_rate": 5.954540136613051e-08, + "loss": 0.352, + "num_input_tokens_seen": 44300224, + "step": 14070 + }, + { + "epoch": 0.9010306638499456, + "grad_norm": 24.468204498291016, + "learning_rate": 5.916617345759456e-08, + "loss": 0.3451, + "num_input_tokens_seen": 44315264, + "step": 14075 + }, + { + "epoch": 0.901094680238141, + "eval_loss": 0.3543796241283417, + "eval_runtime": 49.176, + "eval_samples_per_second": 282.373, + "eval_steps_per_second": 35.302, + "num_input_tokens_seen": 44318848, + "step": 14076 + }, + { + "epoch": 0.9013507457909224, + "grad_norm": 45.981563568115234, + "learning_rate": 5.878812019113766e-08, + "loss": 0.4234, + "num_input_tokens_seen": 44330176, + "step": 14080 + }, + { + "epoch": 0.9016708277318993, + "grad_norm": 22.737422943115234, + "learning_rate": 5.84112420387638e-08, + "loss": 0.2892, + "num_input_tokens_seen": 44345152, + "step": 14085 + }, + { + "epoch": 0.9019909096728762, + "grad_norm": 31.271459579467773, + "learning_rate": 5.8035539471009697e-08, + "loss": 0.3656, + "num_input_tokens_seen": 44361152, + "step": 14090 + }, + { + "epoch": 0.9023109916138532, + "grad_norm": 33.406707763671875, + "learning_rate": 5.7661012956944253e-08, + "loss": 0.4078, + "num_input_tokens_seen": 44376128, + "step": 14095 + }, + { + "epoch": 0.9026310735548301, + "grad_norm": 17.146968841552734, + "learning_rate": 5.728766296416876e-08, + "loss": 0.2842, + "num_input_tokens_seen": 44392192, + "step": 14100 + }, + { + "epoch": 0.902951155495807, + "grad_norm": 34.22679901123047, + "learning_rate": 5.6915489958814453e-08, + "loss": 0.4079, + "num_input_tokens_seen": 44407680, + "step": 14105 + }, + { + "epoch": 0.9032712374367838, + "grad_norm": 53.51115798950195, + "learning_rate": 5.654449440554399e-08, + "loss": 0.4093, + "num_input_tokens_seen": 44424384, + "step": 14110 + }, + { + "epoch": 0.9035913193777607, + "grad_norm": 21.632587432861328, + "learning_rate": 5.617467676754972e-08, + "loss": 0.3752, + "num_input_tokens_seen": 44439744, + "step": 14115 + }, + { + "epoch": 0.9039114013187376, + "grad_norm": 57.51222610473633, + "learning_rate": 5.580603750655344e-08, + "loss": 0.3012, + "num_input_tokens_seen": 44454272, + "step": 14120 + }, + { + "epoch": 0.9042314832597145, + "grad_norm": 33.1247444152832, + "learning_rate": 5.543857708280497e-08, + "loss": 0.3578, + "num_input_tokens_seen": 44468992, + "step": 14125 + }, + { + "epoch": 0.9045515652006914, + "grad_norm": 41.706947326660156, + "learning_rate": 5.507229595508367e-08, + "loss": 0.4819, + "num_input_tokens_seen": 44484864, + "step": 14130 + }, + { + "epoch": 0.9048716471416682, + "grad_norm": 14.103269577026367, + "learning_rate": 5.4707194580695504e-08, + "loss": 0.289, + "num_input_tokens_seen": 44499968, + "step": 14135 + }, + { + "epoch": 0.9051917290826451, + "grad_norm": 35.217655181884766, + "learning_rate": 5.4343273415473846e-08, + "loss": 0.4239, + "num_input_tokens_seen": 44517952, + "step": 14140 + }, + { + "epoch": 0.905511811023622, + "grad_norm": 24.536203384399414, + "learning_rate": 5.3980532913778576e-08, + "loss": 0.3421, + "num_input_tokens_seen": 44532928, + "step": 14145 + }, + { + "epoch": 0.905831892964599, + "grad_norm": 32.02094650268555, + "learning_rate": 5.361897352849554e-08, + "loss": 0.3955, + "num_input_tokens_seen": 44548288, + "step": 14150 + }, + { + "epoch": 0.9061519749055759, + "grad_norm": 20.607261657714844, + "learning_rate": 5.325859571103586e-08, + "loss": 0.3331, + "num_input_tokens_seen": 44563712, + "step": 14155 + }, + { + "epoch": 0.9064720568465527, + "grad_norm": 21.235889434814453, + "learning_rate": 5.289939991133508e-08, + "loss": 0.3333, + "num_input_tokens_seen": 44579264, + "step": 14160 + }, + { + "epoch": 0.9067921387875296, + "grad_norm": 12.65000057220459, + "learning_rate": 5.2541386577853895e-08, + "loss": 0.2384, + "num_input_tokens_seen": 44594176, + "step": 14165 + }, + { + "epoch": 0.9071122207285065, + "grad_norm": 16.73200225830078, + "learning_rate": 5.2184556157576e-08, + "loss": 0.2502, + "num_input_tokens_seen": 44609664, + "step": 14170 + }, + { + "epoch": 0.9074323026694834, + "grad_norm": 52.27291488647461, + "learning_rate": 5.1828909096008234e-08, + "loss": 0.3649, + "num_input_tokens_seen": 44626944, + "step": 14175 + }, + { + "epoch": 0.9077523846104603, + "grad_norm": 18.205657958984375, + "learning_rate": 5.14744458371803e-08, + "loss": 0.2331, + "num_input_tokens_seen": 44643520, + "step": 14180 + }, + { + "epoch": 0.9080724665514371, + "grad_norm": 87.80847930908203, + "learning_rate": 5.1121166823643646e-08, + "loss": 0.5075, + "num_input_tokens_seen": 44657984, + "step": 14185 + }, + { + "epoch": 0.908392548492414, + "grad_norm": 28.186279296875, + "learning_rate": 5.076907249647122e-08, + "loss": 0.376, + "num_input_tokens_seen": 44673024, + "step": 14190 + }, + { + "epoch": 0.9087126304333909, + "grad_norm": 25.26058578491211, + "learning_rate": 5.0418163295257055e-08, + "loss": 0.412, + "num_input_tokens_seen": 44687424, + "step": 14195 + }, + { + "epoch": 0.9090327123743679, + "grad_norm": 40.44475555419922, + "learning_rate": 5.006843965811536e-08, + "loss": 0.2867, + "num_input_tokens_seen": 44702976, + "step": 14200 + }, + { + "epoch": 0.9093527943153448, + "grad_norm": 46.02883529663086, + "learning_rate": 4.971990202168008e-08, + "loss": 0.482, + "num_input_tokens_seen": 44718144, + "step": 14205 + }, + { + "epoch": 0.9096728762563216, + "grad_norm": 26.443368911743164, + "learning_rate": 4.9372550821104697e-08, + "loss": 0.3277, + "num_input_tokens_seen": 44734912, + "step": 14210 + }, + { + "epoch": 0.9099929581972985, + "grad_norm": 20.41611671447754, + "learning_rate": 4.902638649006119e-08, + "loss": 0.311, + "num_input_tokens_seen": 44749888, + "step": 14215 + }, + { + "epoch": 0.9103130401382754, + "grad_norm": 19.726547241210938, + "learning_rate": 4.868140946073973e-08, + "loss": 0.3201, + "num_input_tokens_seen": 44764544, + "step": 14220 + }, + { + "epoch": 0.9106331220792523, + "grad_norm": 32.19831848144531, + "learning_rate": 4.833762016384857e-08, + "loss": 0.2995, + "num_input_tokens_seen": 44780992, + "step": 14225 + }, + { + "epoch": 0.9109532040202292, + "grad_norm": 50.0634880065918, + "learning_rate": 4.799501902861214e-08, + "loss": 0.3879, + "num_input_tokens_seen": 44796672, + "step": 14230 + }, + { + "epoch": 0.911273285961206, + "grad_norm": 44.15312957763672, + "learning_rate": 4.765360648277217e-08, + "loss": 0.4313, + "num_input_tokens_seen": 44812224, + "step": 14235 + }, + { + "epoch": 0.9115933679021829, + "grad_norm": 38.931339263916016, + "learning_rate": 4.7313382952586465e-08, + "loss": 0.4254, + "num_input_tokens_seen": 44827136, + "step": 14240 + }, + { + "epoch": 0.9119134498431598, + "grad_norm": 16.312923431396484, + "learning_rate": 4.6974348862828027e-08, + "loss": 0.3787, + "num_input_tokens_seen": 44842176, + "step": 14245 + }, + { + "epoch": 0.9122335317841367, + "grad_norm": 47.28225326538086, + "learning_rate": 4.663650463678448e-08, + "loss": 0.4211, + "num_input_tokens_seen": 44858880, + "step": 14250 + }, + { + "epoch": 0.9125536137251137, + "grad_norm": 21.42548942565918, + "learning_rate": 4.629985069625875e-08, + "loss": 0.4399, + "num_input_tokens_seen": 44875328, + "step": 14255 + }, + { + "epoch": 0.9128736956660906, + "grad_norm": 41.41118240356445, + "learning_rate": 4.596438746156728e-08, + "loss": 0.3625, + "num_input_tokens_seen": 44892032, + "step": 14260 + }, + { + "epoch": 0.9131937776070674, + "grad_norm": 35.68510818481445, + "learning_rate": 4.563011535153949e-08, + "loss": 0.3618, + "num_input_tokens_seen": 44907328, + "step": 14265 + }, + { + "epoch": 0.9135138595480443, + "grad_norm": 26.231754302978516, + "learning_rate": 4.52970347835181e-08, + "loss": 0.2686, + "num_input_tokens_seen": 44922560, + "step": 14270 + }, + { + "epoch": 0.9138339414890212, + "grad_norm": 34.4133186340332, + "learning_rate": 4.496514617335845e-08, + "loss": 0.3256, + "num_input_tokens_seen": 44937728, + "step": 14275 + }, + { + "epoch": 0.9141540234299981, + "grad_norm": 42.511531829833984, + "learning_rate": 4.4634449935427197e-08, + "loss": 0.3568, + "num_input_tokens_seen": 44954560, + "step": 14280 + }, + { + "epoch": 0.914474105370975, + "grad_norm": 28.035154342651367, + "learning_rate": 4.430494648260219e-08, + "loss": 0.3032, + "num_input_tokens_seen": 44971520, + "step": 14285 + }, + { + "epoch": 0.9147941873119518, + "grad_norm": 35.39820098876953, + "learning_rate": 4.397663622627279e-08, + "loss": 0.4391, + "num_input_tokens_seen": 44987392, + "step": 14290 + }, + { + "epoch": 0.9151142692529287, + "grad_norm": 25.651020050048828, + "learning_rate": 4.364951957633789e-08, + "loss": 0.3116, + "num_input_tokens_seen": 45002688, + "step": 14295 + }, + { + "epoch": 0.9154343511939056, + "grad_norm": 29.278078079223633, + "learning_rate": 4.332359694120669e-08, + "loss": 0.2874, + "num_input_tokens_seen": 45017792, + "step": 14300 + }, + { + "epoch": 0.9157544331348826, + "grad_norm": 33.1219482421875, + "learning_rate": 4.299886872779734e-08, + "loss": 0.3561, + "num_input_tokens_seen": 45032640, + "step": 14305 + }, + { + "epoch": 0.9160745150758595, + "grad_norm": 29.479825973510742, + "learning_rate": 4.267533534153678e-08, + "loss": 0.2945, + "num_input_tokens_seen": 45048256, + "step": 14310 + }, + { + "epoch": 0.9163945970168363, + "grad_norm": 26.894004821777344, + "learning_rate": 4.2352997186360316e-08, + "loss": 0.3251, + "num_input_tokens_seen": 45064192, + "step": 14315 + }, + { + "epoch": 0.9167146789578132, + "grad_norm": 19.898136138916016, + "learning_rate": 4.203185466471082e-08, + "loss": 0.321, + "num_input_tokens_seen": 45079488, + "step": 14320 + }, + { + "epoch": 0.9170347608987901, + "grad_norm": 20.337265014648438, + "learning_rate": 4.1711908177538556e-08, + "loss": 0.3791, + "num_input_tokens_seen": 45095616, + "step": 14325 + }, + { + "epoch": 0.917354842839767, + "grad_norm": 45.242088317871094, + "learning_rate": 4.139315812430055e-08, + "loss": 0.3797, + "num_input_tokens_seen": 45110592, + "step": 14330 + }, + { + "epoch": 0.9176749247807439, + "grad_norm": 29.204076766967773, + "learning_rate": 4.1075604902959915e-08, + "loss": 0.3756, + "num_input_tokens_seen": 45127168, + "step": 14335 + }, + { + "epoch": 0.9179950067217207, + "grad_norm": 31.663959503173828, + "learning_rate": 4.07592489099855e-08, + "loss": 0.3157, + "num_input_tokens_seen": 45142208, + "step": 14340 + }, + { + "epoch": 0.9183150886626976, + "grad_norm": 38.191898345947266, + "learning_rate": 4.044409054035147e-08, + "loss": 0.3917, + "num_input_tokens_seen": 45157184, + "step": 14345 + }, + { + "epoch": 0.9186351706036745, + "grad_norm": 15.774205207824707, + "learning_rate": 4.0130130187537195e-08, + "loss": 0.3891, + "num_input_tokens_seen": 45174464, + "step": 14350 + }, + { + "epoch": 0.9189552525446514, + "grad_norm": 36.91510772705078, + "learning_rate": 3.981736824352522e-08, + "loss": 0.3157, + "num_input_tokens_seen": 45188992, + "step": 14355 + }, + { + "epoch": 0.9192753344856284, + "grad_norm": 32.23750305175781, + "learning_rate": 3.950580509880286e-08, + "loss": 0.4661, + "num_input_tokens_seen": 45204032, + "step": 14360 + }, + { + "epoch": 0.9195954164266052, + "grad_norm": 46.32685089111328, + "learning_rate": 3.9195441142360066e-08, + "loss": 0.4012, + "num_input_tokens_seen": 45219328, + "step": 14365 + }, + { + "epoch": 0.9199154983675821, + "grad_norm": 23.546079635620117, + "learning_rate": 3.888627676169043e-08, + "loss": 0.3271, + "num_input_tokens_seen": 45235584, + "step": 14370 + }, + { + "epoch": 0.920235580308559, + "grad_norm": 39.16623306274414, + "learning_rate": 3.857831234278886e-08, + "loss": 0.3709, + "num_input_tokens_seen": 45250880, + "step": 14375 + }, + { + "epoch": 0.9205556622495359, + "grad_norm": 31.843650817871094, + "learning_rate": 3.827154827015255e-08, + "loss": 0.4085, + "num_input_tokens_seen": 45266752, + "step": 14380 + }, + { + "epoch": 0.9208757441905128, + "grad_norm": 12.346802711486816, + "learning_rate": 3.7965984926780383e-08, + "loss": 0.2914, + "num_input_tokens_seen": 45282496, + "step": 14385 + }, + { + "epoch": 0.9211958261314896, + "grad_norm": 41.83573532104492, + "learning_rate": 3.766162269417139e-08, + "loss": 0.3577, + "num_input_tokens_seen": 45297024, + "step": 14390 + }, + { + "epoch": 0.9215159080724665, + "grad_norm": 45.033992767333984, + "learning_rate": 3.73584619523255e-08, + "loss": 0.3693, + "num_input_tokens_seen": 45314176, + "step": 14395 + }, + { + "epoch": 0.9218359900134434, + "grad_norm": 21.012765884399414, + "learning_rate": 3.7056503079742616e-08, + "loss": 0.3557, + "num_input_tokens_seen": 45329344, + "step": 14400 + }, + { + "epoch": 0.9221560719544203, + "grad_norm": 29.65179443359375, + "learning_rate": 3.6755746453421945e-08, + "loss": 0.3428, + "num_input_tokens_seen": 45344384, + "step": 14405 + }, + { + "epoch": 0.9224761538953972, + "grad_norm": 13.857353210449219, + "learning_rate": 3.645619244886145e-08, + "loss": 0.2869, + "num_input_tokens_seen": 45360192, + "step": 14410 + }, + { + "epoch": 0.9227962358363742, + "grad_norm": 14.174830436706543, + "learning_rate": 3.615784144005796e-08, + "loss": 0.3103, + "num_input_tokens_seen": 45376000, + "step": 14415 + }, + { + "epoch": 0.923116317777351, + "grad_norm": 30.094505310058594, + "learning_rate": 3.5860693799506184e-08, + "loss": 0.4093, + "num_input_tokens_seen": 45390400, + "step": 14420 + }, + { + "epoch": 0.9234363997183279, + "grad_norm": 29.435256958007812, + "learning_rate": 3.5564749898198466e-08, + "loss": 0.4518, + "num_input_tokens_seen": 45406976, + "step": 14425 + }, + { + "epoch": 0.9237564816593048, + "grad_norm": 33.67948913574219, + "learning_rate": 3.527001010562425e-08, + "loss": 0.3481, + "num_input_tokens_seen": 45422080, + "step": 14430 + }, + { + "epoch": 0.9240765636002817, + "grad_norm": 52.893489837646484, + "learning_rate": 3.4976474789769504e-08, + "loss": 0.3429, + "num_input_tokens_seen": 45439296, + "step": 14435 + }, + { + "epoch": 0.9243966455412586, + "grad_norm": 34.073848724365234, + "learning_rate": 3.4684144317116636e-08, + "loss": 0.2983, + "num_input_tokens_seen": 45454208, + "step": 14440 + }, + { + "epoch": 0.9247167274822354, + "grad_norm": 18.271291732788086, + "learning_rate": 3.439301905264369e-08, + "loss": 0.3001, + "num_input_tokens_seen": 45470400, + "step": 14445 + }, + { + "epoch": 0.9250368094232123, + "grad_norm": 46.16067123413086, + "learning_rate": 3.410309935982403e-08, + "loss": 0.3212, + "num_input_tokens_seen": 45486528, + "step": 14450 + }, + { + "epoch": 0.9253568913641892, + "grad_norm": 17.307554244995117, + "learning_rate": 3.381438560062555e-08, + "loss": 0.3429, + "num_input_tokens_seen": 45501440, + "step": 14455 + }, + { + "epoch": 0.9256769733051661, + "grad_norm": 38.451210021972656, + "learning_rate": 3.3526878135511025e-08, + "loss": 0.3181, + "num_input_tokens_seen": 45517760, + "step": 14460 + }, + { + "epoch": 0.9259970552461431, + "grad_norm": 48.87675094604492, + "learning_rate": 3.324057732343666e-08, + "loss": 0.3642, + "num_input_tokens_seen": 45533056, + "step": 14465 + }, + { + "epoch": 0.9263171371871199, + "grad_norm": 24.82399559020996, + "learning_rate": 3.295548352185262e-08, + "loss": 0.4131, + "num_input_tokens_seen": 45549248, + "step": 14470 + }, + { + "epoch": 0.9266372191280968, + "grad_norm": 36.503944396972656, + "learning_rate": 3.2671597086701753e-08, + "loss": 0.3477, + "num_input_tokens_seen": 45565760, + "step": 14475 + }, + { + "epoch": 0.9269573010690737, + "grad_norm": 23.015771865844727, + "learning_rate": 3.238891837241964e-08, + "loss": 0.3246, + "num_input_tokens_seen": 45581568, + "step": 14480 + }, + { + "epoch": 0.9272773830100506, + "grad_norm": 43.855220794677734, + "learning_rate": 3.210744773193386e-08, + "loss": 0.4038, + "num_input_tokens_seen": 45596928, + "step": 14485 + }, + { + "epoch": 0.9275974649510275, + "grad_norm": 45.25807189941406, + "learning_rate": 3.182718551666386e-08, + "loss": 0.2948, + "num_input_tokens_seen": 45612800, + "step": 14490 + }, + { + "epoch": 0.9279175468920043, + "grad_norm": 79.2214584350586, + "learning_rate": 3.154813207652063e-08, + "loss": 0.4114, + "num_input_tokens_seen": 45627584, + "step": 14495 + }, + { + "epoch": 0.9282376288329812, + "grad_norm": 48.060794830322266, + "learning_rate": 3.1270287759905143e-08, + "loss": 0.3379, + "num_input_tokens_seen": 45643840, + "step": 14500 + }, + { + "epoch": 0.9285577107739581, + "grad_norm": 15.610395431518555, + "learning_rate": 3.0993652913709476e-08, + "loss": 0.2884, + "num_input_tokens_seen": 45659072, + "step": 14505 + }, + { + "epoch": 0.928877792714935, + "grad_norm": 27.879131317138672, + "learning_rate": 3.0718227883315796e-08, + "loss": 0.482, + "num_input_tokens_seen": 45675328, + "step": 14510 + }, + { + "epoch": 0.9291978746559119, + "grad_norm": 39.35497283935547, + "learning_rate": 3.044401301259503e-08, + "loss": 0.368, + "num_input_tokens_seen": 45690816, + "step": 14515 + }, + { + "epoch": 0.9295179565968889, + "grad_norm": 15.0499267578125, + "learning_rate": 3.017100864390787e-08, + "loss": 0.3333, + "num_input_tokens_seen": 45706432, + "step": 14520 + }, + { + "epoch": 0.9298380385378657, + "grad_norm": 51.364315032958984, + "learning_rate": 2.9899215118103446e-08, + "loss": 0.3446, + "num_input_tokens_seen": 45721920, + "step": 14525 + }, + { + "epoch": 0.9301581204788426, + "grad_norm": 15.155922889709473, + "learning_rate": 2.9628632774519435e-08, + "loss": 0.3433, + "num_input_tokens_seen": 45738048, + "step": 14530 + }, + { + "epoch": 0.9304782024198195, + "grad_norm": 24.992616653442383, + "learning_rate": 2.9359261950980485e-08, + "loss": 0.3308, + "num_input_tokens_seen": 45753856, + "step": 14535 + }, + { + "epoch": 0.9307982843607964, + "grad_norm": 22.78838539123535, + "learning_rate": 2.90911029837998e-08, + "loss": 0.3015, + "num_input_tokens_seen": 45768704, + "step": 14540 + }, + { + "epoch": 0.9311183663017732, + "grad_norm": 28.63710594177246, + "learning_rate": 2.8824156207776673e-08, + "loss": 0.2789, + "num_input_tokens_seen": 45783936, + "step": 14545 + }, + { + "epoch": 0.9314384482427501, + "grad_norm": 115.81269836425781, + "learning_rate": 2.8558421956197397e-08, + "loss": 0.4514, + "num_input_tokens_seen": 45800320, + "step": 14550 + }, + { + "epoch": 0.931758530183727, + "grad_norm": 36.78664779663086, + "learning_rate": 2.829390056083436e-08, + "loss": 0.3864, + "num_input_tokens_seen": 45816512, + "step": 14555 + }, + { + "epoch": 0.9320786121247039, + "grad_norm": 21.332889556884766, + "learning_rate": 2.8030592351945492e-08, + "loss": 0.3037, + "num_input_tokens_seen": 45831936, + "step": 14560 + }, + { + "epoch": 0.9323986940656808, + "grad_norm": 20.547264099121094, + "learning_rate": 2.776849765827427e-08, + "loss": 0.2968, + "num_input_tokens_seen": 45846784, + "step": 14565 + }, + { + "epoch": 0.9327187760066578, + "grad_norm": 39.512290954589844, + "learning_rate": 2.750761680704905e-08, + "loss": 0.4282, + "num_input_tokens_seen": 45862080, + "step": 14570 + }, + { + "epoch": 0.9330388579476346, + "grad_norm": 40.28529357910156, + "learning_rate": 2.724795012398251e-08, + "loss": 0.3937, + "num_input_tokens_seen": 45878528, + "step": 14575 + }, + { + "epoch": 0.9333589398886115, + "grad_norm": 36.721534729003906, + "learning_rate": 2.6989497933271543e-08, + "loss": 0.3737, + "num_input_tokens_seen": 45894016, + "step": 14580 + }, + { + "epoch": 0.9336790218295884, + "grad_norm": 18.749881744384766, + "learning_rate": 2.673226055759692e-08, + "loss": 0.3295, + "num_input_tokens_seen": 45909504, + "step": 14585 + }, + { + "epoch": 0.9339991037705653, + "grad_norm": 31.62596321105957, + "learning_rate": 2.6476238318122402e-08, + "loss": 0.338, + "num_input_tokens_seen": 45925376, + "step": 14590 + }, + { + "epoch": 0.9343191857115422, + "grad_norm": 33.345306396484375, + "learning_rate": 2.6221431534494742e-08, + "loss": 0.3956, + "num_input_tokens_seen": 45940224, + "step": 14595 + }, + { + "epoch": 0.934639267652519, + "grad_norm": 57.66178894042969, + "learning_rate": 2.5967840524843243e-08, + "loss": 0.3521, + "num_input_tokens_seen": 45955072, + "step": 14600 + }, + { + "epoch": 0.9349593495934959, + "grad_norm": 33.97639846801758, + "learning_rate": 2.5715465605779195e-08, + "loss": 0.4287, + "num_input_tokens_seen": 45970240, + "step": 14605 + }, + { + "epoch": 0.9352794315344728, + "grad_norm": 83.71870422363281, + "learning_rate": 2.5464307092395777e-08, + "loss": 0.406, + "num_input_tokens_seen": 45985856, + "step": 14610 + }, + { + "epoch": 0.9355995134754497, + "grad_norm": 20.36864471435547, + "learning_rate": 2.5214365298267148e-08, + "loss": 0.3398, + "num_input_tokens_seen": 46000256, + "step": 14615 + }, + { + "epoch": 0.9359195954164266, + "grad_norm": 26.265127182006836, + "learning_rate": 2.4965640535448917e-08, + "loss": 0.32, + "num_input_tokens_seen": 46015616, + "step": 14620 + }, + { + "epoch": 0.9362396773574035, + "grad_norm": 32.42552185058594, + "learning_rate": 2.471813311447657e-08, + "loss": 0.3741, + "num_input_tokens_seen": 46031040, + "step": 14625 + }, + { + "epoch": 0.9365597592983804, + "grad_norm": 37.86249542236328, + "learning_rate": 2.4471843344365915e-08, + "loss": 0.3304, + "num_input_tokens_seen": 46046016, + "step": 14630 + }, + { + "epoch": 0.9368798412393573, + "grad_norm": 17.967323303222656, + "learning_rate": 2.42267715326131e-08, + "loss": 0.2715, + "num_input_tokens_seen": 46062528, + "step": 14635 + }, + { + "epoch": 0.9371999231803342, + "grad_norm": 31.25685691833496, + "learning_rate": 2.3982917985192697e-08, + "loss": 0.3426, + "num_input_tokens_seen": 46078144, + "step": 14640 + }, + { + "epoch": 0.9375200051213111, + "grad_norm": 53.25637435913086, + "learning_rate": 2.3740283006558838e-08, + "loss": 0.3748, + "num_input_tokens_seen": 46096896, + "step": 14645 + }, + { + "epoch": 0.9378400870622879, + "grad_norm": 47.64904022216797, + "learning_rate": 2.349886689964431e-08, + "loss": 0.3715, + "num_input_tokens_seen": 46111808, + "step": 14650 + }, + { + "epoch": 0.9381601690032648, + "grad_norm": 36.294498443603516, + "learning_rate": 2.32586699658599e-08, + "loss": 0.2804, + "num_input_tokens_seen": 46127936, + "step": 14655 + }, + { + "epoch": 0.9384802509442417, + "grad_norm": 22.60685920715332, + "learning_rate": 2.3019692505094056e-08, + "loss": 0.3522, + "num_input_tokens_seen": 46142848, + "step": 14660 + }, + { + "epoch": 0.9388003328852186, + "grad_norm": 51.22877502441406, + "learning_rate": 2.2781934815713223e-08, + "loss": 0.5364, + "num_input_tokens_seen": 46158848, + "step": 14665 + }, + { + "epoch": 0.9391204148261955, + "grad_norm": 28.425065994262695, + "learning_rate": 2.254539719456061e-08, + "loss": 0.3566, + "num_input_tokens_seen": 46174912, + "step": 14670 + }, + { + "epoch": 0.9394404967671725, + "grad_norm": 19.683509826660156, + "learning_rate": 2.231007993695633e-08, + "loss": 0.2587, + "num_input_tokens_seen": 46189248, + "step": 14675 + }, + { + "epoch": 0.9397605787081493, + "grad_norm": 19.60419273376465, + "learning_rate": 2.2075983336696357e-08, + "loss": 0.314, + "num_input_tokens_seen": 46204928, + "step": 14680 + }, + { + "epoch": 0.9400806606491262, + "grad_norm": 40.57781982421875, + "learning_rate": 2.1843107686053353e-08, + "loss": 0.3916, + "num_input_tokens_seen": 46220160, + "step": 14685 + }, + { + "epoch": 0.9404007425901031, + "grad_norm": 24.233959197998047, + "learning_rate": 2.1611453275775405e-08, + "loss": 0.4249, + "num_input_tokens_seen": 46235584, + "step": 14690 + }, + { + "epoch": 0.94072082453108, + "grad_norm": 20.722745895385742, + "learning_rate": 2.138102039508538e-08, + "loss": 0.2691, + "num_input_tokens_seen": 46251904, + "step": 14695 + }, + { + "epoch": 0.9410409064720568, + "grad_norm": 43.360191345214844, + "learning_rate": 2.1151809331681703e-08, + "loss": 0.3948, + "num_input_tokens_seen": 46268032, + "step": 14700 + }, + { + "epoch": 0.9413609884130337, + "grad_norm": 54.16123962402344, + "learning_rate": 2.092382037173701e-08, + "loss": 0.3362, + "num_input_tokens_seen": 46283392, + "step": 14705 + }, + { + "epoch": 0.9416810703540106, + "grad_norm": 26.91010856628418, + "learning_rate": 2.0697053799898277e-08, + "loss": 0.2966, + "num_input_tokens_seen": 46298752, + "step": 14710 + }, + { + "epoch": 0.9420011522949875, + "grad_norm": 29.30316734313965, + "learning_rate": 2.0471509899286144e-08, + "loss": 0.3392, + "num_input_tokens_seen": 46314624, + "step": 14715 + }, + { + "epoch": 0.9423212342359644, + "grad_norm": 25.85833740234375, + "learning_rate": 2.0247188951494797e-08, + "loss": 0.3403, + "num_input_tokens_seen": 46331712, + "step": 14720 + }, + { + "epoch": 0.9426413161769412, + "grad_norm": 49.84812927246094, + "learning_rate": 2.0024091236591655e-08, + "loss": 0.5398, + "num_input_tokens_seen": 46347200, + "step": 14725 + }, + { + "epoch": 0.9429613981179182, + "grad_norm": 17.558185577392578, + "learning_rate": 1.98022170331168e-08, + "loss": 0.3166, + "num_input_tokens_seen": 46363008, + "step": 14730 + }, + { + "epoch": 0.9432814800588951, + "grad_norm": 32.16617202758789, + "learning_rate": 1.9581566618082744e-08, + "loss": 0.3797, + "num_input_tokens_seen": 46378816, + "step": 14735 + }, + { + "epoch": 0.943601561999872, + "grad_norm": 57.684410095214844, + "learning_rate": 1.9362140266974025e-08, + "loss": 0.3915, + "num_input_tokens_seen": 46395200, + "step": 14740 + }, + { + "epoch": 0.9439216439408489, + "grad_norm": 53.940555572509766, + "learning_rate": 1.9143938253747383e-08, + "loss": 0.3198, + "num_input_tokens_seen": 46411840, + "step": 14745 + }, + { + "epoch": 0.9442417258818258, + "grad_norm": 25.7904109954834, + "learning_rate": 1.892696085083023e-08, + "loss": 0.4515, + "num_input_tokens_seen": 46427776, + "step": 14750 + }, + { + "epoch": 0.9445618078228026, + "grad_norm": 36.919376373291016, + "learning_rate": 1.8711208329121542e-08, + "loss": 0.3118, + "num_input_tokens_seen": 46444736, + "step": 14755 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 26.23403549194336, + "learning_rate": 1.849668095799084e-08, + "loss": 0.3325, + "num_input_tokens_seen": 46460672, + "step": 14760 + }, + { + "epoch": 0.9452019717047564, + "grad_norm": 24.87689781188965, + "learning_rate": 1.8283379005278098e-08, + "loss": 0.3344, + "num_input_tokens_seen": 46476736, + "step": 14765 + }, + { + "epoch": 0.9455220536457333, + "grad_norm": 13.15492057800293, + "learning_rate": 1.807130273729329e-08, + "loss": 0.3231, + "num_input_tokens_seen": 46492416, + "step": 14770 + }, + { + "epoch": 0.9458421355867102, + "grad_norm": 36.111331939697266, + "learning_rate": 1.7860452418816173e-08, + "loss": 0.3349, + "num_input_tokens_seen": 46507264, + "step": 14775 + }, + { + "epoch": 0.946162217527687, + "grad_norm": 28.380617141723633, + "learning_rate": 1.7650828313095834e-08, + "loss": 0.3288, + "num_input_tokens_seen": 46524224, + "step": 14780 + }, + { + "epoch": 0.946482299468664, + "grad_norm": 14.132955551147461, + "learning_rate": 1.7442430681850362e-08, + "loss": 0.3101, + "num_input_tokens_seen": 46539456, + "step": 14785 + }, + { + "epoch": 0.9468023814096409, + "grad_norm": 38.144737243652344, + "learning_rate": 1.723525978526652e-08, + "loss": 0.4302, + "num_input_tokens_seen": 46555136, + "step": 14790 + }, + { + "epoch": 0.9471224633506178, + "grad_norm": 27.17024040222168, + "learning_rate": 1.702931588199996e-08, + "loss": 0.3501, + "num_input_tokens_seen": 46570432, + "step": 14795 + }, + { + "epoch": 0.9474425452915947, + "grad_norm": 30.944738388061523, + "learning_rate": 1.6824599229173897e-08, + "loss": 0.3115, + "num_input_tokens_seen": 46586304, + "step": 14800 + }, + { + "epoch": 0.9477626272325715, + "grad_norm": 33.253997802734375, + "learning_rate": 1.662111008237932e-08, + "loss": 0.2909, + "num_input_tokens_seen": 46602432, + "step": 14805 + }, + { + "epoch": 0.9480827091735484, + "grad_norm": 33.023921966552734, + "learning_rate": 1.6418848695675003e-08, + "loss": 0.3218, + "num_input_tokens_seen": 46617472, + "step": 14810 + }, + { + "epoch": 0.9484027911145253, + "grad_norm": 35.12213897705078, + "learning_rate": 1.6217815321586614e-08, + "loss": 0.372, + "num_input_tokens_seen": 46632896, + "step": 14815 + }, + { + "epoch": 0.9487228730555022, + "grad_norm": 18.142263412475586, + "learning_rate": 1.6018010211106602e-08, + "loss": 0.355, + "num_input_tokens_seen": 46649408, + "step": 14820 + }, + { + "epoch": 0.9490429549964791, + "grad_norm": 16.464832305908203, + "learning_rate": 1.58194336136942e-08, + "loss": 0.2816, + "num_input_tokens_seen": 46665344, + "step": 14825 + }, + { + "epoch": 0.9493630369374559, + "grad_norm": 36.46229934692383, + "learning_rate": 1.5622085777274417e-08, + "loss": 0.4274, + "num_input_tokens_seen": 46680704, + "step": 14830 + }, + { + "epoch": 0.9496831188784329, + "grad_norm": 39.555789947509766, + "learning_rate": 1.542596694823839e-08, + "loss": 0.3333, + "num_input_tokens_seen": 46695936, + "step": 14835 + }, + { + "epoch": 0.9500032008194098, + "grad_norm": 54.21735382080078, + "learning_rate": 1.5231077371442914e-08, + "loss": 0.4259, + "num_input_tokens_seen": 46711680, + "step": 14840 + }, + { + "epoch": 0.9503232827603867, + "grad_norm": 24.265138626098633, + "learning_rate": 1.5037417290209685e-08, + "loss": 0.2888, + "num_input_tokens_seen": 46727040, + "step": 14845 + }, + { + "epoch": 0.9506433647013636, + "grad_norm": 37.78664779663086, + "learning_rate": 1.4844986946325743e-08, + "loss": 0.393, + "num_input_tokens_seen": 46742720, + "step": 14850 + }, + { + "epoch": 0.9509634466423404, + "grad_norm": 23.887489318847656, + "learning_rate": 1.4653786580042681e-08, + "loss": 0.2502, + "num_input_tokens_seen": 46758336, + "step": 14855 + }, + { + "epoch": 0.9511554958069266, + "eval_loss": 0.3537425398826599, + "eval_runtime": 49.1421, + "eval_samples_per_second": 282.568, + "eval_steps_per_second": 35.326, + "num_input_tokens_seen": 46767552, + "step": 14858 + }, + { + "epoch": 0.9512835285833173, + "grad_norm": 22.978870391845703, + "learning_rate": 1.4463816430076215e-08, + "loss": 0.3108, + "num_input_tokens_seen": 46773312, + "step": 14860 + }, + { + "epoch": 0.9516036105242942, + "grad_norm": 39.241058349609375, + "learning_rate": 1.4275076733606395e-08, + "loss": 0.3685, + "num_input_tokens_seen": 46787968, + "step": 14865 + }, + { + "epoch": 0.9519236924652711, + "grad_norm": 24.853103637695312, + "learning_rate": 1.4087567726277061e-08, + "loss": 0.2913, + "num_input_tokens_seen": 46803712, + "step": 14870 + }, + { + "epoch": 0.952243774406248, + "grad_norm": 28.337535858154297, + "learning_rate": 1.390128964219528e-08, + "loss": 0.2789, + "num_input_tokens_seen": 46820288, + "step": 14875 + }, + { + "epoch": 0.9525638563472248, + "grad_norm": 45.00613784790039, + "learning_rate": 1.3716242713931348e-08, + "loss": 0.3819, + "num_input_tokens_seen": 46835904, + "step": 14880 + }, + { + "epoch": 0.9528839382882017, + "grad_norm": 27.987937927246094, + "learning_rate": 1.3532427172518789e-08, + "loss": 0.3714, + "num_input_tokens_seen": 46851136, + "step": 14885 + }, + { + "epoch": 0.9532040202291787, + "grad_norm": 34.979331970214844, + "learning_rate": 1.3349843247453252e-08, + "loss": 0.3343, + "num_input_tokens_seen": 46867456, + "step": 14890 + }, + { + "epoch": 0.9535241021701556, + "grad_norm": 26.81144905090332, + "learning_rate": 1.3168491166692941e-08, + "loss": 0.2772, + "num_input_tokens_seen": 46882816, + "step": 14895 + }, + { + "epoch": 0.9538441841111325, + "grad_norm": 40.77924728393555, + "learning_rate": 1.2988371156658073e-08, + "loss": 0.4506, + "num_input_tokens_seen": 46898624, + "step": 14900 + }, + { + "epoch": 0.9541642660521094, + "grad_norm": 28.05156135559082, + "learning_rate": 1.2809483442230763e-08, + "loss": 0.282, + "num_input_tokens_seen": 46914304, + "step": 14905 + }, + { + "epoch": 0.9544843479930862, + "grad_norm": 21.98477554321289, + "learning_rate": 1.2631828246754128e-08, + "loss": 0.3705, + "num_input_tokens_seen": 46930368, + "step": 14910 + }, + { + "epoch": 0.9548044299340631, + "grad_norm": 38.7076301574707, + "learning_rate": 1.2455405792032969e-08, + "loss": 0.364, + "num_input_tokens_seen": 46945792, + "step": 14915 + }, + { + "epoch": 0.95512451187504, + "grad_norm": 32.54359817504883, + "learning_rate": 1.2280216298332646e-08, + "loss": 0.342, + "num_input_tokens_seen": 46962048, + "step": 14920 + }, + { + "epoch": 0.9554445938160169, + "grad_norm": 53.13780212402344, + "learning_rate": 1.2106259984379642e-08, + "loss": 0.4603, + "num_input_tokens_seen": 46976768, + "step": 14925 + }, + { + "epoch": 0.9557646757569938, + "grad_norm": 45.00946807861328, + "learning_rate": 1.1933537067359889e-08, + "loss": 0.4141, + "num_input_tokens_seen": 46991424, + "step": 14930 + }, + { + "epoch": 0.9560847576979706, + "grad_norm": 24.874343872070312, + "learning_rate": 1.1762047762920446e-08, + "loss": 0.3607, + "num_input_tokens_seen": 47006656, + "step": 14935 + }, + { + "epoch": 0.9564048396389476, + "grad_norm": 51.970680236816406, + "learning_rate": 1.1591792285167602e-08, + "loss": 0.3576, + "num_input_tokens_seen": 47021824, + "step": 14940 + }, + { + "epoch": 0.9567249215799245, + "grad_norm": 29.96383285522461, + "learning_rate": 1.1422770846667206e-08, + "loss": 0.3907, + "num_input_tokens_seen": 47037440, + "step": 14945 + }, + { + "epoch": 0.9570450035209014, + "grad_norm": 19.72380256652832, + "learning_rate": 1.1254983658444572e-08, + "loss": 0.307, + "num_input_tokens_seen": 47053760, + "step": 14950 + }, + { + "epoch": 0.9573650854618783, + "grad_norm": 46.794639587402344, + "learning_rate": 1.1088430929984017e-08, + "loss": 0.3148, + "num_input_tokens_seen": 47068928, + "step": 14955 + }, + { + "epoch": 0.9576851674028551, + "grad_norm": 37.3883056640625, + "learning_rate": 1.0923112869228645e-08, + "loss": 0.383, + "num_input_tokens_seen": 47084672, + "step": 14960 + }, + { + "epoch": 0.958005249343832, + "grad_norm": 41.08680725097656, + "learning_rate": 1.0759029682579801e-08, + "loss": 0.3613, + "num_input_tokens_seen": 47101632, + "step": 14965 + }, + { + "epoch": 0.9583253312848089, + "grad_norm": 24.6757755279541, + "learning_rate": 1.0596181574897389e-08, + "loss": 0.306, + "num_input_tokens_seen": 47116480, + "step": 14970 + }, + { + "epoch": 0.9586454132257858, + "grad_norm": 29.715951919555664, + "learning_rate": 1.0434568749499107e-08, + "loss": 0.3155, + "num_input_tokens_seen": 47132992, + "step": 14975 + }, + { + "epoch": 0.9589654951667627, + "grad_norm": 26.07288932800293, + "learning_rate": 1.027419140816066e-08, + "loss": 0.3061, + "num_input_tokens_seen": 47149056, + "step": 14980 + }, + { + "epoch": 0.9592855771077395, + "grad_norm": 23.639156341552734, + "learning_rate": 1.0115049751114768e-08, + "loss": 0.2984, + "num_input_tokens_seen": 47164864, + "step": 14985 + }, + { + "epoch": 0.9596056590487164, + "grad_norm": 18.913105010986328, + "learning_rate": 9.957143977051941e-09, + "loss": 0.3481, + "num_input_tokens_seen": 47180544, + "step": 14990 + }, + { + "epoch": 0.9599257409896934, + "grad_norm": 29.4930362701416, + "learning_rate": 9.800474283119142e-09, + "loss": 0.3836, + "num_input_tokens_seen": 47196608, + "step": 14995 + }, + { + "epoch": 0.9602458229306703, + "grad_norm": 26.606163024902344, + "learning_rate": 9.645040864920462e-09, + "loss": 0.3701, + "num_input_tokens_seen": 47213504, + "step": 15000 + }, + { + "epoch": 0.9605659048716472, + "grad_norm": 32.366455078125, + "learning_rate": 9.490843916516334e-09, + "loss": 0.4056, + "num_input_tokens_seen": 47228288, + "step": 15005 + }, + { + "epoch": 0.960885986812624, + "grad_norm": 25.494123458862305, + "learning_rate": 9.337883630423316e-09, + "loss": 0.4448, + "num_input_tokens_seen": 47243712, + "step": 15010 + }, + { + "epoch": 0.9612060687536009, + "grad_norm": 50.839359283447266, + "learning_rate": 9.186160197614423e-09, + "loss": 0.4909, + "num_input_tokens_seen": 47259904, + "step": 15015 + }, + { + "epoch": 0.9615261506945778, + "grad_norm": 33.710933685302734, + "learning_rate": 9.035673807517795e-09, + "loss": 0.4837, + "num_input_tokens_seen": 47275072, + "step": 15020 + }, + { + "epoch": 0.9618462326355547, + "grad_norm": 42.61496353149414, + "learning_rate": 8.886424648017698e-09, + "loss": 0.27, + "num_input_tokens_seen": 47290688, + "step": 15025 + }, + { + "epoch": 0.9621663145765316, + "grad_norm": 18.92186737060547, + "learning_rate": 8.738412905453408e-09, + "loss": 0.3408, + "num_input_tokens_seen": 47306496, + "step": 15030 + }, + { + "epoch": 0.9624863965175084, + "grad_norm": 29.760217666625977, + "learning_rate": 8.591638764619324e-09, + "loss": 0.3575, + "num_input_tokens_seen": 47321280, + "step": 15035 + }, + { + "epoch": 0.9628064784584853, + "grad_norm": 45.232330322265625, + "learning_rate": 8.446102408764643e-09, + "loss": 0.3623, + "num_input_tokens_seen": 47337536, + "step": 15040 + }, + { + "epoch": 0.9631265603994623, + "grad_norm": 38.70942687988281, + "learning_rate": 8.301804019593129e-09, + "loss": 0.273, + "num_input_tokens_seen": 47353024, + "step": 15045 + }, + { + "epoch": 0.9634466423404392, + "grad_norm": 31.57654571533203, + "learning_rate": 8.158743777263333e-09, + "loss": 0.3535, + "num_input_tokens_seen": 47369088, + "step": 15050 + }, + { + "epoch": 0.9637667242814161, + "grad_norm": 26.071718215942383, + "learning_rate": 8.016921860387272e-09, + "loss": 0.3678, + "num_input_tokens_seen": 47384320, + "step": 15055 + }, + { + "epoch": 0.964086806222393, + "grad_norm": 28.67797088623047, + "learning_rate": 7.876338446031416e-09, + "loss": 0.3908, + "num_input_tokens_seen": 47400896, + "step": 15060 + }, + { + "epoch": 0.9644068881633698, + "grad_norm": 44.70686340332031, + "learning_rate": 7.736993709716033e-09, + "loss": 0.3169, + "num_input_tokens_seen": 47416896, + "step": 15065 + }, + { + "epoch": 0.9647269701043467, + "grad_norm": 49.233890533447266, + "learning_rate": 7.59888782541418e-09, + "loss": 0.4783, + "num_input_tokens_seen": 47432320, + "step": 15070 + }, + { + "epoch": 0.9650470520453236, + "grad_norm": 16.93093490600586, + "learning_rate": 7.462020965553151e-09, + "loss": 0.2656, + "num_input_tokens_seen": 47448320, + "step": 15075 + }, + { + "epoch": 0.9653671339863005, + "grad_norm": 17.901084899902344, + "learning_rate": 7.32639330101259e-09, + "loss": 0.49, + "num_input_tokens_seen": 47463488, + "step": 15080 + }, + { + "epoch": 0.9656872159272774, + "grad_norm": 51.073936462402344, + "learning_rate": 7.1920050011252675e-09, + "loss": 0.3886, + "num_input_tokens_seen": 47479104, + "step": 15085 + }, + { + "epoch": 0.9660072978682542, + "grad_norm": 37.59046173095703, + "learning_rate": 7.058856233676525e-09, + "loss": 0.391, + "num_input_tokens_seen": 47496448, + "step": 15090 + }, + { + "epoch": 0.9663273798092311, + "grad_norm": 86.30872344970703, + "learning_rate": 6.926947164904162e-09, + "loss": 0.3733, + "num_input_tokens_seen": 47511936, + "step": 15095 + }, + { + "epoch": 0.9666474617502081, + "grad_norm": 26.688161849975586, + "learning_rate": 6.796277959498331e-09, + "loss": 0.3984, + "num_input_tokens_seen": 47528320, + "step": 15100 + }, + { + "epoch": 0.966967543691185, + "grad_norm": 26.294218063354492, + "learning_rate": 6.666848780600864e-09, + "loss": 0.2793, + "num_input_tokens_seen": 47543296, + "step": 15105 + }, + { + "epoch": 0.9672876256321619, + "grad_norm": 10.18204116821289, + "learning_rate": 6.538659789805834e-09, + "loss": 0.2751, + "num_input_tokens_seen": 47558656, + "step": 15110 + }, + { + "epoch": 0.9676077075731387, + "grad_norm": 34.290340423583984, + "learning_rate": 6.411711147158438e-09, + "loss": 0.3498, + "num_input_tokens_seen": 47574720, + "step": 15115 + }, + { + "epoch": 0.9679277895141156, + "grad_norm": 52.94532012939453, + "learning_rate": 6.286003011155783e-09, + "loss": 0.3107, + "num_input_tokens_seen": 47590272, + "step": 15120 + }, + { + "epoch": 0.9682478714550925, + "grad_norm": 32.81538772583008, + "learning_rate": 6.161535538745877e-09, + "loss": 0.4098, + "num_input_tokens_seen": 47605696, + "step": 15125 + }, + { + "epoch": 0.9685679533960694, + "grad_norm": 32.042781829833984, + "learning_rate": 6.0383088853277475e-09, + "loss": 0.3975, + "num_input_tokens_seen": 47621760, + "step": 15130 + }, + { + "epoch": 0.9688880353370463, + "grad_norm": 24.502296447753906, + "learning_rate": 5.916323204751439e-09, + "loss": 0.3081, + "num_input_tokens_seen": 47639296, + "step": 15135 + }, + { + "epoch": 0.9692081172780231, + "grad_norm": 27.488826751708984, + "learning_rate": 5.795578649317345e-09, + "loss": 0.2648, + "num_input_tokens_seen": 47654656, + "step": 15140 + }, + { + "epoch": 0.969528199219, + "grad_norm": 44.00014877319336, + "learning_rate": 5.676075369776656e-09, + "loss": 0.3157, + "num_input_tokens_seen": 47671168, + "step": 15145 + }, + { + "epoch": 0.9698482811599769, + "grad_norm": 23.902742385864258, + "learning_rate": 5.557813515330468e-09, + "loss": 0.3348, + "num_input_tokens_seen": 47686400, + "step": 15150 + }, + { + "epoch": 0.9701683631009539, + "grad_norm": 28.53948211669922, + "learning_rate": 5.440793233630115e-09, + "loss": 0.3439, + "num_input_tokens_seen": 47701760, + "step": 15155 + }, + { + "epoch": 0.9704884450419308, + "grad_norm": 40.30237579345703, + "learning_rate": 5.325014670776951e-09, + "loss": 0.3063, + "num_input_tokens_seen": 47717248, + "step": 15160 + }, + { + "epoch": 0.9708085269829076, + "grad_norm": 60.948604583740234, + "learning_rate": 5.21047797132157e-09, + "loss": 0.3599, + "num_input_tokens_seen": 47734336, + "step": 15165 + }, + { + "epoch": 0.9711286089238845, + "grad_norm": 25.381938934326172, + "learning_rate": 5.097183278264694e-09, + "loss": 0.3417, + "num_input_tokens_seen": 47750464, + "step": 15170 + }, + { + "epoch": 0.9714486908648614, + "grad_norm": 25.686281204223633, + "learning_rate": 4.985130733055954e-09, + "loss": 0.4364, + "num_input_tokens_seen": 47765824, + "step": 15175 + }, + { + "epoch": 0.9717687728058383, + "grad_norm": 27.45149803161621, + "learning_rate": 4.874320475594107e-09, + "loss": 0.3893, + "num_input_tokens_seen": 47781760, + "step": 15180 + }, + { + "epoch": 0.9720888547468152, + "grad_norm": 17.62384605407715, + "learning_rate": 4.764752644227377e-09, + "loss": 0.2832, + "num_input_tokens_seen": 47797312, + "step": 15185 + }, + { + "epoch": 0.972408936687792, + "grad_norm": 29.088834762573242, + "learning_rate": 4.656427375752336e-09, + "loss": 0.3392, + "num_input_tokens_seen": 47813440, + "step": 15190 + }, + { + "epoch": 0.9727290186287689, + "grad_norm": 33.35861587524414, + "learning_rate": 4.549344805414246e-09, + "loss": 0.34, + "num_input_tokens_seen": 47829440, + "step": 15195 + }, + { + "epoch": 0.9730491005697458, + "grad_norm": 32.597530364990234, + "learning_rate": 4.443505066907049e-09, + "loss": 0.4139, + "num_input_tokens_seen": 47844608, + "step": 15200 + }, + { + "epoch": 0.9733691825107228, + "grad_norm": 28.545236587524414, + "learning_rate": 4.338908292372934e-09, + "loss": 0.2823, + "num_input_tokens_seen": 47860160, + "step": 15205 + }, + { + "epoch": 0.9736892644516997, + "grad_norm": 42.930023193359375, + "learning_rate": 4.235554612402214e-09, + "loss": 0.3864, + "num_input_tokens_seen": 47875648, + "step": 15210 + }, + { + "epoch": 0.9740093463926766, + "grad_norm": 48.120704650878906, + "learning_rate": 4.133444156033006e-09, + "loss": 0.381, + "num_input_tokens_seen": 47892736, + "step": 15215 + }, + { + "epoch": 0.9743294283336534, + "grad_norm": 37.2425422668457, + "learning_rate": 4.032577050751551e-09, + "loss": 0.3145, + "num_input_tokens_seen": 47908992, + "step": 15220 + }, + { + "epoch": 0.9746495102746303, + "grad_norm": 23.053668975830078, + "learning_rate": 3.932953422491669e-09, + "loss": 0.3428, + "num_input_tokens_seen": 47924736, + "step": 15225 + }, + { + "epoch": 0.9749695922156072, + "grad_norm": 52.20282745361328, + "learning_rate": 3.8345733956345326e-09, + "loss": 0.284, + "num_input_tokens_seen": 47941056, + "step": 15230 + }, + { + "epoch": 0.9752896741565841, + "grad_norm": 29.915189743041992, + "learning_rate": 3.737437093008777e-09, + "loss": 0.3619, + "num_input_tokens_seen": 47957824, + "step": 15235 + }, + { + "epoch": 0.975609756097561, + "grad_norm": 42.0181770324707, + "learning_rate": 3.641544635890281e-09, + "loss": 0.4107, + "num_input_tokens_seen": 47973056, + "step": 15240 + }, + { + "epoch": 0.9759298380385378, + "grad_norm": 18.199411392211914, + "learning_rate": 3.546896144001832e-09, + "loss": 0.3896, + "num_input_tokens_seen": 47988928, + "step": 15245 + }, + { + "epoch": 0.9762499199795147, + "grad_norm": 47.75886917114258, + "learning_rate": 3.4534917355132364e-09, + "loss": 0.3926, + "num_input_tokens_seen": 48004032, + "step": 15250 + }, + { + "epoch": 0.9765700019204916, + "grad_norm": 35.261905670166016, + "learning_rate": 3.361331527040878e-09, + "loss": 0.4376, + "num_input_tokens_seen": 48020800, + "step": 15255 + }, + { + "epoch": 0.9768900838614686, + "grad_norm": 31.275798797607422, + "learning_rate": 3.270415633647938e-09, + "loss": 0.3935, + "num_input_tokens_seen": 48036800, + "step": 15260 + }, + { + "epoch": 0.9772101658024455, + "grad_norm": 22.784738540649414, + "learning_rate": 3.180744168843952e-09, + "loss": 0.2847, + "num_input_tokens_seen": 48051264, + "step": 15265 + }, + { + "epoch": 0.9775302477434223, + "grad_norm": 27.314804077148438, + "learning_rate": 3.0923172445849187e-09, + "loss": 0.2318, + "num_input_tokens_seen": 48066176, + "step": 15270 + }, + { + "epoch": 0.9778503296843992, + "grad_norm": 34.85258865356445, + "learning_rate": 3.0051349712727493e-09, + "loss": 0.3178, + "num_input_tokens_seen": 48081984, + "step": 15275 + }, + { + "epoch": 0.9781704116253761, + "grad_norm": 27.141429901123047, + "learning_rate": 2.9191974577555954e-09, + "loss": 0.4072, + "num_input_tokens_seen": 48096896, + "step": 15280 + }, + { + "epoch": 0.978490493566353, + "grad_norm": 18.883970260620117, + "learning_rate": 2.8345048113274096e-09, + "loss": 0.2334, + "num_input_tokens_seen": 48112128, + "step": 15285 + }, + { + "epoch": 0.9788105755073299, + "grad_norm": 32.112449645996094, + "learning_rate": 2.751057137727941e-09, + "loss": 0.3388, + "num_input_tokens_seen": 48127616, + "step": 15290 + }, + { + "epoch": 0.9791306574483067, + "grad_norm": 59.22599411010742, + "learning_rate": 2.66885454114274e-09, + "loss": 0.384, + "num_input_tokens_seen": 48142144, + "step": 15295 + }, + { + "epoch": 0.9794507393892836, + "grad_norm": 60.90025329589844, + "learning_rate": 2.5878971242025983e-09, + "loss": 0.3776, + "num_input_tokens_seen": 48158272, + "step": 15300 + }, + { + "epoch": 0.9797708213302605, + "grad_norm": 23.69969940185547, + "learning_rate": 2.5081849879837746e-09, + "loss": 0.3239, + "num_input_tokens_seen": 48173120, + "step": 15305 + }, + { + "epoch": 0.9800909032712375, + "grad_norm": 19.513404846191406, + "learning_rate": 2.429718232007771e-09, + "loss": 0.3428, + "num_input_tokens_seen": 48188672, + "step": 15310 + }, + { + "epoch": 0.9804109852122144, + "grad_norm": 25.234663009643555, + "learning_rate": 2.3524969542414453e-09, + "loss": 0.2688, + "num_input_tokens_seen": 48204480, + "step": 15315 + }, + { + "epoch": 0.9807310671531912, + "grad_norm": 14.73193359375, + "learning_rate": 2.2765212510963418e-09, + "loss": 0.3525, + "num_input_tokens_seen": 48219584, + "step": 15320 + }, + { + "epoch": 0.9810511490941681, + "grad_norm": 33.33141326904297, + "learning_rate": 2.2017912174289164e-09, + "loss": 0.2847, + "num_input_tokens_seen": 48235904, + "step": 15325 + }, + { + "epoch": 0.981371231035145, + "grad_norm": 34.248878479003906, + "learning_rate": 2.128306946540648e-09, + "loss": 0.4052, + "num_input_tokens_seen": 48252992, + "step": 15330 + }, + { + "epoch": 0.9816913129761219, + "grad_norm": 28.99315071105957, + "learning_rate": 2.0560685301774792e-09, + "loss": 0.3316, + "num_input_tokens_seen": 48267840, + "step": 15335 + }, + { + "epoch": 0.9820113949170988, + "grad_norm": 21.494754791259766, + "learning_rate": 1.985076058529933e-09, + "loss": 0.3781, + "num_input_tokens_seen": 48282688, + "step": 15340 + }, + { + "epoch": 0.9823314768580756, + "grad_norm": 38.192710876464844, + "learning_rate": 1.9153296202328863e-09, + "loss": 0.4768, + "num_input_tokens_seen": 48300096, + "step": 15345 + }, + { + "epoch": 0.9826515587990525, + "grad_norm": 32.44169998168945, + "learning_rate": 1.8468293023656823e-09, + "loss": 0.3929, + "num_input_tokens_seen": 48315136, + "step": 15350 + }, + { + "epoch": 0.9829716407400294, + "grad_norm": 17.585954666137695, + "learning_rate": 1.7795751904515766e-09, + "loss": 0.4052, + "num_input_tokens_seen": 48330240, + "step": 15355 + }, + { + "epoch": 0.9832917226810063, + "grad_norm": 56.64820098876953, + "learning_rate": 1.7135673684584019e-09, + "loss": 0.318, + "num_input_tokens_seen": 48345280, + "step": 15360 + }, + { + "epoch": 0.9836118046219833, + "grad_norm": 30.882753372192383, + "learning_rate": 1.6488059187974579e-09, + "loss": 0.3972, + "num_input_tokens_seen": 48361792, + "step": 15365 + }, + { + "epoch": 0.9839318865629602, + "grad_norm": 32.313411712646484, + "learning_rate": 1.5852909223242894e-09, + "loss": 0.4099, + "num_input_tokens_seen": 48377408, + "step": 15370 + }, + { + "epoch": 0.984251968503937, + "grad_norm": 16.098203659057617, + "learning_rate": 1.5230224583380192e-09, + "loss": 0.3759, + "num_input_tokens_seen": 48392896, + "step": 15375 + }, + { + "epoch": 0.9845720504449139, + "grad_norm": 39.47123336791992, + "learning_rate": 1.4620006045816813e-09, + "loss": 0.4663, + "num_input_tokens_seen": 48407552, + "step": 15380 + }, + { + "epoch": 0.9848921323858908, + "grad_norm": 15.717222213745117, + "learning_rate": 1.4022254372417774e-09, + "loss": 0.2785, + "num_input_tokens_seen": 48424320, + "step": 15385 + }, + { + "epoch": 0.9852122143268677, + "grad_norm": 35.01372146606445, + "learning_rate": 1.3436970309481655e-09, + "loss": 0.5093, + "num_input_tokens_seen": 48441984, + "step": 15390 + }, + { + "epoch": 0.9855322962678446, + "grad_norm": 15.031546592712402, + "learning_rate": 1.2864154587742815e-09, + "loss": 0.3442, + "num_input_tokens_seen": 48456832, + "step": 15395 + }, + { + "epoch": 0.9858523782088214, + "grad_norm": 32.367923736572266, + "learning_rate": 1.2303807922370292e-09, + "loss": 0.3608, + "num_input_tokens_seen": 48472512, + "step": 15400 + }, + { + "epoch": 0.9861724601497983, + "grad_norm": 53.186859130859375, + "learning_rate": 1.1755931012961128e-09, + "loss": 0.3122, + "num_input_tokens_seen": 48488832, + "step": 15405 + }, + { + "epoch": 0.9864925420907752, + "grad_norm": 17.48390007019043, + "learning_rate": 1.122052454354705e-09, + "loss": 0.3491, + "num_input_tokens_seen": 48503936, + "step": 15410 + }, + { + "epoch": 0.9868126240317522, + "grad_norm": 20.294185638427734, + "learning_rate": 1.0697589182590005e-09, + "loss": 0.4398, + "num_input_tokens_seen": 48519040, + "step": 15415 + }, + { + "epoch": 0.9871327059727291, + "grad_norm": 28.50274085998535, + "learning_rate": 1.018712558297996e-09, + "loss": 0.5967, + "num_input_tokens_seen": 48535040, + "step": 15420 + }, + { + "epoch": 0.9874527879137059, + "grad_norm": 36.501163482666016, + "learning_rate": 9.689134382037113e-10, + "loss": 0.4383, + "num_input_tokens_seen": 48551808, + "step": 15425 + }, + { + "epoch": 0.9877728698546828, + "grad_norm": 35.623992919921875, + "learning_rate": 9.203616201508557e-10, + "loss": 0.3967, + "num_input_tokens_seen": 48566592, + "step": 15430 + }, + { + "epoch": 0.9880929517956597, + "grad_norm": 46.61222457885742, + "learning_rate": 8.730571647570517e-10, + "loss": 0.3159, + "num_input_tokens_seen": 48582720, + "step": 15435 + }, + { + "epoch": 0.9884130337366366, + "grad_norm": 46.78093338012695, + "learning_rate": 8.270001310825003e-10, + "loss": 0.4878, + "num_input_tokens_seen": 48599104, + "step": 15440 + }, + { + "epoch": 0.9887331156776135, + "grad_norm": 12.824591636657715, + "learning_rate": 7.821905766297599e-10, + "loss": 0.3118, + "num_input_tokens_seen": 48615040, + "step": 15445 + }, + { + "epoch": 0.9890531976185903, + "grad_norm": 28.26544952392578, + "learning_rate": 7.386285573441897e-10, + "loss": 0.3926, + "num_input_tokens_seen": 48630976, + "step": 15450 + }, + { + "epoch": 0.9893732795595672, + "grad_norm": 25.03919792175293, + "learning_rate": 6.963141276136175e-10, + "loss": 0.2862, + "num_input_tokens_seen": 48646080, + "step": 15455 + }, + { + "epoch": 0.9896933615005441, + "grad_norm": 26.057968139648438, + "learning_rate": 6.552473402678949e-10, + "loss": 0.2525, + "num_input_tokens_seen": 48662528, + "step": 15460 + }, + { + "epoch": 0.990013443441521, + "grad_norm": 49.04160690307617, + "learning_rate": 6.154282465794524e-10, + "loss": 0.3301, + "num_input_tokens_seen": 48680000, + "step": 15465 + }, + { + "epoch": 0.990333525382498, + "grad_norm": 30.749189376831055, + "learning_rate": 5.768568962629672e-10, + "loss": 0.424, + "num_input_tokens_seen": 48696256, + "step": 15470 + }, + { + "epoch": 0.9906536073234748, + "grad_norm": 41.51435470581055, + "learning_rate": 5.395333374751398e-10, + "loss": 0.3065, + "num_input_tokens_seen": 48711168, + "step": 15475 + }, + { + "epoch": 0.9909736892644517, + "grad_norm": 45.217079162597656, + "learning_rate": 5.034576168149174e-10, + "loss": 0.5309, + "num_input_tokens_seen": 48726848, + "step": 15480 + }, + { + "epoch": 0.9912937712054286, + "grad_norm": 48.17198181152344, + "learning_rate": 4.686297793231597e-10, + "loss": 0.4868, + "num_input_tokens_seen": 48743232, + "step": 15485 + }, + { + "epoch": 0.9916138531464055, + "grad_norm": 24.643993377685547, + "learning_rate": 4.350498684829729e-10, + "loss": 0.456, + "num_input_tokens_seen": 48758080, + "step": 15490 + }, + { + "epoch": 0.9919339350873824, + "grad_norm": 38.15465545654297, + "learning_rate": 4.0271792621926483e-10, + "loss": 0.3105, + "num_input_tokens_seen": 48773120, + "step": 15495 + }, + { + "epoch": 0.9922540170283592, + "grad_norm": 14.166491508483887, + "learning_rate": 3.716339928987455e-10, + "loss": 0.3815, + "num_input_tokens_seen": 48789056, + "step": 15500 + }, + { + "epoch": 0.9925740989693361, + "grad_norm": 64.28377532958984, + "learning_rate": 3.41798107330149e-10, + "loss": 0.4142, + "num_input_tokens_seen": 48804288, + "step": 15505 + }, + { + "epoch": 0.992894180910313, + "grad_norm": 34.623619079589844, + "learning_rate": 3.1321030676390027e-10, + "loss": 0.3715, + "num_input_tokens_seen": 48818816, + "step": 15510 + }, + { + "epoch": 0.9932142628512899, + "grad_norm": 22.467647552490234, + "learning_rate": 2.8587062689222617e-10, + "loss": 0.2872, + "num_input_tokens_seen": 48835520, + "step": 15515 + }, + { + "epoch": 0.9935343447922668, + "grad_norm": 30.136613845825195, + "learning_rate": 2.5977910184904473e-10, + "loss": 0.3221, + "num_input_tokens_seen": 48851328, + "step": 15520 + }, + { + "epoch": 0.9938544267332438, + "grad_norm": 32.950374603271484, + "learning_rate": 2.3493576420985373e-10, + "loss": 0.3354, + "num_input_tokens_seen": 48866304, + "step": 15525 + }, + { + "epoch": 0.9941745086742206, + "grad_norm": 15.965251922607422, + "learning_rate": 2.11340644991842e-10, + "loss": 0.3174, + "num_input_tokens_seen": 48882752, + "step": 15530 + }, + { + "epoch": 0.9944945906151975, + "grad_norm": 37.14493942260742, + "learning_rate": 1.8899377365388936e-10, + "loss": 0.3041, + "num_input_tokens_seen": 48898304, + "step": 15535 + }, + { + "epoch": 0.9948146725561744, + "grad_norm": 16.286380767822266, + "learning_rate": 1.6789517809634447e-10, + "loss": 0.4202, + "num_input_tokens_seen": 48914048, + "step": 15540 + }, + { + "epoch": 0.9951347544971513, + "grad_norm": 61.637794494628906, + "learning_rate": 1.480448846609139e-10, + "loss": 0.3127, + "num_input_tokens_seen": 48930176, + "step": 15545 + }, + { + "epoch": 0.9954548364381282, + "grad_norm": 24.89733123779297, + "learning_rate": 1.294429181311063e-10, + "loss": 0.3505, + "num_input_tokens_seen": 48945920, + "step": 15550 + }, + { + "epoch": 0.995774918379105, + "grad_norm": 23.30603790283203, + "learning_rate": 1.1208930173145503e-10, + "loss": 0.4079, + "num_input_tokens_seen": 48960832, + "step": 15555 + }, + { + "epoch": 0.9960950003200819, + "grad_norm": 21.470914840698242, + "learning_rate": 9.598405712840651e-11, + "loss": 0.3213, + "num_input_tokens_seen": 48977280, + "step": 15560 + }, + { + "epoch": 0.9964150822610588, + "grad_norm": 19.718584060668945, + "learning_rate": 8.1127204429432e-11, + "loss": 0.347, + "num_input_tokens_seen": 48992512, + "step": 15565 + }, + { + "epoch": 0.9967351642020357, + "grad_norm": 25.985633850097656, + "learning_rate": 6.751876218336061e-11, + "loss": 0.3524, + "num_input_tokens_seen": 49008128, + "step": 15570 + }, + { + "epoch": 0.9970552461430127, + "grad_norm": 22.135334014892578, + "learning_rate": 5.515874738071247e-11, + "loss": 0.3376, + "num_input_tokens_seen": 49024512, + "step": 15575 + }, + { + "epoch": 0.9973753280839895, + "grad_norm": 44.398292541503906, + "learning_rate": 4.404717545303249e-11, + "loss": 0.308, + "num_input_tokens_seen": 49040128, + "step": 15580 + }, + { + "epoch": 0.9976954100249664, + "grad_norm": 14.405759811401367, + "learning_rate": 3.418406027322352e-11, + "loss": 0.3099, + "num_input_tokens_seen": 49055360, + "step": 15585 + }, + { + "epoch": 0.9980154919659433, + "grad_norm": 33.78312683105469, + "learning_rate": 2.5569414155546254e-11, + "loss": 0.3518, + "num_input_tokens_seen": 49071360, + "step": 15590 + }, + { + "epoch": 0.9983355739069202, + "grad_norm": 50.76702117919922, + "learning_rate": 1.8203247855397287e-11, + "loss": 0.2734, + "num_input_tokens_seen": 49086144, + "step": 15595 + }, + { + "epoch": 0.9986556558478971, + "grad_norm": 33.41775131225586, + "learning_rate": 1.2085570569642101e-11, + "loss": 0.395, + "num_input_tokens_seen": 49101312, + "step": 15600 + }, + { + "epoch": 0.9989757377888739, + "grad_norm": 56.984737396240234, + "learning_rate": 7.216389936171019e-12, + "loss": 0.3097, + "num_input_tokens_seen": 49116672, + "step": 15605 + }, + { + "epoch": 0.9992958197298508, + "grad_norm": 16.939533233642578, + "learning_rate": 3.5957120342322567e-12, + "loss": 0.1772, + "num_input_tokens_seen": 49132288, + "step": 15610 + }, + { + "epoch": 0.9996159016708277, + "grad_norm": 15.791190147399902, + "learning_rate": 1.2235413842098807e-12, + "loss": 0.3934, + "num_input_tokens_seen": 49148096, + "step": 15615 + }, + { + "epoch": 0.9999359836118046, + "grad_norm": 20.582731246948242, + "learning_rate": 9.98809480678986e-14, + "loss": 0.2515, + "num_input_tokens_seen": 49163840, + "step": 15620 + }, + { + "epoch": 1.0, + "num_input_tokens_seen": 49166912, + "step": 15621, + "total_flos": 2.8707953551107686e+17, + "train_loss": 0.44386771268258823, + "train_runtime": 3548.0201, + "train_samples_per_second": 35.222, + "train_steps_per_second": 4.403 + } + ], + "logging_steps": 5, + "max_steps": 15621, + "num_input_tokens_seen": 49166912, + "num_train_epochs": 1, + "save_steps": 782, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.8707953551107686e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..598e077 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31e55ab4740b3cecf83e585688fb88554d0886d1b8069d53e6fa5dec0b23681 +size 6289 diff --git a/training_eval_loss.png b/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..6d252b1f0bca7b8ec91f9dadbdaa29c418d91a74 GIT binary patch literal 39826 zcmdqJ^+Q!#)HS^6PU#k6t-hbfz;a>0coU`}ZYt1$1m}88!BX4P`lMyo!BM3rv?W(E{f?#711Z$cQ zAO1$NZ~PbhA?ta?(DSy7t*5u8`(5OQrKhWti>H&l6~}|S?jH6o&f-F%B0@43IPQ6R zx_Zb83*Z0WZwR@#+X*)rd-cLqh+MDU@jws?OY|SCT%{a)1bKCOP4$wlPwMJ~x6f@o zc7n}~8~x|+*X+HGznmOeDs3dFLUoJrcN9lZCSlNi1f}~ zn#D}tjd$19kJRsRlxGVp9S5ZI#JPKHRP%hk)>~{xoXH!-!O6*)NvG2Pjq%@q9kwA) zBO)SlLoJc3s;a8rn?;cG@C^q`285c5imF0{6p4h-jTrDT@Uf~ODUR~L3xr{Dz{lDo zE&nh5T$d5idL=;=5D*}#Q7$zs!F_OW;4>S^Grhh}eu;8(+~cdHXCl*+*RR>vKW)^9 z^{UkwRl9THUQ*W7d@?ig=2Ec8y>A@xlO12bQp9uL5VL=^v-EqeyJ~OJ*V4|80Itx% z9w_0NsJXuTRe_ruKZwI(t}B5oF(=XDYq|m@F4oFWVZImB{C=;_T$f^*g72<`r#@45 z`~GPOUI}(IonUiUf`F+1p=W_v9aEg*;XM_*ygs?WQ+cz%6R)f{k*|`zD*a`7@ZbTj zpdcP1zdK?r<1tS<`EY?1{lP?ym*X_cP?}awaC9`uWZ>VEu->T8w6wIAT`%|+zh&U8 zuC7|#zaP_^CiipTrjn%JJ8$=yX1vEcLDJ zft%+pTp*>Rqoc&l5E9P2BxqEg+w^xakDsD4X+gN*_<3Nhgr(BS_8Htun2L&uKDRWj z@B{$^tJVmr&8@A+8rhe_$yqmdf4F2-RaNzG+sya6-Y$0>7TR7OBq5;}a#}2qU|jh6 zx`WxJ_VJ})e95JusV=7T#S33al;R6fTPeFHN+yso(*_EnC8xV3nvNapC^WtD|L9jEs@515XcP zhXeogNbkdlwx)wAW|wkat7&PGCkmU#n$`oLUq@XAkqezi{4R#B3uuP2_!xRuwC~Q-z#j@BB9ewBepP8WU?d`Q)ddMJRfkZ_`aU^x$NS9jj z=3!OPWs@BWpRj?U`$ug{Bv`Sj_Jmiek`YGyZQS`4dP&pucmk2L-CKySZ;4yTd$ z?YnoG@PA}!WMa=|XRSKte#_z+&+lvB=B44jStd(NLSlLMuHH|_-Svs+;`&2k=>tE@JV~-m)51h!|nc3=KSHF6d5Tb3z0S_~fbF(8_xSsI(_3K|=inbJBSy&UR zGA+y9O8RXwAb=@bK__P7fAl ze*VM>z5H0}{@8R2g;EG_l-k1mse0Cv!=2!|I)#G5!oGJmm}`%JcdIS9?xu@TO5=R|EfDMv;}o4UG)aR|s;;rjVr ze~mlNBuvE>HPfp(zRAve1A@-Wb#+cAY+dCLn#4mAL zoq->V+V!%)E>+IfvHYH^9fryO(jLPs3JF1X=cI1OpBW+>rg`&sxA4yo9c;m6WgelX zfsp%z$I9)kJ)IVM(|Gy#n!aVIY{78R_(McoCgll(Ebi_2pX>~uCB*K1trVxu_7Inx zoP2S(`1RQ|_P}E*OH0d=^70S}Qn+3C{{FuE!`x*Le&%R{(JXHJ2?yBzlFlb`{)ZCW zH(rHcFbK?U)u&JBoT;m+h4nqWyU~+|HXr2U z$B(Yw-W}@f{x+WeUIEt~vR2Ewdwc7DK7?D()ugAUrhaP(3{<^xMQ_E-+1Xk0Jx7I+ zpjp7dUAT=3%mySg+Qg)&@g=#VUb&6NA3vBivZ=kN{G~IqvSNzr{0_v}{5CJ4U(VBQ zhEIRC$EZM3L6j+RX=rFH2H)$2i5cvg)(?2UDRqmajY%0^^4y2i+V{a)9etRpDvQpi zXTyZ{&@cXXc@m7ansSS}$>F>1%?$zVG`vT$)-+7VT?IXSv zaXWI@upc1hUob2|s;jH_A?-S(^+iQwMo^tov)Egocqw+5__B)12d@?VFDcR#!^P$u zP)A7-8Six#sKbh$%ioDy>JPXfTc8sjP+(AC_&8bIZZ;#Ft-0V%*)0>3Cy=bpiHT9i z$^X8Z{g9WRzZr@(B`a$TqquG3)D(Ne$u2V*J*%ruUtUR)oIQKiu z$A1uno<50qpg=3Y!^2~%M|#TQcz^39DBZ5r$-xZ!=~Pre@{;qq3Xx{Dfij3 zGBh%p%}z4A?CB|?`1jbu(9lqs^BISL07-GfsVtO}ASm9GZUzrdZEb8U0EsY2JF}ebHT-Sr=^=xZt8~1H^U-<2 zLE-+0uoFPoZ=yVpQoI zb0wON@wC%-A>nON4arq2{-o*dE|!{>=IC`_55;nXK^S^-KdQXE zy!~JCH!0cJVsGbY4c27OO?Xd*s&Y1sS9?5*j}P9P3S>XSt*LBda~?T4IzvVE`1x~c z`J>e`G}rB{3~#{~&dJF!LrO$rJ>6%PXnU~_2eK&+f{TyOG?A0Jp?pI#Sqx9Wpr{qi z+O219DrfasKorQh|01k`?Rb~XS@`Q~zu!dZ{=0V!jg5aM7?@$bq3CqkfoE)KYr})Z zSH5%!3z>##k?~xlM7IF!sMglj>HOk)bQ8Ls9v|@go+&LY6@hB?giV17jZ(Py=4`ZZ zrgcXwgRJMhn92BPdSNTraz4L5^N4ya>!O)0G&Gb!#+9SUydjq7wI6k%QAKlxO1Mbi zso!}nu9^6oig~-f#K=wgEnNLVqtI}+M})BE@=zB)K=?v72MkE2H=hG@j^^sw*;x^f zc}<^3kF0i<2S+L#4Pwkz3ej>%$|lbMUu_wyaLo5Ng~dR0PfuuEwqW+7^!4?v+}u9Q zj9AAha33BXMoGFINd_Kog%c|K9j#Ty^!hGlgsWbnY+YQur%iKZ>t~!29aerQ)C_P?(V7-6cit>CkcgFLn2HeR35MMt?b)j zmU5!Ris*=C&A+XPPtL-NO{sLac&_CG#F40kgs!KT7ww=!1#eVlmG7Q26^4P5k^>q6 zrryCNPb(NtTSNq*cFq7}EIl1v)amKztis;7wahNuGa6}i94=y<2%K(w92S@~k)sk$ z9_AaJ$s5bAWJZH~iAB~kSNqg=Z{2j=_ZK|7B6Koy>&$Fy&M!Jf%j}|c@^ruS_DXf_ zt??h8Cf2zCDi@bE$RCWFoSdA6NQ;RTzEi3S7pi@<^S*BjRzDJY35nw)SV5SjUUOBanuc;T$b=iouY`yZ+s`Q1TdIdgOAFIgMxA%t? z&CJfu?(_9MS}@zKX@JC>we7suE!+@+wE7?ICiuF%yOClm^RDlnDulTm*i|s#iKsv_erbI65?j7`O?8^;@eLcJNyh|0;iOQXh};g?~eB zZS9kQl;I*%0hs!e?E(IQ;#E0jYHF)(#Q+!>Jj$13acJphA%|T-W?dRi9&~K$@h9{x z+Yw~{0SPh zo=eaBwvx;qU3;OCZDnGT${RK4d-G4JK-NJ_ zOx*hB%^TvC&i7~(W@>Vu`I;uj6O~zC0Ns#-DJ-_o*w|RqojZ4E6@{@dwYBnZb8@l* z|DHm9Z3TS&p4C#nW44#=-yV7ZQTY zBKM%6{UMt9p#G6{pd}i9^lM?k8jzEuMsdwSf<|^29T9Y=U%qGp#_?PkBID6Y6-o7m z!q+r9I+~ln1GNpNtpfs#tRvuPO*$kb^Y)1q4~0safYQqTe#)He6Ir85GQin5*mu-tqq_d-%uJ$H!+H(5M(oS!tS2Y490eEkfFvRnKK&>RE-VuY2j*;eef5tD~~K2qhDWm(V&xk1~=$bkkitM_As zO$@wIbl*96X3VL2HTT2(SlGetO0swN@*PH#0MpJk5wN!cQ-ft^XD7eeh=T%0M@cpq zdfE7!UtPc591)zwDQRVPhRM@Of1p_aapvh&y9~Zh=xgZar(kD)?cU)fYg)1H=KBiJ zBC;Jw`Qjp=e6<*+&}F~Cy4qTP7D^?zSydWZTE-elA+y>DfP6{DyY~RLxO#Zx_lXJ9 z*8R=o&4h=aoBW%b%d1jd*s}l~0-yZO_uC9g0kwS;-Z%RB1qIt6(i|m^q{YS0l-c%( zJ#p~>mZJ7FN^v8q>xoX@h2b04@EF_4ID660zc&%?yI!5sr8MuXck31g7LnrN!-wb`_EBPvkNvWarW6AzxB-sd-*&k~Nl8Ov zvtK_vZ>HzpaC~xtcGUnbrClF9P*YXKt>2%GX8Nl*e#)8~3BapyVBkz_Y-~kM{l~fL zOnG`>>zab_!nVACO@dDL5}>z=$ull48}2*d!NDrkrKUfQO<}blL-Xyi3a4z}+Wn!E zwkIJaMMoX^cHEWF+uIu&gQVy){9QMloSZ_v=GT6{&;JRyL+#Nc*-0P;m`p2edzfku zm-Era>)p-QW=juCf$KW+`0-Zh!^K)) z(5#^+mozFX%-l1l#6AA=Rf)E~zI3nt-Rs->`gCZ+k4r%CGsUHz&-?50JFn?RECBdc zfX(z8s(!#l@KNHi_Te8&CMIG>M@LArq>N&3o2UAt%mTf-t^-hj2EkcZThOG65^&w4 z?1MmnlAB!u#TLLHs0pqnXy*?yC_IwTdaW1(&zcD!O6_al{!F-9LtA})y{$2OxzyhK zUf+{Gb#}}Cx7Q#b&!PMiG@Mpjvu%LkBP`uj_4p4biGps;xM1>l%!fY88@yORd$p;+ zzX~Y!-Cm#gQL=ed<@WPEt-$xW$`aXwfhEtXg>+y3`=LUP=W`>MPlu8{@ZEvWm<%|& zFAp%Kv!_Q=)8@<9ui?B=5s{IP6B1}mOig2oe$@NRLFtN^X$enCIsZ^@6nb;gzf}*` zUM-p9ZJT$o%01u^7XF$`TT+n!-bwB3n~hgBu$kP@!Uv0NSbmSn?BN{k>SWyy%D#ME z{wI(Hp+S&5W~c+?_EIS@Aba4OzrVlR;kG?YXO@0r)bCqo^ya*Q)*vJ#MAKOevEG%Q zZ_xH(!ovyBPz^Xav;=?^)>LyBWg6kJXMj>DDt)TIP*)HOpgkh%>sM{6;C=?cD3!5>4v z#mr5JR2-dZ0U`h_c1cHvI^gucj+9+-{2M3sVsH;E-{8Z-hQ>zah)lx3z`t^*9K*jn z`jgM|BVGb9_Do5c3-}=}g6z$q5OeX>#7PG18MFucmJg9p2LzJP|yA zl7359*Jj^aS-FvQKv1qyw(z2C8u=L>tybW^(e|H|MW$r@BK(EKs{_*{jo-U_(445M zt7kz?;BFXRlYOun0efq9(7?P6da7@L7B3ioBn5;ig;^#bs&sdC%`I8^vzwR=<8=b& zC^#=t_>fr$=l}U=CD8} z^?sw@6Mk}GP|#d`QpN6wT3+gZUuWD&YFkeHKR?3yvxZk7+KaCa)0DV9|+r8$fW9)5U=0vsNDys3w{T4Nsx6sLk~y^ z2}NoG0rfa;IqU@N&k$sZzK6C?MMvj(Mz6?RT^HoLMCSY-Kw@4!Pn|V&Awfg$CkL;H z2qnFci3q>%-;+J_YD-ND#eEJ{?Sc}6m7%XCrSiJ^>5lPsdAdeL<0yA;Yb;ju4w#}~ z&~L2a>T~J9%u4ws4nDluZKUb)_cU>*N^e0L&8t^6H6_N{OG-*8A^Usnn0+HFDk_4% zSE=p*T$-WF|G}~a|I2HoDc1m9U79coc5)>prGJq8(E92}o6pohtPJKG48OnjQFguI zZvfi%qe2q!Ro|*T>_BxQW0rPq_4uk_w}1TvD8SZw|07fnLzx;$Nm}!O1FqV-I_Oc|{ZR>k?#PGWgsCy+&qhcT$6g6wuua11!hl2CLb>_0?>PX*WF(u-& z-KUOjY!u~;)cHyu?yiv(>PJJ{YXKW1OfnKB7KzEp+h6eAc3mA2t*Wk`0SQw@LxUKl z7h5i7WMn+m$oAQ&+j|-xkE$zZ5r<|e>kW30j$kmX3@+k`FUC9+lw95?vC#2TQjsg%mXa<`9`a16A zO9m7&NJ&XS{eay*f&r{;2_)v=X=-ok#o)@xM=bg~inH~)bV=h1D4Lu2<(sPX{3LQChfb5xX+X#a_( zIkf+SCeG#MReS{Kkw%~ax8VPohZ2u~4ceSbGS39!rlho#e=j|bN%DPHXt1@lHCgxG zi^4knq0IS}6=Ub)JdiSABEq3-KgY#|hw^72%!zm{f9E%Mxev=lB5wPo5eWMy)6*8v zi=yH!>}uF3Z)R!%kihceTu;347z8rFkA09a<=&kDNI{DvNcD}d#7|$niUM&c1GaUT zkfO4Z_5y%_Hk2?1N}4(B%<5DF+O|oRcN)APFfbU_=mr9pq5g1L09EG*iHU>q^Yi-_ z%$}iO7HFg}Pz6yqgziEhgrH}5|M`c56d<-15KSY|Z-L-_8LokLs`FnM3ydqtMMXsc z)D{i-j=}ap)#!cDeaKYq$FeCzLH}V1glQO&t+yC7Vd(xq%hT4+j+vR6$LKcn+46@J zN$l-&0QuW>dr>=U0l0Qb;4y*C1|6Vy+$I5v4*--;$2UM-yaMsK6-sX=a^Z^mip1U9 z7jVu+Wxh;G;^|FSFy7wYrV#|<0McMIyWi&Hz8AYtH=`)zH!+J#OMDEZ zEXtKRLFbfvi&qu&maY1u^)rxCr+F7c= z==b&ZYWYFkY=a_7h3V_Rm{2F3Ah_asWM?oi_526GMNRZ$vZ*U@hX-q8dM2ed*Q$rj z^yysu(?B>GVV(M8+#sA#J+PqPx3>ax1r=BcgllSQqJZK@)%CWKkrDolb5VnTdnZqm z0o>`KT2&i_m3E$P6!cxQ@W^d&BL-7%utK;sVz5yDkFasmv}jpw69O{|3kxgLM2KER zMWqcuxc)vPkZCBxNC^#)02)_qZC6k}(+8MFx%nBbxr-Ms-T{w7)jdcI+vx_oKh8xR zW`}hx-+(4Sl#}yvEoAlTsp2k}$Hl!IDYv$?vvy^raslZO#G- zbFW;}z-w}#zh7qy$~@)C$;lO9hWa`>)KirL>-&Bi>CN#-N$InQT7}%ZdzVXAPA(d{ zUFu4wQJws|7S0BJ8&~cy7OiTJ>RbxqeU&#F*BOEjD)aWXXR3nl$BeT6Rj2VPDrle3 zVn=%23OV#w1=cANgb&G;LaJ`M(xT7U`KmFr-Qs%md)V9#GZ3hcF&GSSIQ|9jg4`hp z#GJ-O8{?Q~zI!s4{61ez5CbsmR{mguqHuzSz=Tu`Q&M?(xg7ayLib#SS@3)A_r~u= zuRGj9Vok0hcvkc-u1xmzeQ4e>UdbE>lvBS@df$}a z_j~!x25uDfF&y=0sZ)rGiT#dUu)BLV@}|;B`QrMz5UdF3h(oi0p-I0c#SB#Kpk9Hp zxrPsXKic1ws$cnmm#pH|t?SiZs@;Q778Vw^@!fu>D`OTB(K*g^-BkB=SG&pNlJ@Y2 zAlmAax*~1*P3SK@I|e`XRPfHel5`Y-b{>VwVBTp3RU!yTaFbe}596E>n1E|ffF~e; zy@D2eAa^O5nT?(0UO)$pCe^S&g9=+M5BDP@%gRq)f-)ZOos-AP>~7Zm%Hg8Ac$^f$ zWN4iiy?h%;0>~W!9UjgTNVBeUU~x+8`KQolWCCFx=7sAe%j)Onx4ByO6_w}sy-!Q; zc5ebPGu<3Yk|0F!8*(tj?P+7)?VK;kUZ|H#OZVCq7B1Kb&U-$N;SXIkaKg)o%Dn!scO6fUvQ#Q97};tu1+{0+KV8Bz{c&S8*`h z7!}fiR<70o8U^W(s)===`9M^m=hl3Qf#&&~tSl3(iG;@W>tsK;cYwZxeqR-e{nHmO zLZl~sa8REGaQyk0M1I$};-yYoC7Q_J40?6pCc~AjfC$$0LBq;rpRg<5dz{X z^bKJ~g+;Co72rMz3J#`XWi_=oLs1G!1fHgW7zYx>Sw%%Qv@7(tM{Rb9yN3tZSZ?Rv zw@8UrV@Dt^QNa~Rwy?SlPgz#*{Me6BY&}3(K47*<)3ogFjn#QBQG*AI-_(FdNr~N{ z$b_$wey$K?ZyMq&)j#3l|GavyUM9->%OtM|NXsWpPx(SpIdZ^>5b_Wd?9%Mx* z(Zfe8=g<|n8B>o(iJ($-dAS5C--B2RRC06&zwmGV*UwqGmMOkXx{@U4>VM&b(_mDK|N;ih6E)ax_Wv-0eXN$HG-!J z)23Q->dYyKIuYm3M{!Cp^|0<`XTMY7Pq@q-U$H0}p!d(kDEP{#Pz@Sv@Io=m3fNo* zktc{s;*K-DD0L#FdZfHq4N9X zS}^Cpq!ji^2IG@8L-#N|eZVm=%K$dYOgbw-tUl0LIePX0A_TjM$hZl`NMi!roVxsuJv;ZZ&37E2_y&WGFRspTwe^H6*&P`2CfNw7=DhT%3(Geg14;f{bRlm>PE3d(S`29PHa4iP#04tcRnt`|n{xbx!B19ci@580+q)i}P zIQjXB)4)xU)DF-n{B^(~5&S>_?TN<6O`s&R>rEbI#VK9}`M~w}uWKmF^it7Z7O2I0 zK9ev+RH6n`%N8&nR&7zV`!I6HW2Vo3BY5M4G0U@|Q#VaIKJPo((B{s(ndP5XRo%Ii zHE6lkKt$%gAv#feHeSEj^c`#tbN?M${UVb{NOaf;TFF40`w4Qy6|kXTA*jg&3>fJe z0|(H4iGt9KHbzK^=aP=oWNmfV5e1km(5!~|J5GTv`viE=tQ#p(2*$wNf6%oE+%jMh zi-z`;78Noo%-ECJjS=Py=YZJF^4pj5&wQBqwG-A$4(llfyZe&8JsPfR>}^~yo!BQ)#VYd`4M z*VjuveryM(Itq+L4h@Q8N*@ua%NitnkMLzITRyKC28vzaqs zBXu`!Wa}i$-$3SU@koM@G_9!^|NTu%&6grrKri6K_P4OJ3r8OX&8T;NU#@7OYgFT} zO%uRH?1~pSx$E9!l&PqGx9WWR9D+a<$PD0HU`6lA=!4V(1up|!jDs32V5@^V5f*t4 zc{AwXHrpD>G&lDEGI&sNu`o*ap-LPeD>Wdi)B4X3SR*4>@ji`lbm}ST{f)c5iqFlN znWD>23p{n~!$0$=LsJGEvG26fK=-)}_J!=RCDip`SnDGyS%66wb?z3lC3g_nMn_)TtAwq8|X+I{L7QuebdUI$>n|CnE@UJJk=Y72s zVM!_~y~3(0254o28yW)Bg3{+N=M0=Ze;yZgP8hq?4SpNViVS(WXY_P31Lmm);B47fRQq&!#Vi)t}36{mVc^g6XB<6|6 zz%;E8quYN%FM|u5|J<{yC16+d+?ZrV3lsblm90PrmGB(u{8B}#d2ZgSP#F^~>69R5 zL!W51ku5Pk&|8>Mb_FteNL*Z8pQe?=#S!e0h>Lf+&n6Hq{<=?rjJTFs>gVu4I-C!M z8{2XvHE9S*e!-ulN{CA3bbfaOvb|ReQu55BG_j>ci^yDEAXifM*n>MSqYOS$y}^Rh zsMdQe8^b}k$T&@ij5EK4{JV@k#Ddc~0;GJWy z{rxE`V_-Y6fO@t0`*#rdw!yLf0jf*Z3d}?#;2YFsnxz(V87!BPlb;hKu`JG51lIR8j!n(F6e2ZOVTS3J}(_XU}{T zEkIw3Shth5D1dX~wja=A`Ks4LB;q~aet6f5B(nZ)0910$9|b%D`Bx{Mn(6mwKr0T)l7 z2M{mK0RLTgmi4ZlKYWl5y>|29AT1C8ILN&UBe?YpK-?yfa7wJ&h-*P#jEIX1*T}|3 zplC6IfUiV-<_z$Z_<+si|9lbm0fv`sZ@Djv0ClJVQo{xS2&3r$zKV*8$w){ER{XIp zKOTarSm2;XJ2e1%lt7bJRmp&Xim8Z%f&!qoLdm^8PWj)R=fGRs^snQJ1Em^tKvcF2 z-RN6;y;itfcIsRJ89`Mhe|w^F>41x2aF7`(*)b9OeO-bqSrCq*AizwtgO4T#RV08$ zZwC(YX+pwXA81K;!=@>a@$vC*z%sWqWb5#OSr^DJVk9Ce>N22dvL1E+?6XX>btdStMc5tu^P7Fm6I@ zKn&3^F;*ZGZ|>~0K{azo&<7b7g*8Dz*eU`*X%L)!cJ&+dy`Xi1IDdwKf~~djF%H=< zD>(5L9&IB?=iK2Vjr_r(#Yxi|&&T@tSAa)7+}kj>KkW+oAqEO-UY~y_!zWA%VW=jt z@%5899ks_y=TzztgrNBCIj+j8vt=M=?_2=OEqZJP(z553WR=erC+cX0EJ_UQ1vY{j)cgCX z(MSRU8@hw{UK_xbqTmE&5dliO0gh);ctiu`Uh2s~3LxW;PNP9+R)fa19lW+Nwj8&6 zL=mUUsm@PuzH|y-65tW={Nsh;#b&T z^0&e0UIHZ_KrGsJ`+ze0&z}PfTbPG!Z)zlTe$&u9t3zLpW>~b3zBf@Li70c&&y;;t zL&HO=~bD(=pru_kl%5$Fvh@i8}^{1N<#V4eIHX^~s1?6s5@0pdW90Dk%I0`i5A z|4N>IL)777(V_EnVDI^itEOdmGLaF4(?JY96iNefP@h{3yRvWby`{XuQKmdaHr_Lg zHA$Yxd&}{I0)O^nkE^QRJzw0{)(0i6@#5Lr0!NxIQ(>{&ui68oZCbBoD_^>Hw*Sxd zY1{k|4nl^(Hj>vHQSD)8h#2}t%QDK~$LGNlgu16e)WHN?vbjXbBx9gU4+RUfC9vJX zSsLNW29YI*o}ko%xrpuy5}LSJm}k=1js8gk&(h7U7Wn~{%6WKu6Cf&-S9{=8O5IFZ z_HPD6>QY#8BOX_n^<$x-#nt=af2rL!hXW&Djt@6vl&Y(Ke@hsIfEN?uI}=DT0MOhP zmX=M44g3^Xr!ldS(TP_lu8!7hgI6dz#N&f&nTRK|nUc$qH0cfSs?u zBEd|eE^zTY_N})=8E;f^5-$H0jfH*)?preR%L_fzCwV?WgHhsEbl-XI#~{e#gE;-K z($2OLp?86mNst=OA9R4jZwgRzP;0L1*6bA&K7wRYp;adO>>5`|Wj}?3pfNcXiz2FLj?`Gw&$+IH#G$L1F3$)OM+>l07I*K40+cRl{?l-@E(ar@ zMUC>PXtuBr4tAnUAQ|EMAF8X_-i$4dG+LYtxdytz-h?ny&7wdC9V8)1#%^x9wupk< z1b?EuYO~|%@t>O@3ow6`|3iwFh71(~pdty43O^3nW1~iV;9g?b$l8|Fke6=PNjO|R z9IKSHOv7W1ThJqocufAt%k*?|=)J*PmGu+46JU6XHeZ1bVgV*Vamof4C!Z!FG zA$f?DNCDFW9s3qo2+(76u;<&*x&X?}Am0S^v*&ntIyRwNOKZnE_Nv)qg=%6KYO&)c zY8zb@Ee{SDldBAa<{~pn;t=^ZI?4zl5g%W>oXOj_Z*^+2H;Oe_99Ij=|J_AdtG)eA`GBCAbLyI`d)h4khzcpSx;HsH3 zfYMUH0(yrIqcyXyukYKEXP7lK4?%E?U+xbJ`<#n?RLe_?wa?wZCfvZEak79Juc3v^SLCG7 z_$25z)^x(NiQ z&`)LF|9VQO?3PDV(4!mSejve_;AW)(+F*7^(To9hsci6 zp3fECgqd{cqu(APe(4tOJdM>PmnY@?@#Y~DUz-&dradi z$%C?{5^^>K^VM5z`1&L|fo>Ma$d}H$)FXF{klY$t*ZGM;EF`7({EU@*`sg1(cd9Ws z@D#aDoSkVyIJg+8bt#6SK2~INw5Z$mD?K{BbLw+89gv^|j_~nvdA9 zmnv}Kv$G4O*%rj#^_=&Hh9>A#F?9)EpMipF&=7u=roUtH6oam~kH`1{zn1KZ42 z0M4CjmvzA8QsZK2y(p z=Y&!p4!!W~w|fUisMqz^6xLi_)zB)BcTk-<&h^_Me_aX*84# zVYqzRp)~xVG|1CCx9ap8$;}>A<)krta+bVUL7zy{7{2mhd7Z>B|Bkc~*56}cDuaO$ zVmMU)odu$$-Qf)KV6#-kCt@W=Upe;QSYlx;)>@LOggUlE6HN#PijdvE#}jXb$MYz8 zuEL3MF|q!R$~U8|(NuSl8=u%!6PR}XH)Kg^xSuElZ}9uXLd?wJ2b979+O6@|L$oG@ z0b`Q%7?)SB24 z(*G_^50}1Z#|(AQG&fA2XJ~P(S8Up2 z;;Pg0#{aHPf{ziz48-gpjl83viPlq^s6y7Xs_GA6lP$4cMOJ%-p`EO=#w<9H?yvzhI zwWE_8?{*ns1s)|80t!;S0b7zT(}~;lk=dFNg7qfl^|?d5gbHjkQN4fJCS5Y`9|A*< zJi<}mN1%bi$#VTi>u@ChEMZU|9PMOE6Swbw>%M@}0^`D2m#W4IfzSE=ZP4}7qH#hG zp_#Wl8_}Kr!Z7NUCQ=0qEgaw=0Fw@og*>Rq2nNC-DOu6)2o9NNFJ9RAfb%<1&JLWW zY720lS$N)$k*s}42R~UCqsGITlUqKto{B!eR7Ztm#T7Bd%ZsLC+>~-%Ni3 z9E#;=>0O}go8gp`!IESU2b}Ak*aWwU+O5EDt=PcF$K3HI zz(E3zq}hhcqL)+AE%4s_1}CDMKPVG$ECcpuvX1lAMDVBE{^pEEaW|Mffv-h{M>y3q z4M))CdO^LT0C0oqtLSM%P^qZb zTK_Wbbch*NETn*obZ8>uv!}*-Q&uJpEj=6pe-fMX_APp_-pC$KOuhm8Lqu#*ZMPV_ zLjZKfgmiHb6@LBt#lQxqIR*o%Vle%ei5;^^F_A`CE>G}GJT9dzzL;29}_45R`mY3M=2- z;{A8$9OmmyZ&TiYZuy8nCL{dyx!86atI9BVso<$Tpe#WodJQ7xz5(9fbeoS0-`DuTTHP&^4|$>)b2%GN!C~0{&LUCMEU% zYNsf4$k=%_VuoE@-y)T5Gwj=;1T=8idEUPpM;$}ny=N+Urv`3hV+6zC<E4eGMbUa@m1G-0zT0YsT;49Y6Y?p`3%E=whOY zHAsluqa|cK6gq5{mhEs?ILZ1wtf3qw9+83fXFflR1^xb{w=3vHvS173I(#1AgnxyaEZEpHz5c5`1(0tmAVAb`Z%(O~lc{RT-8 zW?|z#4r1sfL-$Ge?W^;qXi}5I$Asu>>2JD~hiKt)-WZ{k=NQ3@KitPc<_i=;f3XOW zs6fer1J~yfWp5>QUK?r~+QeBuoubeEX-WlxW4>dY`6V>sfeQAeY3e9!+J*P#ApQof zFN<-I>D3(;o0NOul%bkY*>Rqs8bWP-j>-E@2IbfKxgZIRStD2R1A6&=~MHkC%&ZFXd=^$@~0Mm=R7a`6gms@OBRI1p_d!paPGoeY%!AWRZmmxNdVPm`R9Iac$W8~$XmJP znGM2ZU1ha;`%giIv|<1#_qu3(v&YImopBfcT!5@~=V7_y@yUt838_yUw7k z`RpdecZyqL&=f=qi!#{XGDReohbgb6HqrPjeWwQY#tDI3OZO!82gcBs4X30XI54Bh zj5x~g=^R5zP5oA}kiTtqDdpL{y}seIKnKzYy~Q*E^E_^VC?^+dacl3bFh0 z4h*ZdQvaLRlx0ceyF-PH*D?)QUzY!Tw6SV(9&-J_w?f=2Vn3!eMC>Q_k>ot~=8r90 zCz)i6{%u&d&M#QgK>UP*@0JduxX%n#vgW}WF*`du9AE25t>hL56kdnDZQ8_F%@v^~ zIf31We3Sb|B);KHhM+X0$qn)Q2I3HX*X@Q7SD;Ve?2b4x4JTTf!PM*ut~&JKKKQJo zF1p;XwaqG5=3$pZ8Y8QmXkL~GFd*r5`I3+DWI!uw6IXhz>XtY3b_ou%SJ>E+A#fxP z4&jp{C>?|9G3bFC^u!P7l3yM=z9+ap?lkQr7_KHTyI7$a|4uN0aNpB}UMBAsWS};= zluXT6is-;>hmddZrUCESRg84_=7!?==(fmIxW2 zO%ABth%$cyYUvVND9mH4@Tzh&9pYnQgG_B$f45U$$f8}CCu3+)Gk^m2 zQlcv5%EO<7@LCJtGlv%hd$|9Xx_DezcwfaX=h7Ta;_U0PYfvbP5p~=u!xV{=kb=vk zN1jSR{=SsrK!hnsMC}h@T<#^OQXVAJs6#{2afsnm!S)LS__3PcjV4fYrC?a;qj&DS z$*K&{-`pCxK$CYv&T;rh6EVAd0XxP1PJRA}*V(rJR_{ElUXq|OKEx5y2>!PZ{znhc zBUB7xHiYO~bb5_%;}E)4hGuC@(fUcWLwHfGO^D{ju_Mz56@Sn2<#hhqYGD#iHnDg3zs^9Fr=6U+eKpi&bRx1 zeg?U}bXa0Ox7x`Gr%3pt^W0c~g=&fDIn`-6H+4x(4IhL8Vl*DkgSi{R(Ls4l-f1Sc z6l0XtA}a2QcO;iza?ev8Xo%*W2zg{_;>{kxDdO0}_jWGfJB$txErao<2z$G%Z3q4~@xnxI|=bDi&Ck##zR{oOVu^m{?z040^Sc;n%ZFM=4v zb*_sR^-VwstApZB&oe+!!SD@r)5~`Z@v{tvW#J_Vv>xg?J3LkGBPA9_r_VoC-kVVm4DhyxK_3LWvi*z?wLU%}UaGR(_UJx{;&wh~*%j*}2I z)IMFtGJFi2(1moWTm2Kf+TfUXj2SqBw^mFqED&VtM1m$e3n7t^{s}Z>c+1f?9G+BL zfY;gptZi&*!2vlRCbwg*+r*&P1j;f4tl$y{VJO(wpTMBr zF64`@g$LfK$3F`x%fk|K>GJoyms0nc&{v9}cK)NijSe``5skhI0}f&oY8yEY6*Q$@ z^e6#CaVluW0K?G(rf`~CpspRXt!vk=g{eC^74-c9kGi7b(W*G=TZQ$xm8S~@OBoJ= z7n%v{^+3@B8RxRl|5w_ZhhyD#@8Y*PQ-(~T3`K>As6>TOAw|eg=42=&NeEHKNFh>^ zDI!Cer;HghWiB#RM5aoH#6CaI`_{Af@!Q}1&+j=pp7-fJ+_%qgUF%xwT<3YNaQ-bO zA)CfdX{B~YZ@nm5#2_l{SE(gbS5$N6jnP8fu{s_C1>v;XM6o?c5K5K}1Ue2;4kZaK zVRKLu2!0x6{IW#_Q9PrOYJqMU3CtoIL;+%{xq=MV7+X&GVJ-AuCM03f4q$f@j$e$w zSjRqotU#0|m|iH&(%x*SW9~H8G~3}~qM;IRNFS*bqhJ|@Qdc=!Li(f#13_Zm(t=Sa4bV7t*t>;97 zNS^Rjf!g7;+gY?@J+5B|S<{y=ivkqBTmCA#i(3(Pl5N0&!h6<>vHDYO>d3k1aS*`J zg`MZNZ*MoRTVhKZduSy&_Lxl5b@T=@D5iq5HKE^k2p>a^np%`vpnLh^g$t$@7N^qf zaG=F^*`gP3<=l^Z1)UP?Rn*TOkU$dhO+v~7;T(W+8t~*vriGcpHsHYh?K@-`Gkrnc zf78DZFXjl^#AmfYHIQun7(#pC`dqnj27ge+WjoDbC$-1_WT3X|EuU&;qX_M?AJ{!6 zwF||gDhv@A*QQZnD=8^udn`E-Zclk?mq9;|-hDoFco14P2J*?HHUh5)1TyzI%5FL) z`?}X%lCg~Z(a_lQw9D`4XB_T~cY5#^#RP8iT;$}5`$F$7T14xFP1BkCK%T@a>y-2? z{N|sM_;$%R)WvXEG|rHQFGCx#(~lgX@E$G(!40~3uH=m%9hh;$OpWeV0!Nnjbd8co zWrZmx@2i*_y|%ra#3LKanI_>Ys zzkk!rh0v<1U+*6ZcOZ}**GaUdy9O_tro8C^`~Uytwg@s7UP6CMG@1$1N5Uck)o)I& zxpo))Dfb>ag1m!*^r-A{aAJ?tWNCzYfd9_>x4$ru7FSIfx;F=BT{b3wL8qY=BG9ZMAIEd>aF2yz3!rj0Gv2){e% z;33EofWHu2`KpmqHSXj(KA(={Jc5Ijx9t8ctdI!n-0=|4O7wrNPm6F*y}E4v2p;L0sIBFx_} zv3+EZ>fV0fI>r{<5^s zaJY%~tU&m>#qZ_H9(fBvw3h)nbS}ySk=oF<<=}YQM{_?0frV=N;@En3QF^v#S9)0@ zeLDL`3{!g0UZX!U9_71aI;ryY_Q*fW+DSTyG>eGVujQL&xg$zYzJ^hJ4_$<<%GX!X zL5pvy3{w`YB}_ENpACxJcW0$A$2LxlAnu&owIpUrL*sAjKo#}iflZ@Pk6eqOEf;sj zdYUZDAvZl1O0x0cnE(aONyFsi?(x}jhb@~Nl(ADAFWU!3#In%0cx9TLecrl|JgCk zp#~HcaIvAJq$H65_%SxU`Z3wV1S$)W5L0QRcbpnR&nERd{}N3?M|?+@N9d_OmrLvQ zIQGSc`Eq)V{qw*|qh&}?_*S4uBau+Pyhb)kHf}^cH8q8j$Tt2$1P51dEm7tseKqF| zel}njj(xRgu5|jS5c!_RYBk@kpFaNR6mnxf3nVLWHeE!TK_W?%{a}U11wDKXiNX&s z;XhY(IbZL|iLecnWdCJP+D2-|DV#6$EccX+UdTUFQG2SVj}?h}vr<_m1sD8GK(XBd zi%pQm)3V;jF!r$6_KPQf93JHQt?}W*4b9oBnM>R^61I^hr^Wgrcl!K+|AUx|4g;!FQf4gM7Dd z7$(N)vu+*rHnitr@vsYgFXj1+((LC%D zUtDX09@-l|ZlAL6)Mg$`e0VxQG0ewPen|qEsX50MBQS;o;WHl6EHGvju;ymL<^-}% zLBP&nTfjTiyyZRXM)3HJ$uu4Fw8neZl}7t1-QQ{$hUBV2PKH3e!LwApCCU}%Z_6AvtoXQ=}2TIoc5>E#B7;5Y?K7R51 z%iO&EEW~52doMBU=E6d%07@sjKxyH6geg3CA1jO208tl{#*rXG8)E@W2*RGE$iBl= z-#aFAyi=aH>}lMC54$T^K>86rUap+@y7lfQ{bEt>ja17I>d@Mrs|w5KYJ0cQr$Qkt zAT^%&I!;Gm-GmCA5F`=(4wwt9ClN+{>v{K?wis;~&Nz=sOaFjS8_pfqM<{|yiqEDHY1K?L{V>;w?Ig0roSo#YAR@Y z02Tt%I{WH2d$#B?s}P8wC3~kwNJSdHDw_QR_b+3v+uN3x4x`yz-=%ST^&9ik)maH~ z;jsrRkMV$YB@rOun{0u06VW{o6SEf&Ca9JuKB|BerQJ7P)pX3!RY+uiF{Uj-r)FEA zYO0HaveeB)?&|}&bp)mr$b#Gc?s+2+5Ud=lc1vwTyPz}XJF z9pnrL$oPG?9SJHh7<`O7)iLqzgJ6+FeB-;1Qnb{0mu@7V@p+V>jb%@r(NfxEL0x1x z+C*#CyQpFEgWgFMm0;sx$-mDD@e z%Le31PKG=?v=VF~X%MC^q%W1fdwTMtfy2T!LE9brUOgRp5UX*6$W!0idIWtF*nYp0 z>EWiMKl$BRW+&f~%!#YCUxsJA`A2nvvPQYT5d6=*>LuI`UmW(7~gacX7zjtl{yBt!DDKAR~29Qw}8yTr3J+ z2exYVYGz8AcVJ9R=m(c|j#}7kz97zN-O0I}%!)pj^&L*XTr!eJ*^G!))`4@~eGND& zUjlFi};0h;jBn^oLiapTeG&$}{F7VWvy zpaXtX$oyW4iv4(k50tfT8m4#9`0gdIx~CmGtdEe)dc1FqC+>7t!Es?0<_|55p=sX~ zP0y$b2zzn{B9rPhG8lSaS1j1+S{51YE_42q;duAW^?^O`g9*uy48dysFCY9arsgZF6&cAQ_Ve%{KQ;I?Jm za;6CqQ79W`vvY37<QmNfjkuLC$)Pi4YOZ;GXFFTUqA{FkhB@xsi`73WF^qe-n@|0#EcKtT;tPA^s zh2D$%ge(DCISb((H;Z}^o3GDVGXeIn$62+2mDs6WAWoV&*vpbL4R9H46lvoXC`d)u zaiMatuOo$}-hGktvVu5iDESZOtlKv7Qbhg)Gv%9C+qi;{T)nAG>Rx^5ZG&xrv-9y~ za1o9(QYa*-)_3UzB4r?X2RAGM8P538KX<@zc=AYzqK!c1bg&KA+B{2A&uEJfR^bn< zLfa&dSe@~=jhji1QsO)>Hc~}C=E1u;bfBFcNz|f*01eaiE90SKAl^2iN0G`xKbNh- zbM?0mdgPY;G6jWiFIRhP%C>jobCuH<6i#Gj^1QMRt%}JVDfkT$#BcamobTj)CsD{b z!PBVl5nX<==!S9gkok^ZPh$OdzWv_CwJ^`}wJ6%~P7xOOQ*San;%)25(b25zGYr+v z2sjX>t(7ERu4=mRw2@G^d>{R9pGUxLMgn!$%m?Ofy1kJ^3gdEIgtYSi!K=E8ry=&c z1!gSj^!B-9;lhf{l!aOc#;{YgbZ;Bba_v|37H@pT(sA|dYfVX zrs_8HX@|Ap;DPotqUe8oXqc{k5JkI9+ZPe0aRll6w`pI8Q&5md zd>6_BsCZfx-Tm!|vc_8K|HU+YYX@pSp+F`S#!&9FKTbTk9ngclEBeUMlS0g05Au}! zZd8gRglrv);Qwa$c~fiDQA#3zyR}f@%TFQz|Awx*yCzk65g z|Br+S`HELbNtFL7m;8p#IRs97hLQ-K5=(5%=}$7%``t{NbB%F44>{Q$M4{$A@JK*I zxl>Rkkd*zDFu4XXpO97(jvhZsFOAj`uIJ;uFSZb5bpm_`;9d)W`r%)m1I(JY^ik+-+Y4l4w&^tz9d1NQ9w0codOtXN zuIbn`amzlSp_~+tA_%#1SEr?U;djWwt0Yd=Jo{G zO5Vp)!gnn<9S&#}d+yJRm~j|vX?_wx4EleIw`mBYd*|`C%5gIX2POa{_-`^8iehF- z^j?R}gatQTatKZcY)(jMLV&f|9wrf)s!F6yhH}-FjpXPjB8VTpWb=t$WzbgXgvpA0 z9w&I5$ZPz6MQyIA9CzM52JnR2Fr1czHylhTCQN67IOCUlOMX-ngPSf40IwRp*FD916%3wQ62e7u(objizny;$Sl2UuY#PnD| z`QxeJmJcpOM?Y~;b>!ji)3V~ohBt0dUTy3#gm4GrNnlM>J#A{s<`ks%V(N?(1PY~_ z#M;40pj1rl&U>9~So(5U%=1wE;2wJ&P6<3?7B%{3vN;;cIy3rSd*eA21k++-lc}il zFWpckvAzs)l;4y7u192N5=cy%uD@4Z!qm|EytavHr<_hk-)ltUTp}7z9H3Q zbzEERV9Pc=uQD}7=3URS4tYL$yCGCWw#B&hbwDc<|2GlNqsXu{K>?6Cobl*+TUQUx z2$6$4k~01CiC3R&SbfyaiG2UA)|1+s4uW&l!(5^CTLo=}i4UB*IB>DOc7U4X_|cYH z+bP)2xvapKz575H&6fS{N{Jg@n2ySpF8;EKuuj`sO}r8bu=ktw(e$(aWK9*j4(F=h zSEVZ1e(_Kl-Ybsngnr7Kem5&f*wZF#5es&-WTOo=ojFKcXWdMQ_{9 zx{gHQb9Q#@_ABdAD_^wAZ~t1}<#~YnydPHa-6DwXZOPcAo|(mugIb**ccS)FL@9U5 z=wwpWa>_4LYnrP7NJ?{88H|6AGIYjF?A(un5|mPf1db|0f$FgJ)3Z+5H}zU^n9)x{aZ)Er(ZInTE<2FMPLr6cC^hgyWN*ZNw~i*2UgW zeE8znJ!HIRtFGotnyUeCf(A&qI$*|GBvxmn4Sv4;lM(c1Bv~29(AG z-W2=o;&3qZ{`i*}%A)61UOb=Kqs)KCtYvS3Os;KP8!d_wN61%?3HgVsT^e1RU%R5y zP*k^zMTV<;kbyLLlPVx4IJn^&dRTasschOPGyKBy5>#PiD0pz-+Qzu)(lZ&~!Gr*% zc`rpZ&)Q7tz3M(gvma~g^}cb_Br7XNn*GWBWE<)wDsPNCvqe}7uy~7S@G{5+cGmPw z%Ay|hy-hA7^{uO3?BtdMxi0cXh9vuhZM%8$REx@WM`%M&s)gjk>H6YhbFC zzSnNj^9>yQw)Ox+1hdZZA3QZE^mWQK@G)8F$$AY=%fN(ye)EunTyG>4-3&~lO%j;6 z9ahQ@I_hl6c|uvN=9%1oG17JQgX!(*3C3s2y9_o5re3r7bD>4W5%>Hzyx3cE^B1*j zb&=oMs;3DZf#CY8sM$-Rt`rcF<-D}<);WW|Rj8#HCeNmwMNtJ(S1 zlPxn_Ko0qD3ZC#;GoyVNT!ET%9OR@@Ua|J3nzL2xJGv%_b^;LVL>W3=x*Y_IWzzCi z2d!*C^LAykK;EHcNRmv1C>)6z5Nu|=Kw z<|rsmZy0-U9RLdkPM(gdv?-q=hd{0Z*FU~Ua`uEE>BD-xbuvB-W3JXV_s%(_N~5^vTAj_UVcbs8k8qizv*3_< zqPLpBYkwn&5#=*`NELmCHwd6ZW3NMf(it_s0DUit|EyKR810Ppb(!Y`g%4vn*kmbZ zky5xNCd7gSwVmcOuFpNVvnPJ$LC8N!Y>ECSaae=#J&(A#lYZFTRF~9 zTJnk9+~H#$h)SQC?1C#Vo|)v$#jVu-y%qM@m^-5{9<)u6M2pz2`#efx&t`v?S9tIK zo%AnM^%C7&p>kbLEovR(6t!6{k(z7x9 zR?4h3TJzs0$nxdvv8KAYXfbPNL9tJ)y&^lq?irc88ViJ145|#iJAdA0RY9FiO{HvI*G!{PXA%9@DBTV@hFiG)tK>!qUv zUw>4YCBf?UfA^lC)YJ9}VxemXlDy9AChB|50Bl=ttm&`1t?EM~Ifd`p+uxGgCuS#K zxO%)CWqtd7K3MbW!sUvlYNfX1$is)vJq&mg@aM~HxxrimM8FPQ2se%ew{!2@)^cUv zPwrW!uUFHzi+9~I;BD{QkTuZLA{K1CL96;tGzo|a^?JWiX{`0CWu77Dy@Ixq1n>kpWSa^V32ca4SoNvnGeZy= zERH^SMLaN24}nL8x*z)BKePUfuU=jGL&QJ}decp_66+G4PI{P&EhZiTaJ&v;;jyEqYSH7QT)za&bBy_DeB29XG zDKg7Q7{&(F+ieUuCP7JpQ1(eq!S%@Zy;Efoeo;MfmGfT(VOyVR?0V5vZeM?XuQa4A z&&&JBk1AX8!?XfmGq7QjcWRRSg*$4B#+SZq%HrYr>T@xN#?;B=&^q$$)tW2)-{*{1 z=1t6RslQ=nk&(_Pb(5|6@Y}vZq-VLdlLEvov`#W#tzYMqm-($28^-m^LPw!TBE779 z4ZB0gSI0WZTgp1+e|OsFj3>&z;KO1Alapp$vXz;*3 zN^xJ7LoXq8o#V4&p}PTZ{QvGHS?r~Kfb9X^kQ8{X+ByHs&vIS}lfpC9^*5ib><->8 z(gl9VP|<96^t(eO4&5{A%AYI$#GpJe#IKZ*oJ%`Y4!vqcP=vV1rkZ)N-|xIY0R2Jd z=o48w5vu@t?Ci41SoRlZR+55vq^Tw63HkOzB;Z_D&3jp1tWtCRb8@q&qV*ZhHUC#Agmo?e=b8;C^dDdWEe+wXN4!XHojk8Bv^`iUh z_@-SR?NZX=gnvx0afOM;92KVWJ%Ozh+~^>x&o%7%4Hpc|t3-!NdcSy(AcvSfAzQ{s zC_#C7UMTGM9Bv6rO-8TE+?s}IQPv0uFQA6>ytvs6UL7ZS_p5fiu5;q0a>ZM4$`Pc+ zr4Q-j3m?Sa7`W4c%4OxjuH4yAC|UjoUcSQO2oCMrlkIfx&ok8& z+_`f2gx3*k=jCKeKhI*mJDxm&zoR-TLU{$#o(XqQ_2c#l>l^T8s7RpUU=-a@-}$)L z>Jnu-!!nRuz%;Z1-g=?%t|KNZfmlXEx;`0)?(k!;)oX8J`?32FVW;N*gr_&APM|Fcj;H+UBN8lOBNs`~G3e zmT!i_q>5ugbzj|mBr{8xEc>lO4f(qVk0;S{{SQut;rn22!+*E)tN&Nn3lrnaAW8E> z3zA?}z=GtN-s}-&OEvT+K!CuL^3mIde(bur2P)v_D&IBqZ`!d`_~;%J#QP33>Wljf zQQR41m}VyyZ1RllrboYXXuwN+$kvSvNQju;rl3lsw0NNOl<`)ILM$>Dk3Xf#mK-r* zR<^W7S7tXhiX?Rn;x5Z*3`klMOd{?q>}~+NG)&|4Ovb zBy2bI5I)RItD#y=6@Lz8t?O^UC?|c6w(6C24<5e+;0-+CIXo~M6hJGJ^%l`UDyd}wEt?toQ&aw<$I1Wwg0p%zxQ|#!-MyPdP193~`Z8^{UIV>Dj-qz# zD?(E1y-v4@?0a=y)Y&SXziZ3i)nQ1El<;+gJObJc)s(ocZ$EUbM;#}SVQDhln5`Se zP4VxK-dka11l%dO!>D{UE+d%%x$CfP`twVM7ub{h{=H{)wuf5YxMx(wG#5cbxS9Xb zUrDdz;&7uOm6t5He^Ws&onOC{Z*+!_rP|+@l&2%ay%V{Lr1PlZQ7U4qcYI()>@b`n zYPitBXhjeA^r7Fe2X?(WB6@SVH%B*Y;%~K|a9>;|iwh8vKuT)ocwtT+FA!23V>f*# zT1tX+wt~OFxGigR?C7Xor2;4WW`^&z}IK0<}v+Gb)dX_zXYvq)|}w5 z=(Cp5l4okw#LL?Er`{~dc-c6hVt=CKxvt~)zO9D~a`;K;fzF+b-b*km)vrwz$2~ju zr(F(tmgx3&sC(AY8mJs?-g!gyYEH~uH4>VqrS{I8MxvHontOrJtUWqbTfBjGpWDt3 z$9{m+v!R<^cfH|3gy>b#(Eaz@rZp0~1Z1)(dmM7tb3|B&JTAz|^^Wp=b*0T9**htw zsS&)dk%bMkg!LxQ3BOUC_Oqs^9e|Qj@f{Hjf5l;Y@NYm$h-m=HLCHk`cL{SLA0s>d zA`S8razgS`JBN*%1GnN(u#kMP_*gOg3hiY5<49QNoU%&dIOt)*pF}g193kRZZ58g4Us}%j>H}v+c zQjox=e&WHQy{uajc{fJl?*glrcHYnOg-YmXDGRgJ;GVn6$!#aL&Vl|GoR`pe$uqD| zrPuM>e!%A5!0Y@*AG@gCo|w2rQ{uMe3H2}TQ{SupUe|*oih<3=8Nq&lX#LO4cjod2*DuBm6N>Sfxru`cwI~~=e4RMeQ>`5d+I_EdaO6FK_ zs;c!El><$aT>dzK^~-@pxI!`_&wE3DrRXyBm;h~AM$GKkxQctXzxh^Nzw+-Ysf|CG zp@^wT=DX>_3>+_$2Wkjbysu<^1$ikv?)OpXXS555?BGOoMP%~d_pTnAxqr#Gz$ssk zi@fUP8uh-MyzCvO|Apq!D~bb9j3SdChtN_1r32jqN_4c+4uo^H6)+2bX4@h%XJyJ2 z84+x^kt#LyXKolBPC~^1cLP)F;QY;d9P_R0%q;$WzcTi)3j`}S$H>9)3Fk-Psl$zHh z^;{I5i-nlx=5xWrqGkrcW*R~nw-CD^msSPRiyA*omtQUKfBWcxbGrkx&av@-$$wB2 zDnh?qED6-8>@YQePkw6!VRXcQMn!sQy7|Aj1)=2`jfUO1tdBuT?yKQVd~%Z68z8F@RO#cmMg0}A(tyt@lIQeC=k|yXy1X!Lhv7TeCcc2XVL~)e%qZAnk zMzGw~tT{If)%g0e4HYnpN&EqP5w#Z(rJEmaw2V*KG2MDTQFiy4+rO{%b0Tn`H{>#% zhMS}f;l8s8|Dh(cKjOnThvtiGCOlX+Il5JN(Z=iV*Gsey%|o-Ggk^A0o+eJ319}JGlGYaPkfqHg&^QNdm0gJzf z151}GK$*<*>;eMm%wLvjgHNQhX(Q*TtgB>g>84vp&Kw)#wS`Ng4zPr(J2!=QxLT+& z9xP94oX*IkrEbPKdqa`GaVB6Q4LkNGPo*guL%DlTr{7Q#zL8S)>RcQAU?1qf6srcz zaVSATeNRg{qL&e*mZxe{{9dHJQoY>bm=O~XHsP(faQ2cAhktLrK0+;T|JE_KEy$%| zbMd6R|2B2!{wr^LDbOo1PDSGNlD00oXp5 z1Z0^w?_FkzjPOU}?~@^IQ{lVq_O$EfiDlgEuKsU67%-vk692AE*JIMNj$~@L0~9fI z6Z7v!@vNnE1QPahls7r=%?In$S+VK4N^47SG`5W_|j{m zcd}_8m!Gr^a1f$lEIFJS=Xqow=>!RP*w+7z<64JL**PC=Y^U)>FlP^4qaeYmBu|Mt zQi*z!M=@Sad^fKs{RfcK%rzy&`+FNRFQ}KK+bpTxuu}Odwi9aIauzm`A@hvFr5T>O z#?`Z03Wg?sH-n(9M&nO;&E}noI4qWgxY<%=wrfwOmFkIy_cJiC@1;0wSHu988Fwtv ztUIj0g?t%mj-;_DNY@yT>$VSHyuc=v=sjx{=K6h59F8^bVh-wC{*8I5JKC(Tz(r8& zWJ1wKuKU(&gCELhRIWTWBYE9F>tf(TPfqUR{X|F1zLaH$WZr`tL4GzG8i{7xjbjca zw5N3`T3pQS7U}+BZo8r5>8S~whg@>jhU$52eE~Zv)hLs=M>c6FkKTGX@-o|DZPz15 zw$iOtFWnyxz6o`Db(LxGGIh(V%lE%ejjiQZesEc!Zm)Z<-o zJYJ9x|N2IH4{xDc8T;wXf|16&hj*HqGYb1J)Fpp@7?gdmv-S0Uyo;H)BIXZp^Ih zNR$N;Y?;-eEb49cmDeA-L+yV4)I4s^lUb0Tc$QK#;mO+DVAa77 zye=zz)|dAWJ1f};zdw{U<*M0s*yHRpt^N1C-Vjj|Q+j9L53X{MhL8dk>(hZXQ%JQtT43t#&jfFIv>QPIP;wrMY$ zo0{n1F^S2jMU&rEDB%u^Td2KByFtfd&qGFBFc(JK5iar=!L0arGWD3cc~?SHKIPzLv=QJ6D`B7le$XTNYpjQs z+wbgxa6KX-LLsaC_x;Ny1M5o$GCc-*dlyM>@p+bc@d9g7`E$?T%kib9yv@zc#1!U{ z*TF1MV!-I0moy7|$Q^ll>Fgi~?eb&TNdBCZoY{h+Jy2xwH{>jEHHjjmpK^4p9}5EyH3e<~4cc zAEje8Ig;rk8@a`3z7Bh8eFo&81-zOE-+ zUm?^e;obW{BU@iAjRrS$x8%~p0&jqHw_z9{C@IN;)mx1T^@MjUu{ubEe>8r__S1{7 zm<@}L4iGr(S7BeU1;aPIR=>x+z+hjJ!sW}mF$FLLHigu<5kl{c`7=QnAdq%Hkgz`a zCXL03K{lJ>uK(OfnD7=C?DdB+o$$EDsF^|zSfR08lP)lz$(9(|PmIl&_Rku$|F)NK zkx~9JluZW5Gr_l9d;a-5cQrNmm;pyEdPxxKve?{yFGkV9d8J>Wxs?)>cqhJ1S?>*v z*WF?qI^EXUNl6O0ecO3X1D_c8l#!9C92^|%>TsmZq*!F%7pv)k`Qzg~S$xj>W7AVQ z?6`S&h?W2Do#FA!xSEwWt;C<{SQBM@(@hw2cKIK*rp+RGQo%BmOWHatJ!M$Ra82Iw z^LbDF$@q42#jxyKkK)6N62yD)d-3w27wNq-yZ^&RfM%TM%%TCE=?zum2JY{Tq)3H@x79k^~~^U zRh@tEp*O0B zD+uq^JVRoDu>e*RF>Ed+&NJG=x&98EAjj*NjmJrJ53Sf=V@7@P=5v8-jr64()6x=OD)%?>GUly}XRUzx3b z3GMp1l5JxUg$F?u+=QD>%FWdQ^-u_2Uy;}d+ht(e&WXX?eR+0KIyyS_;U^C601}T` zE(x6)G1AAtPu)R`d}HI}Y{H8Z+Op;FMh9!_FvQ4)H*X@+(nPMo9;)WTo~9Ykg%aat z_jE|!)G%oab7qaOZ+FVMd}YR@$ZPXmrz5#fG{(inNr0FchM8Ps`~cT=LKgJ8Ax$;; z;Z}wzqKd=~9a^2c^ZNaJBXBTB-ddbAF_FMae+iF^B|I6pvnpfZZG)Sb?o@T`oyq}E z>ir)-eQLm{K_PkhGoHUEP10-oWe;5mVdQVX_$w-JV$PzV)AyU>Q&Ys{oV0}#x-g0O zB4zu>CxMwKWoBwV>npvMqc6ts;{BAM;7iOTId=5u$?I7NA?k*Ph9`~`Z##bcI4dXT zHh6h`%W$}^ZbLe7U=3^G*VoYnO8qf5UviZvXFh8(p7h5gLOPPH`_JbAuHBO3;%h~h z#}K%b($dnl?%!|NMRL>jTqYkR7nYWmCXq0vmyC4o+&Ra6yho28*R{ooP7DpWcMm;W zi1455i9)O#8H0&L76sb4tF5}L&<1T478a4gJH+e*B^#S(+Pg9HiYr7l zSr`)OuC6XjVzoU{_qChoK;iW9fTIM-J1B^bcc0@w^OlB|mgFXzpI@I}h!LE^77O6^OA5<73zJ#D+(_+s2~w3d-DivZslhbc*Zjca9P?2vz@X@w6JeKGICj}^mnj`NiD zdHD7I*kO1M6Op7G#a{FPeqpLeijH?Cv z`K)}&*}8Uiq7##oy7P$l5U)pTU!FYs zVw1^WMy>gaiVJ%Vjqh84!#mNytAQJU=`Jq(A>8PP_m(d)GEh@si5=L#hJ<2nKFjND z-bL_-Q=dH}n|-}f65~F00iV+eDagwkhB+khV4;IIDx$+MhPxPO%(pu0-`Hh(qIJ-~ z!D=e=X%=sA=;s+oxVLZLell~F?D2Jmpy1$3@Yq&XQQ0w8O}OL3(2*D{L27NuI(LC2 zb5wa3d;$q-H&nfcH8gJ9dSEE%?9ZR7W@hm(Gm4A3abm-?nwo?}XcTcJEnKPYSWaRQ z{y)8`#Kfl-e3{sUKIq>+DJ?y74a-|__wG|m<=e*i4tcIzY+5Cja6o7%H3_7980=}t zEY#MjV%SsKx1OG!#OKd7p7j+t@S!DuuZf63!@GLeak~s63z42;Qox|a()4He@XvnJ zj$1|ogP$a|{^yR<%b6-Tn+KtloP%c|kD{Vt+NBc*sSOMaQlCH9iDjy;uD%-^8-gWE zKKpeg=JQ0!$<&4BSh>>}cKX^Xrvbj$k##tBBV%B)e-y)9F(2dv+y}Oz&3*6DqvyA; ziup>xRCC6@Ct~inN{}FFixL3Su}QroMZ0m3T7X$`Bw5{ux=zI z#ZBP%*Yxz9M(&l?iE#zSIGI1fbSAQ~QSE`$IkhNtUjzz6Os)HbmyckYV#ty3>Xk9p zgAfWFM68c!dUIgdW|RcQ?#rJwko2Csf1{zPDST+i)S%e;nfB3FAb_ z_{*2OvWUMmZpti+oW}r9PFzm;;%kYIa6x=&+m&W)ME+5;?nz#r?qiQ{$2KB+!Zt{6 z35=bwzA|RvSP*x^Xuyp#JHUM`CG!PYCqL=zK^)bQG57CtT*G9qWaI}Ular!^dk|(q z8t29BKHpN0!L_e1>``}g%oa0vsm#0HKkaF1>QRh%6H-moGPkhM9ZZxc$|L*nV+9d( z@Eo)r=28!~9z1C5iNB`T#%iXN!jF`dn_EOU{22T@l8TEBbuTY{JJ(QOPn?!7^u?sy ze_pV$wbkw-PGxj`h4GU}$Sk2gJ@9zS9x3Cw;i_A#?Ce5t(PZV}5yh!`vaHu#do>{; zLELK)29`Sn%O1|hUj1-4>-v!u$T4yY#1} zQ@0^y>2Vf}y<$6_;80dB}pfW}Tfnmgmn~U_LbrR8OApKR;X29eF7*U^)nQd#x0|D={O35!soF zv$He_c_TULjn(e2E^<{>RT0aW!d3Ofd4$PpkpjN;Bz{%^`8NP&e=x+sMW@Hy>91p*u&1tCmM8 z2`_n^KWmlctMd{B41txTfZ{)D9+fJWoQoKCwafcn1OhiV_+B0e#ogNYXr@$S-le4H ziYvj!ff$xh`QTB#d;eZHkFXMhFBuCfD|ztiw^Ij2MMT=(z9rA;gJH^$FB4HN)YsQ< zZdM{LW2dR$H&-0O|IG1*4N6Q5H}biE=6qmdOS;aQ>geeuASx4<`PIV;Ncv&S4}K0C z-sLFVeDDm3qq!wkF6AqZ4R%a?lqaSnudH}HH_W5Mdl?&CW6_GML#gv|4h6%{nZY}F zHVH3Yu;9WMKS_8664Rtn!=g$F3=Fi!ikTsYd?I^}Dc@KGGYUaA9 zNlEI~M%9KNq5ZcHq*X!0);Ig1OgAN%-fOil%a=lBv)C0;mng AaR2}S literal 0 HcmV?d00001 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..80446db70391b9c15eb61d4e22eae6328d3c2d5e GIT binary patch literal 41519 zcmdqJcRbha`#%1(RT3p6E8S_>Bzq+dtITB2l)cF=8QCRCh?FQ<*_*7Ske!#k_sHJA z^WuJge(&$&^UwG1@BQe0)XVERu6drvah%8H*&W3jM-S5-#xU&Yt((`BF$~`u!|(=3 ziQy;wzk5dDFJT864F?tL`wq?qb|#pDfrE{OwS&b&LuMxvJNt*$R(za1+?;~!%nuwK zZ0v=(xGewY3QlV~Q?5!a$3}R{A)A}`>@n<^0s0>=MI!kjhP{iqbxlguC3dFQ*+o@t zlVo8&uHKlnbnWRsgrudueQF;rB_<}O4|LOHrkl)Ko^0su2%NiZ!@+h*0Z&RQkhw)} zg77%$WT`3d=#vHPyKSdCw}OEg80j5WrIn0CiG_t`hP5h_v1P*74*Dn*?!pC?BD z`khpRg$D!#qy!mYH?Lj0)>gxf{lmh-GG$1M9f$87dC0J5@O=?2u{V5uO^1v?624C4 z^uuFD52C61f9dw`R^>aZt5(uMaYFXQDj(&g?%bgul&Y9&PsJ7Anq^^QJMx5iKyPpJ zuA9K>&h}J81y^cTFLEW25l9HNQm9A5~u-tmv z=y#&bY;P>N5%hX}q3W23C56V_yChs(Yo%!?PMlDJfBRk?%rnY67joT4&uiyo5DgbG zHq%>7qo$_jvN0OP^XTu0lU3Z-7p;8q=g*(hio3hG3DnFs#!G~K{(R|YyaZELR#r+{ zT20_dHg59{cJ_N;3DF3|E$x`*9{4P76@K_Y z$W^w^g5}&A*!juVG{CgC?vD$T3t64T`I|Jnl1fs(r)?T=52p38>Dx!dy1Kd=S%yTg zfrEzY!sIfFvtxH+``qSBsmHyPs6?zV4OLY_jFg<*s^2dh*VfjSBP?PvE8R)=gt=hn zbz}4N%7X_FY&Ocg_I8|A)zl2eenw;7-@aYN8a))v%*_XE_-a8;Wt(>xxV%Zh*0 z_>5}FF8Y{fKKRY@At`BaHoxDA7Au2gPAgzVZ~S$P{O!Zmj|M~4&(J_DG7=|Hsp3HSv zwHJ0>eh|zk)@_UnWe{O6_HK$3@jq=jNniQq?nO42RK?G%cyIT{T8BFdT z9+bzAKQXBC8-%6zJU!#{=Wo)-#GZcm@Xv)v*=yIY55d-|eoR0@FP`E}h$qM0&x{;q8s__?8u}l7%PBgyO4RMihjVhU~9XXT`jBMXA{}yxZDT*VosX+1bei zdTe~^X%juRR;4Hi-4|&p zp=G+uc${(^-G%Et;)-UwriOfQDPUL5pi!uP&K*EN8%E|<~Nq{GL@r=X<`z#||Y zC|&EbpWG#qsz{UxT3G4SJIl|1@>1mChYufim3b7`d-~Z@*M%|Fapd+=m^{}j=axD$^dw-!x3`y?ZpzAjgC`qhd3ktT=G!bSJ?``I^4dVCJ1ZziD|je5Q$x@f9`WskDu}pz4>NLkto)XmAldyc_wt(DGOO}C>ZB_qS3CT>-(yYZ!-o%&pHzp| z7l$$Li1W7%8)Jn?j~?}vP5K6zX<;}_LX*C(&))2ghDLzMe8~wVC8ax`RB10i2(NL@ ze!n`?l|@kHf6QMt=_(BSEB1RsEyMh4qfvT86TjcPj7Hx5q8lF@YjJ!VFQjQ_=1=xJ z^;Fxr9-*kYruL>!S=MD6wU-Gr?%eU_F{(|-%cDLfx`2KA_HChtMo+)v%RR_@Og6K* zo%ope)(oHMLIuHVKGV=;DG9p~Qm6U85G|`hBFqMQ0oQq=z0;g;t@rZfOZ~b~2ADfS zKXL}MY$fK{By@r<3VPAZ?SL~G^Zbf`JG4I%^<3BKF<-40~C<-4eBh)(Env-`2AUcp>v4XbvBF=NwKF7o-ltuC^aeku> zQNxc(7`>H0Ro`Ipc!~mg(_1m>a%Hk!)r_*uOV^YLv68<(*j!fsn4)DolwLe{RY{4m z&uh;^{xvrq$uUL?rH-|aF;-XO#XT0f%`*&oiyYU-CH9^*H{U_&GfK}>NY`t}(S2v7 z69oqz_jMb%vvO|HL73sAVqD{$1ADHRzxLOlQGL zht}`rW^e2A9cqZK1Rd!*=9CrNt2pcRx#D@0o}T*qpA`~1<+ahEw@`c9Iw2>A3btWO znZ(ZWiA!v3i&5prs5(s}h z&U4z^qg-B6s;X4Wu44Cpz42ubaT<8*HYWug41fDJ)Vc2wN%HOZE71=|e@gPPtJTRXD%Qd`wgVM-+c0ZV#>O8UJcXTRXw!?PE|Jj-QgD?n zAAc!(j+d7=(|%kLlSI?pBF8mYIR3UuY^}#OpsE*rdMgwrl zAYd+%?d|OaK&J+)Gk*YJ`BLc#x++#xRRNmxhh2N->{)^!YL2URcDw*3+W-oAY z<*zq>?RX1_T0WYW2&KJ2Uz&t=vqea1#oLVBDF7}`+!lSD{MPlyCj;u*ze`Ja3BSbS z$1c+yE!&zC(cj(PNUVf?{SQ6`!ebA$)YKwh$HuZlKz}A2r${BER5JF)yc+PqvsFSO zqGv~FICF}___Ix$s7p&rYv6BLoSnUW4J-!RZ%+@k2soT5eaK zc_%0Q4CeW6_v%Q*?ZCsnSJ^o@BJ)1AoAC$=YLoyD`CtjlIhppviL=x+G%@t{b3KKI zckYBccs1tyqO7uqe0Ns2#6_vQ%x%pIo&}g&4oA(YeE~mHuoj}o1!7|2K+Uu~HA_5Z ze2TSM+<8k8uicUq2q)$lrRv?QFdjizH2|(ZBdb7j616#L^d60#jF_FAoJ@?0imG3Q ztrPU@*)!_3J-Ak@rwP{7S<6* zF`Iq=*YSuKFRqp9a#=zsj3t%q`DzlBV>#K9#Ddw*_m#5d#|zpfl1gsFh{@-90cl8- zXXO(j2}r!)`M3T&Tr}Pomw2IQRrpkDYU+vBmX_+SE)5UD+R%YnoQJV8>p~rpcmq1G%WKG$O-|=3%7cBuGzOUJ;`eXm> zECjubszyRFR0#v1@3W@TnJj})<`7hI+1*7vSARmkWJ0u)FI8~386 z{g{vkWIgE4T|JL;I-SkU%%O~8n#*fzG;C~a40)!o1rueHcpq7XGD&d$7`037e;z8j zIq}}m&hG2-I*@^yXg*Wn5}jMOp3$B8HPaDRqUMci21hd zZ?PQC$j}f4T?7Xp{W@&A)LPoM>B9P(+@lL2K6maMfD{q{X9`7W)Bt$#qW(qWF+gp)#0!Z91sa<%5Y^zn4@SbRS4@As6+3NfYuhqPdG@;g z$oJADqF>^SXbo?b|NTEHr2!m1eZfjno1cHwc? zQbvJ3m-qEc)C>$EKu8T6UU6`kwdy$cp4-vmj56xVG@y{7M4LND3_APi(9z*ojVmQoWJ9KS%7T0O4m7!b8Z8vfgCF_>FmKJ4%(oz0ZD6;EF z)A9#i5Ydz&CYy8|g=d94kG-9>iPq#}u4{9af4B8KH%2jLu{{otIK+t{I)2f}y1E%T z^CJsV=@pBv0lPIW{Y_0^pYI9^Xo1C|(60Da^x@YG;90n^Y6l82{xPg8705Fin z9o-Y~?2QwP68zgNeQXe94#0Tsk0)D`rITAB$5(EU^cZig&bk26Lij{B<9#uQc0u#V zUs#pLFvU1Gi>%d+OBXLbFC!qkHwcJdGNUtpLi~#dv38bW&0#XS%Qt`Mp0d9$swdaq zR9i|$TsvR<<_#Sc4UN&y*E}$UfN&gKe%42vk6ETb`S9`M$DGb`_#u&`WIBH&hlQqr z233LLa=~L2puUo_GD31gKco4+*Vp3>^w<}9?zke_1fls=y2=GAmx0G5C_!7|^8Q$c zDJEAzvg2`EwLtK@yD#}R(iTygDjolb+yQwM2p{_jp+ z8+=FLd(}Xgcwu{E2@p+gSK;k=@s<_IVRk;i8o{(Y_@MPvfAc+(lv7-J+PdWN`x{{_ zXU^cAy&ha~gsWs=Aq23)Q(zo1mLLu&h7jM(2-~jZ=TJlWX#H zOI?NdS06xjo@h^{`FJ}%bbZ~W$F`1vGi=YMEfn7V@NcLTZ14I}KGQ}?Dw*Y(KdnQB zB0!;*OA3FiOOs>3jet8)UtELL_cAh4Nc1g)2%&eFLJe(EZA8UZ9$^tL=hK7dNiLb_kxCc8u z+hxveWr`?a!vpuC^I`pqa~ees7ZJ=2rsMxHA|5B!0XP(Df+<=T#L*oJ>;ql|g+4JzYAN9rwMNK#ekBpDUY)6wgm%6Rhf{wvs-6#4* zrUxn^0_sW z-RPr-4&gO#Yxg^}D$t5Ju>*4Aye1=K0BIXxVeP}jX|S+uA&jakym(l@^{1iX2#^qJ z9G7;1H|Pfp2o@KNK#Ic8bobIS+I1)BXnNSlEfHm6Yz%`hx;2wc9gkolqvTRyP4xIO ztgQ(&k?R6W8a3)}fq{W{?%p+BDgL3kJ@_3EKmc%`0pP37lTK^>UJ|g$y-_C+(`#76 z9fv+@WZypuIKTVry$aBkDq(jZ(afSN6OT>(O9f&b<8R#*-_*Lp_64B0sF7=S5&{WL z2~Zaoh(lR+qq6ex;=O6dzBA00f|`@%FsY2dF|C+uj;gjR^qu&sw_Bv1hEAgicKcJwL;^ge~;LkY`P64)USONF8!2znG0 z6j+?%xausw>03Ed80%U?93&`(`!{jZuNGEUYuoSYg&-^dIU7ltFV4v=%w#teEf40j zlk4f}1$~JweEE_pATeNM#Q5TLn^Kxa7Cy*$kn%52Qp3{xBv*F`Xba?&MWFG+9a`2k zP{mn<4gY3l33jzqntJ|bof;@9$~tOKNY zcG(YtM$;W>s7&x9l1+hi-T40fB6s#1u|iRhrL@TMZC>JW{~U8*jZuMAc2VUc2x~?l zDFr-xmenef>e^ai(R2>e##o+14?x69=$uZn4-=VhcrDLbb@HzEV`u_C%<2_g zj}MvQH#OCwkuX2RM#28TGLka%T z(%3CrFVBZM1~Ta)s>8lj^Vdw@wpLePL&}_MY2QDzdDN*ICnzcL0G6fpt91+=bK}%5 zz_6C53`U@_-UM0;y3<*OZ9?YuA&-f7{tMM7ZWlOsrLDjaBCQgL0PigRTOK1~h?YJ& z#ZI?yW@cs!kjLI^%kppuPR!1RlzD7jrIG=B2SGe2Le<##A44Ofv{x^q*77Z2E05F`&0^4Es#0RE=W*LKJzq}?(AXKS)Ev`(qk`NG+q5YHPzF}8gUH#hq z)Avz3L{bsmY!~=yE0DrxY2N`-Taqo6q)~?npmkQM$M0&xH|l?tFz`mp|4{CB`}O5d zu6b0wM)J&z#g}{8s32v#p1NH$o$jN}Sr^1r_F&O9AhByq=3(zG&M2s#M6BIWS62^u z#a;rm#AbWlRuqJ5XgRBdpm(i2(&;oseSQsqU~)}SksL{7L6H5Y?x?5`RBUwKN^V7I z1!@KrPz?FvlrQA=<;fF#dO@26nci~0wYbH_({ywTurCYU*-hsfeivR#Qa1QIQqSt; z$Ej%EI}T8JZ)e^MD5TK~4uA+%fT0u`PxABgzlK4#v$M+tWR%*gje#_L-)=AczFJ5$ z$*SL?yx;y>0uiUpFy>wJ*TGnmY4>qZFal*JKSO=0ZD4oeFS;k z`qMiWuiK|(W70~?Wt1EWgYRlR3l6RZxh~VV{&*@|3$53A;icHd*E|Fp8yl?BSvi^< zD7%AfC#Tb&2NNP6CwwyXZpJHo!Gx^8HGs-=TrH4zrJkl~vPeTo`4~zE2JN5K3O1)w zlXi}tE0*6NJRO1U+)6eevAcL|d){lW3Igt81;H_92xWTii$uJ9 zdL^Tv{`!RV!LPFpW^pLbcvm^(=MF?09^yWiuYje3N z+4tw-_qLN&aSs7dE&Toa4Pr@xdg>y~Va|kP-cRB7)&qJGUlS?gKD_Yd8YL8U03Z#qmhE{ z>)cnYdLHoHU(c#ztOCH|jVfqJw`*x>xudBWh+tH_``<&b4vn*qYUNwyZhBrJ+-Qj6 zx&cfP6_9p!wr~tIosjMUi=au9fmni1KmhAJkUFH_oDI_G8Kmc-dKE05>k!+NK)Hv2 zI|y_fH31uc+v+w~NP^^bC^CkEf^+ub#X}O?3qEZkLqDCzA+`k|fPlbZrv76p$<^u3 zrlckNe5>9sq4Dkz+@YLi^7j!AfD@oN|IAEoZ4kX#BeuM2mtHOsYTVG?iRIKQqt{GR z|IF}EnfN-D%pz13ki=3AGQJX2;(!!Uz+N%`+5}sw8j3jqP_0Q+{zL_7u)2=XP5w(t z3WZEo7W)sQFfmqgpp9gtq!c^b+tpG&#Lt*KmY=JrygWuP$P8FS4G9_^9v(FfoW=6n zAa%;G+9r_&zh4bBG&&SszPplP=;Bf^F+b0RE*5?C_d`vNmk-dNo9F02&NNu@JaYJO zweJy{z(a=)C0>0;@&5h$8YHP~Lk(QVEaq%&Q;n8&={eSa{t1vxQl^*iERi`yoTknO z0d2?d+O?;kn_ceRfo(#OEFaBs{rdGlB-y}lRRgKKd#Qbp9UB5(SOW^bEQ~`kgoK(i z(tfXBvQpNJDHG<&3L@f8FJKirF+P5Nb7SKfP#0<%(EZ*DJIZ#~Me=fwt?_y7y8mu! zY8vu9uNeD4`Bk0zDO&UVTy@>Sz1{h}599Bg=lh0QlH@F38Q7>^slTIHRUrE8Lg3Kv z_cuTQ0?C5i$|!>@IPI_W(W6I)rZdVpVOnpiUWzmXUibRdE7r30p&;scxf>Y~gISte zd|dYiCNu!gP@4XHZIJtGETcG=mm4HJsv9A6=Qh{Z1E6$21cS{PY0Km3)$M9!5$VY7 zHLEQa^?Iv+Z}&WG$$P*kG+k;%mv8pRz z9-jf~L=bu#0Y3izAu8i`lruSGP(8HZPCT2w2X|Lo12?~5DHHMa{5x{2XfL7l%T2$3Iu=S za9Y7(VKn1S@z%=tKQ&L!l*aEtDT#-JLt%M!^~xhpi#WGySy@l(fknzdUf>ZG)h>Yu zm!!;DJI-~tQKGTT^eDXtSG<^;daQu;TUrJNj!RUBc{n*0OQt@jR-*!}u>=4qNaU0D zW3#gyuq z@My~e zNuxDIs0CAV=>`M*&T+O6(Da=*{Y%K9CH*vj^Akw)?knwg3!wPPq5c9DEC9c?KJ<}D zur29?CtKuRp;fw3wZ``d|2`)6S}{30CC0R|zrVaDNlDv4<8v)FAi+;PM10vGyBp(a zoSJi19Y1c;+M6EbF>?acglI+pKnYY?AojZEQ2(gK`FGMXbXxd}&-5I9canR0G~TlQ z%E}5P8UJ1nu*=M~e0le(8}e@Nw2_ffuX6X}fB?~;^By!hvQUtBE{ILz%k|W;y%=v! z?0w*)I{z5?uN-;k3AzpgPbe%bglfh_8y`2=t%Oe~Q7NgZ`GU?I!wDHm%`|G^xb)JPt5(cW4TGbj?MZd( zLf??lt0)J6{K)9&yS%(S^!>>6bkOV9v|#JVcUBQSj-E926{J5%WM`rpC{Le0Vr^}G zKSWqii3%0;5))Z~eI%MTz17L<$`(V`vXln12M@e;Gj3eHiVuzrT(QsytGKxMoeNZg z;IyElqf1c!gnUo_XLw+=9{r?n6n9sc9nO}9zW!@gn_|&*86+ZO6zxjFA^*TJvE@6( z`93{Cs6zoo0iZZgfmBg0mw&I&?i|%&nbWxe-A7Pc6BVKG0#F|?AuxpgB%Gh0U!GEG zidfc>E@=}g^+9bVM>hc(OpfCmne`Gs9b1CJq)U=Mz-mO`ZiaZF_u z6+Em-bSXH;T3PuxqsOw6J3vPlcvYOMd)w;1htZK6)|>D3_XrUtSwdAG$)NzpK43PW znO_M5r@~_hk0763?Pdjc$E#PwrG1{)TLfb+km3VoSJKe9V5tT2=Pr<&VaT2->FIvz zsWSTd2@PQp-Q@cViB;F`lodU7zt!5Tm8Yktl>{pmbg%LW^G&H?u(H`qr{xcUj$PgM zNd@6_RM@sgWFp5`({F#32D{Xo%>Eq@1egq)$p|V=Emy$O23a@}vWgxXP=Am614vlcS(p)kV`93RhwDyf42_oN>@ zxB!BHABxhsxw!=}gtqGo2FaoJpy?Ye4%MI{2k81CuveH&OiU=Az6ukY|C4U1xby_aAlTDht?K^DqE=iMyLB z=U@SVi^g5_Xx9wobB357}G56J^fr6fXW_8%<nR%9GORXT{{Xs2blEE*!t zi-c8Bndkdx5`rkNsWVt*{m%-=2kp~J#7eD{Sc0LE0Q-3>$UmRv1guEQcV%Ua(b3T$ zaruFN%F>70BOceqX(_bZqPp%X5)~z6Gk^hq3y+NS1#TEI4K8LUl$juk_{evx@B9X` z1{$~_l+7>@E>TerSwAYl(z*zazrknc-%2_;T>)DJhM{6Ak}rFDv_Ysi1Jemkgld)B zV&{50p~#A2HL@9l%Ly4I$QeZgDt$<_a>gMpg3^R}L)nRvH=3;9bBhDT4Vax_ARgej zi?kAW^w$^_sZj2-DjcPTVw64W!Z{m8?VkGjz5|sPvl*&BgbID|wLUn{ zpx7noIED1v=P@xsUB^8$yC<5l$c@4d*$U^7Gp{I+WpgvrGQ zxEN+qRPLz_We9>|2{~{TsFJ7!g|(rduQ4guuMQtMg3LNGvwVd4#9q4_$#QGbAG?TV9&3=k2?&_^ zQ|cywB-|NX{xKkb6(ATx0qm)`$7ari3#c1xpplF<#fQPpLgnWI5Ik*hmicF33ZO=2 z6gCJ_YaOZ^0BG1AX4*sM@CCrl_!!ICeJdr9`h*jMsM1ArOQ z%1oOW*>y`a=$6(1KIwx2aUJ}RzCe`WA>!Q8kLA;+sqh9sMnI-Az)s-5Iu6#DS`egW z`tI+W>nlf^+{07OQxwwDQ&L7BA|tDYx(4#=p;-l}K!&zHl$T0n>XslVfl{Fax(8^B zM&M{H7z$(q!zR34GDQ9)l+L=zxl!-Bex|hfo2E?%=j_!^@LjqK=ha8>P}hm2c>AH)r9%m;bEQ0 zHBytK+XWOXEOu{8qZ8Y*Y^gIn7>p8p2*w@`K>R%eg9K}>?&+Zu7JZ5ogdqFpJ1%K= z95S?n^z&CgF7ZfLtU-^|g9jm_P_WA0na8qU<9I~R6ON?mjHPl zJWNkv3nhB(?Os`vgKnbm31#*HD0n=9??F7XSsapy$yZ8k2Q$Ux4G8osQxAk?Xfm@v z;Xr=Reb%&dJ&e&(`I;RR-I1XHETK<8eElW26sTTu`Dz!aXkk%9IJTHu0_h7Jkykex8XK)FBM`meEL41sMT z+ytrxGUCIV&%Y z=Zy>#qc9SO`8SjM-&etp|N9tGGlA_G7d$Zc9I-c};z%sSSQa}z`1j3Z)8-opPWj^{ zkO;rhIG+?@ua; zQS8H##JuYNIn*@qxM24`t%5fE_xYy;lA(WK&+Ye3tnGQwsNhSTj=o>}@BE%xdRp?; z6rIDmll2u$@{DP)BHTPzGx3e!;fm|zp5A;}^MCcAg}=^gLFqd#;|yHYc992LmK63_ zVh``qSNpF%eHX-3%~zaO(fKSp1_H@rANW2KsAL=*+?uNaM0rx}Q_@~)ZjqD4As9)= z>oES!c`Cjwd+qz6^?3|>vRESihdqIbWmY46v{5vJUUff>ZjSqO4mIEuy3FUq z6P_~D0*C1RSuX^{wj_CXHRO6q{PC@(sCRi%{!^~|3c`r^Pt28@D;mR^Cj=?c4D_Ix zir_;B$r7(rK)Viw*fJR_?ESW*=7x3^QbOCQW>T>VGIWM4_bANQ1zt&_1R<<#>NOtf z-kJe6QTe9WJNmyq9rteLCO~J@tg5kQ>5ys4MwEwH%uYZrUovC7r;5FyKi_v~G1m)H z*vJzDYVLg!G$Ou#0qJ#}ii1(u;>MQm@VNVaTnK!Q?$Cp2+3St&agk^*W?EO^;XmKd(VI!!ZKn z-rn6_y1flGtmX%oQ5g~Hjuw4mu&pb=<{OOc@?biuhJ1m1LG)s-%g6J2@@j7}hwO2s zK77z$3mZ)-nFVTnlbbX_fq|$U;3h(51ogE-sCz>N&nMTsQx1}hv; z_XtZOBBFrM(7Z=6=i}Xv1G;F@b8^bl>b5*OjmVmk)+7~P!i7MMoClOB7XUat3NCPR zYd$dDS@e}a7c4#6`^1_;sSK)`7RuYg#5)SNO77L#c;&Wrs}9bj#QB5ZONs7D4g)1X#QNpF_28( zw7rMu=PhQ`CA24AtCHNQ+uwqP$6!a+3M0v&je39oJZQwe9(j%S@-w^y{}+d@+7pK# z3f2QlC$H}RdS{XK1mLio>cytZsIvi}5(TAu%D9=9}>aWlYXngoDZD>wHXj5a96Zg&kBqqvEzak~=+obi*nj$t zn%S61M|u#vIN@1uFlmklq5GlG83NIT0?bRn7^nV!-6{L|^oLu_bpLg=-Q(>2n2M%Uu<9R*gIjgbC!r%! zXkDRf?Xs?G1=eMRxoT;S>z}U z6rP6ZsM}wIBUWg2KBW0Kelw!sl-H-(pEMdYX_DS0lP3)iegH!5vATmrjc(IkQnvZ8 zcdT|h^)03%jH4LDA`5g3Fm zlq${wy@v`2Xr*9%D{^Ebl*(LhXlk;alzw>P;3dVmZWywDeM5mE69OpW%5WS*fP#@R z6v}g5lf!UMQ@=Su3OVa_iuRx^bcT!T_GVCUu*2l4Ug=N~^1+?SMkssTix^BYOfndrAhf|LsRtN_bggj!Q_GARkSuk5!{#hW~ zP95h#I0y6%UWb?zRsjSc{N>BG`)j99obb6VzCnnbec&1ihhuo?;1lS~SFE!^^8oSg zmKR86mL1{*kBqiSj^4!w+{PmCa1Db2kf?^pj~M1jHg$BA63g`3^F(fWOQ&)$Fc(0n z9pqR)VBg>rGP(3D~V=4!l?dLJAXJQi?#N)D{zi&EV zV=Mk(DdaFgVQ6T`IqA`Dq)qyU4OMKO7w5nf1J}J3aSNKm%@msq5E?!3!Vg&*(}Dr_N7)myULeC0@CK~nqe9FFb{(L9awh5^ z@DVEIF#H3Q^XJH4AiSXs@`KuS)E95Dye!VL-8%9`zU{eso9&Xg?kXZm#RV zl0@>;cm7?3TE$^_+rZujMn`>70t15q57=-qOk!`F4_@5Sm}ahrFL;n^GJ5&EK@qBg zgR>lYxYWANNZKNAp@ZF!)Jq_Vv~P`4SANA0i~e8{ zw9et_!|f1qv00S;#{!b!tHZIS#;k5i_YjMCHQwsR=tISd(D~INtfe+AU_|m@@=F4&t>U876{p zN&g2~5TlO=DJBk~hAJxBf0G_mzbZf6t5E*;$~M>i8>0=#nD=A(!RLR04oX2+TQ5j* z9eh5`gUw)X?&9wk00K0_0CVuk|N9BBjXnu;)W7-X7(oTOB-QD?QFcTt>_`vTnsY@+`!jB&zrwGUQp0D8*FCmoslANOAoaaigA*Z|k5Ag^d9(*Ac;@KE}C zt$_dVpls0N4fiGt9ArxbdgMP7T!?&Zu%Fs;8BvQa5mZI)L#Lx?Or!=5S|>s5&ES1k zg`t8tj?uNWt1%6+;m}ajyI?%k?}QJvd_wP`0B6R4o_dMkthHyfAUP^k0O3QuKE(YG zp3n&d@%9-Y3<};Gxym-a{T-BsaMh_AuU|>}UyQM^adkrvSH`G7B;_Qd6n=d%zE_#s z|7(MiZffPTk{@zyyztf3chtlPPW{5+c1evhe(>zrN-WbSoY&i_4&%i~%&rbyGR&#( z`x0;DfIdS58LlnSN-8chLmxU2^ZU|-e}8}1Yx+Z54N?fc?+-yrF1i`7HrxBpKFFIx z59gN0ACyIywoMbE2JQPL|DEAq=igmw7_Q;vh6_em<5;mCFj1wKZJL3l6@Hn;7v0kC zK}%%M2Bi{GLn0g}ha%;@nEY0?QX_l}J1ins2|IzZFtL@;8$?&Ixl_Q2Y3bzECa|u7 zLD%+=Nfe6pE?`Qsfjnt2TpME9qug0d_z9y`!2kX4qNsyhKET%Lz-#vXV=6|;!_tM2L+Q-=v}KZrUUHm|0H8hX3rh{l(}_h z4zk7MKR)7ONCDFg-LVoWiiG>Pf&9?3HwFzr)1 zSFqsF(CboCkL&Y*h5^ac{I!c$@n6Y#OZoCiAR*zge}yeXbpEl-ux!@-dIi#`(s;8; zu{Vr%$jWqQjKvlR$LItUsD}HJoMrFjR5EkCPJxMV74=R|OSCc?(LlfnJll!pGbpJ_ z8bwE;L<__Wqsw*%T`pp^937v9bK{~Q#S1?A`;YkM#93tja{(BJd%r<`F(&Y8JXf0K z_$$;ugS13!;Q0;4N+@)p12K3Q*jPU3Q(!M5BC_BYOCb7=W*Ro&;&(2`pJNJ^PJ9RFkMeb${&!Euz6^Nn# ziC;s4OftiQ6+MBju@_U!{HdWWN;m3Y$1g5gy(P7Kv#(3#Vh~Wzze!s-ouYhmum4dV zq3aUWyAT{)OnC|sp0P!p@-E4UhCkPbcY_&zeX4$?;@fP`ju=NY;!^G*MecZ#pTdpi z76>Q*&YK3N_06ezp8We``nk@1F-at0rGrA|A4I15&h2S@eO`e+oC>U4x}*>&6Em!d z^l9Tz2vXe4oZm<6UY8&5Bnyq?q*4FGyiqQCy&_O>l@>{a^43P9%VHA z5brx-9h+-O&Z;1}9=r8cEhZSN6Ka_pG~*x>@L)xwOknFp&bWrv zIlCWI;d=M1oy))daQ$+-N&2174!mZf1ulQw8~$p*hDpi6>Rv&_Mx=(@KQxX#S}->3|pUFec5T2IeG0}YaQxY9tI;Jh`4q+Zy%F~UN8Pe*+4J$S za5Zh8l0f009r9SN8zynoH+q^w+_{VCNw{)$1<`81LW?A(#X|hH< z$X-*g55C9hh@~$uOi!mBhkQdAT?TvJyVqRBEUaQRdqD{IA-wUm&Ci|om zrRhwNnPYlMU@4kbz}SV?(>pi3aVGN0S@}|hi~W)D_4|)|LHtK9X_dEWE8b%;YZDGS z#3Ee!Aa^-fJ=wp^x02+-Pl-qCP1<<5{p1VIRrpgb`uWY>{W27F*q!Hh+zm(8EnWlRiel-Qe4esmz_!`>v-z?q=o~8^n zc=plv-I{S^;@3Xc+V7fmUpMl;eRhrgVcmF~QBaR_M~^E|@UE(*eakJmQ)@2s5_-ha z_}t}Jdy@!G5A%xWz4$uavSc%2NEAAvs-38g_H{La3H5|exA>hGuhTlN(WOw?X$xHL zGgjxu+>6JbwA(s8@u@$DWpXt*`7Ovx@$ZV>4P-ht%2J$@r>2uzn}3;8W;k@wKT1=J z=#yfJ_KO!&#C0O@Z4gA9XZN_nJxEY;XdfBe5^}c6yITI${58allu)ae~aAzrrKk z7YBd_j94+Or{+yl65{pg@{;0HjJ&Ui&2b@2DPdm?I;?u;`wfFEkKX5^s%`wvBw7s@ zPNnxdBw{eQ?>dCze;t<@aK9DOx)sx}ZWdL#qv^1mr_!0g7yZ&UFJ*C&smb2z%ZKa9 zbsZ`)U9O+MC;c>#i}$C?rIui9tT}WZ2St&%6R-}(iq6;;oZ(=!i^jML$z-`2qVBMg zx;V&Qnxl=e`08<6{i1Ha&2hc$=R~lawh!ZaQ~9S}ahy(e<~Hh561XoDS8J6fspX%q zzSgX!8OJD*JU)2GsjxNwo+CF2Ow=7*^HJ*GG8qE>e2Z>R*B6p?FgDOsnEY&g=q{d!iVp3E8ic zcT=!FLIoG#lux2cT z2e)VW4yR_Vs6Kc26ERc(!+pxP-LmM%k>~s(yn$I$`s8)Z$06-hkaPajdsbL=f_~2Q z?IBSuRi9bYK+Bn4C+*kY%_7y$=M`+V7R0QpN}Ys|ow=nAXS!}SeUf~rV3e29_26R~ z8wc;)s-x5W`L{onRoexe`SM3D-Z6I+)Eg4~?$q9NV=3!=Hq|7%n6#pxmTJh5+1k&! zCQeJTZNZO`-yMPf;F-P(EVjBypH4?>^p!&_xgM`-qt(_MdHmM3HXu1P?^M6@u%4s$Vu<<2@`&oaMk*_N{BRXr{}YeY zx}tygw9m+PT0K-CxI+J{?sx2%-cD#47Rnzo-EiScZD_Eee{|+>VI-#ldlq%@ZlB+& zj#m!8o#qx{O%}~lS%VMCREC*EJ_l-s{L+l%x8l67*zxC*?Con&K!P|wni^x*S^e=b z2Up6Ynqqyu4s^HEWb2Y~O}QI12KPdUx4yC$>zKD`K0G97%FtkW?>bxZ&&w(%<}=+t z2!uF)k>=G#=f2YV)*B(@DHAugv3K0Hz!T;~r^y1W>5sXj7Z7u8JIG&}h*eYeQ8;hm{L4Li zOH{o3je*jg?vqd*yT`#}f2q8ig}Yew)%>sP29Yc2I;MPvdHB3{>u7~j`7<6DLJ0nJ zI-lP{j0A$gos8cx#ui+;Dnx3Se(v(`!H)?7?=vl*IGWYI)nsV`niE0$N9^aMhmS{z zrEWg%I<@g)(|McA`NwG*rfe#GYV%I2cDyg?aA-fOWbt#|LiE-A@^MF!#;8AJ`m*M9 z5NZo=q)BhL4vI#ppU$e&?aXBRVaCt#HevX4;wAnDukh;VlYRl;sR$|*t8ubQ56rK+ z&xY;r?HQ(ZPRQR+aQ!htTuN#ouJk}jAoGj5k3{3Egbzh{<1B&=6MP0LZ=))b49JQfr(Tl8Mm9N1d6#564ia+rJiEC-fATF zvEtNK1T0NS?US~!NRzBKmE;=1wft;ixzWT9eubKmin1l~lT-JHJAO*MET`wH@O!^; zM?m34^Tcw&9c3Ri60XG055w1zeh>;7jpEfke!_R2aY^1tuE73lgltSa8&OK~yJX1~bs z_~bxKTZz;ZP2P(=hA14$S5weTY~oY&!l>}1C2x9Ow$KZe$NwK;Uma9c_(gj`1Vlmv zq+7bXkuCuN>6C5|DM88rq?PVQK)OpnM7pFw5NRX@X%Kw-{(kSxym@clJjXv6$9p*U zobUVg-fOSDmSt1I%+rZuc|_3~J8^6QE*l5W#OBK#+JJ@U`{AZ%ezuHvW&L^lrLd8U zHdx0&J8fv9Mz_ZCGKJ3W$k$kc`<~ggk0};ocnLYTr4+X0eC$h8&FgPla%h*nmgf&jDC#zfL3w_QpyN*)uR2A*rm2G zma(;b?gxar#Ruc>#s7NT#|tE##bI?7X>-0s018BsXwe`7GR0dTdaX=q1}?r0jX)&f0<{*(1|gpL!BK zoX7;u3b9y>I^7ujzMOj%#S(jD#41tg2G&JvDh{YIWbVxst82@%WJ@(%IJMtZMCTM>P_?Z|3Abwi> z816X%3z!Fo-EHibX1zMNTl+UJYCkZXwJBau}!UYIRJ{)s0slJlrj> zYM_ec_RPen)2krXte8X+o-T(;=Y|PsD?U+m#>uct{+8odn*G_hdJp-#yD4!VJ^Lol z5}rK6maF4@4lIlbL&{9Ugy=#&>Mjf;-pX@1+JKqBDxDr@S4B?U#h4lfuTviC*BJaU zBQ=Z_xm26PY}r%asTCW9N(L>0Z@x$ov7BNwLvR~RPk#z9GnJR%oH5wiOR~J#TT{bs zZ1Q?|Nrg?vMW4r_>zbleG}*GoCRewxCnnTgt((Y2Oy_=G2|QFt?3dGDTP)-oYmOZoO{$5SK82Cy6{@ zr;+zEtCtF1a@DM?h2~r~A1R^BBbg#D1y4NWV8FX64zx=y{=l4b=2ep9JCRZes65iF z>6}H~D0$l}vVQ4g-k6Br&Z3r8kTIa_qFI7g)NkbNKTm-R-qoy;%cG>7=j#h7#Pz|BZvEg5za>5_< zPBB?*rCRjx$RQZFnK+0p?P-?sl3&VE1!U6d@1?4)jYgVoX+m7!M z=IoBD@352whc(edELIA+%(Hbh4Dec+=@@^FSyyZeh{k0yZas$#(V}g^l+&AOh>S48 z;58Y>`<~DXnwmy*_mFOwmoAQQe&WXs1r)8I{!P~=>`4cX?$gN6DX&=no{k)jRb^(( z5;qZfp0!h^FrI3<(2E_<{@&m&a3y3s3YW6?ys^bEdjHKuu0mN?+lQV0EH$QO#ln2L z3HHh|9mn@NS`ib!5D|M)j&hn`mm=2Q-uff0o!#GO+v{#HAk|czStYh9zfC$qbjhVM z7x7WGCnj^yb(p_7mD|AkKHfBrbGm)oi4rHjXpiF4+EBipqL0(Hv)D%tb@vpj2S&vh zzb&`AM6`ZxYBr6ik)QlUK=5l{xNK8%sG_LHmFafQQS6eh(8zVWzHWr<>>5ej-Mu6C zW%ZEMTpYvJhc7)IPVIRmnsQkjhIKRGRy>MuJj}lQ9L%ddW&h5|G-kpsFrPtGs3}Su zdgRsiR?kQj7>%-aSqRV~m2U~nPKhzz@s9Z&&E;inuJV)l?D@cZ3nq7qA+Da=`vV84 zXy?otnuQ%4^vz3Y62$B$VmcbZfL5sP5Ya}(U0Uefs~*_mu7jueAvKy-OwTwf_9Hs> zL#&rDXM1RT3lcOolYhMlt?5(^Q}}krx$+JB1s>fh5>lUiHSC^<&vs5?XCIX6IKDJ8 z!p4W(@=N9*?^!c<_qWA{xSO~)1}SaBH4O8lb(J|>NN0`1 z;7XW1S;pJ?Qo+X>1~G98ox194dM!){eqT{ts>ZjsTIob5&u1Qce&h*4PQS*+#=_JR zQdS8lJw5AfK$U$>gUJ}ks?#%qTGz1p^nrMwj#=CyH{It8jB{6l$4?C$@+bc=hwwN} zy?TH++kDCi6rJUg9~*9s_9=q0VX09)YSVl!-?k1Q&{S<{;7We?y|?kuK^6Y@^ULY+ z;Rn6A7Q0L0#X-*$SB4||I(B&DNrxh8kodh>jtl!O%g{2nbhs$g9oP?yHm{v`4{eg=mN(<1cFKZHGz+VejaS1d zNxPsNd5zP<&Ln)xZn*ZhCwKAiSpxT`1eK4I`fXfu>oapx4%?`Iw;pOx6`AJs0HXH& zZ$J@&H6n@2ELxGu2Msx1tPo8RhU|kv`@0LcyLU+K{T0lC!wloG3??V>hUU)lw8ZIK zQw|(3R9^1ZHp?7moUDNy@-(sn%s-!8Z&V1AN zzlh20s{0kyeKLl!t1CYu5$<0{bXH#q{2zee1_04-F4=yFmUoYSjW4HZnDJg22{X$p2p&AuK5>-PVsuO`Sk$_c;VC5xu@6Dn~bOD=L zi(Q7Z%v^QC|0ppdnV|>3i6%nq!=CF(WIq3JZdMmMf2D$70dy~ zu&ccg+{~1Mqq43R8d&{up6Tds_xa}TnH0dm*-PI4O5r_&? zvBlUQ6@?D=ciu0pR(ruYE_g%A2-}n0(Mgq+hx^*8u4_#9Bp^}PccD7maa8R41&Nfb z0^gOz%}=eVzq|HVcATCFa3)T|N!qzk${T^Gto?u@V(od)l1^UpYuG2&Hq9RP zGE!1ieFKbU=ss&H=N+vZ*(gKI{bemmGrXSMIOq183cje|Aa!eYv#D8kF zYP6N{E}8B_>)2DdDHNk5<={c1POPo`Qp=Gc^m6GU=iqFbE6QMzFL+XToKT9QN&n?b zw}fB!)^7(2;e8Z=Qvcsx|0{&FuQA{f@#S1)_I61qdztwK4>Sx@mAyjFORXJ$Y7*6v zF@M@o6p3!$#A4mfc{iYnif=1&vEX4dpNrQGOt^iW@5<1@p49J>Hv+CrGUVHe*M}UXG%A*|K^>WPI6=KRB} z?K&L8LZ;jGT`Lb<89E&bL$iRq7Ky^n$r++vD61X6N%jy%(|Si50<={AAAIoKU0)B7 z6;uGF$s>?SUYzQYj#@!2Q+Dn*^bj(dw1P$T;mFg)xlH=tg9)cl%&!oQ2d?&0HBA*> zSv@7&j}|*QNT1t|+sCo~&1GKpkS2n4x~$=q(v{@v{CaA=VkQBDQznmKG<$p{!X&EZ z0~g8U#9W$AI1*2fcHi1@BFe0{O^W#O!!~{J9_r_*$HDhZ`ld=Bj<}x%@BF&-Jq(j^ z5>9n>|Dx&`%4l_OTB>Z){IuyVp{{`>N{E4G@cmNx3K)}Q3X_9k-lNf;dc_Ku}*b?EQKm2~a0%(6m z7C;zBidlL1{9o*Mq->EmwA{zmK(G&&6wUXKe$N{Xvq36~(8uKEs6q(R!eN&9I9r`8)?3O8;&S`WAzXmxZo|O$@9Wx42R^uDm=v7u)lcNxDmK z2JTjwmgkAS9QI54Eo$^?4?h3^;ny|t0wiM`GpiTfrynne@$092e!xotG~mNg)GIwU zJ(Oq1=~Ko`jXEMtKOSCmv9b!^eyXC5gJCzfc$Kj?CK0^tS=)9T#7I$1>tIx zjrDUuwU9&Eu!X;u5vg(cjrjmkc?8Ve<2D&bzd{{%8G_zqVb2*0Ar>|%FB&Q7|04d2 zO-~$^V00}Ob|%*R_&)x2RU=Xfw`=niEBnv1D92EJ+(sd4yxL#YRKm}yP~)`7lj74v zXwZu_QM63Hxa35wv|fBgTb?bsNSznt8N`#l`^v4dIITbN_3T=a{Wn7nD!(%ek+zux z>rs1eKx$GL1b$I1%(S*{zOkOZ6iag&kt#15r|DHhfm!eA?AH|pKSpzzMgPi7M9{$w zL?gj@`>)MjAQ4$JR9MDwxYRh=-s27b$V0=4DHPM_u|hAFmiQGFwnN|xb|$ep?VQmV zT-gq{8{XHF6Wj9f(*}6q*afd3*K)cSBvik7RzO-$N8se$nr1uCi&dgoHf8Zth8v4j zEwGogeK-X1jnXaqoF|gYRZ{aU+lAbB6PZ6rmgkHwe?vCujBfj&P{<~iZJVEDoPK^w za%zU0^+{=w3I`5e_QR3GfbNE%h6W*+L96sRw6RZvLla~<)T;eX{yS|!&&c?pEOnDd z#*YZ|_)x+4?W6I&VUel+y4nJO9G=_QP#4+^$lv_<)cPm0y@WZS*wB3EVdp{PMIo6U zzwR42TfIkZ@uKp4^OeJ@uD;D{Y$hJZjZ?s0iX6W!wfN8C>V4yG*nEz1l=~L;!@Rt9sQ-4Avf~Zh)i#x<+8!+Ne`zt)` z*XQ}Q>1l2@#fHwLi#AD9bb0p5{IPhr^tmwcNA##JRla_y_QDL=HCo(mcf>|a|`50-NcKQ7ZYRw@o z9NIG4>tpXam{;+0%&QKPHX^$H?@G0*FE-MlY{8*H@Y|UXV`>&^$)lDnwtn7zZF~d<40Xx#OdltiiXnR{jec}^oR~If*9LgB zE*1ao#X?{X_D!ARG2R#J({nYu9{3+l9EXE!Nrw0Z&z{d2zy+PhqCbWE`ZY4#I930} z={e;8X}7d*7w}zpX>W10XZ@m}Z0f2OeRJlMSNZ`ZK&3*Ruc_<# z+XX)991-Tm>%|KlWMgFoQ(O5uOxNPmf1b&PoIZj;23LbLt5DaTC$pCk0lS!@@DF9o zwzy@zoDo1v3WfZ{pInto*MA8*L{Zh@)W6=F;;vA%qgo`=oyvm&2PlT+8%mqh)Y!rY z$tWCo&xd9YBH{jenTr&{Gj92J3(q$Ad!{1xk!t9gn)pbuC6InZDj>mR`D=cjRp2JP zBaJcwGDauRv7%s*2v`*~92_`-e-ALg^1yMMvpGKhX}e$r?S_MYMIyLNnD1jCSc55I z)Rlvc89zVd?35r#Wy-_ey%!0vtG?CmyE1-vR=1t`w)aojUMz8t-~8}TkmshfB-W?; zrTkhB4jwekm#lxis_)>4w(@@%niWVaJKt2|ET~_Cp-YQwm#DpxB~YG1u%_(zgaW zxEP>NFw9`5(_<-&VKOC#YMAl%=LkQ#B!Dk+&zNzolK`X;XVK5o>P@FI&bYf2S|iU< z@%^`Yef49gFB;he#^?~p#<*4mW(mq7RI{{$!&(ZX(yYZT-=*%LL5gH?oqQjX1tkJy zL(A#eyJtQFRclu7$A9p42S}#63P|x=-Hl%R4|NKzlbiG}PsT>rWsr(;>4#Udl0fu8p0o}iaWl0YIs3T#S=yubL_ z+QLNhVzFihF@dgA7wiKF~9G${W1)ITd(D!sR6 zC((_cZ#EUVfYBW}`Vfs4iV9WoL4UftAM2i8*=-vfMcw*(!Y7|RCWJ*F*IDUddoQTF zk2{bu?u}VA@#2T2TXx(R_~yp1T}^zc-YuN{6YF8f11zOis-WvqCvRMq(46;$X5M(` zL&jvk*?SHnUKq~_;+jM zkWm0NN&+6UAk_dekAl>ZmT^WA3(<3m+4I}0TA?0w9Iz}I#u?m2!;(hBs9m*fzw2k8 z5N>fBP&~Q(QS|@*wCiol?p5R z{IEUq9H@o5M0+}p$Bdn7(8!`dr(Z|Q@g$XcWbBh`SXL*J)A_#NH>#82_uy>mqUGTC z)$CU)0?@gX)@i-U!Mm*Kf2X6h{gi^vL=+ow>%EN`S^XC8%?ti44XwNzPUy~4-Bm~S zhFzV84Q`IHM37Ibm$}>_D3w`Yz~ucGV9ljH<*3$OO!{%>^7*B{$*_aA_W+NVl;vj; z@8_sU0zy8wS#dtjH~#Byisqx%KEL1W?H?r4^GXHhH4)ttz(85-GPufqH}FnRGbV$y ztI=Bq3s41kZ~4m9|I&X@51iVh4W6|I*in3HT|LWsO&U{v_H5dxN36q_Qqq0k)OzK) zDZ`jM>(i@m!uAwnh3#2g$FeJGjMc~pj0_OEuKw%40AoI|9B-ZkHfDjrLSTD!lJcEo zI36ml#bbbAl^pNjKXg#^9=>$~4HY#XdbmY3k49F*f~*x&Jdtz}=GCDeA8wRWBI?UsV&D(o>ANkQr*>L$EN%nD6^jx;5Or5fq*?)%lO-9c<8E)l8-K)nx zvUI}7Qqljyf=AYQRzY|1)bRbn&zJocMydsZh~&E><21m)EedL6WVsPZV3@CF^<26_fA4HECkY$+qa+L4D?K`0jbsOZuYcPFw zIg&<`t@B|2d(D7fVU%iXUUU8lKr1qDvrvuH4Wm1qxyo$}ysZc=!+HB1G<}hiM`+mx zXbxw7sWC$uz1Ec{luH|{7w@|n&~jgElf0)0m&#h7>qfs^)D8fSxfQOSs2xkx?bj1g zSrLh`x1zQ)sfOIlyhD>z(YZ+TCm&HX**v94+#8hh$v}r*LB0Y84MT2|H~^L!y}KCn z-3DfsDqJGvzXj8cSP@5uK774&57CgLdH9SYxj>R&TyT@*dsXi<qe zT+`riQe4ORR8g0*UphMmwRjW-GIjI9S=I9|tOD09Y`67}qQ8V1k>ALmz10CVA=f*w^pSftL`_~_J$K?DAgYx&<)giiF_Xz}mrYC_ zI8T=KQ+-2^_7ld9&-EaMh2U4G;hy+NzMXAX{qkIS?%rWgL8w6#v#cPG)LJmnWg4GQ z-%Yl-YH5;$AU3$dN__!EWC=CSzsyZtRP$M!xbcV~Ee`=Cm+1yIhpG#vaW=`!YU z4WXNuo;Xo8eHjuMo1n|j6;I1E_&XL_t3p@3XKVT><&PrTtxo4gwI>}b&o;Va_5E{i za98P}H3y&5mgL`~Z;4QEA6_LxZkVbPw+qCyuJi#UbXKPtJCb@1Zb)J%Q1)c}iei+f z`tCV0S+6XYBAceihk~~dwzzMlGe)t{U{Ap5NI(D6C>5WKs7Ow_fQ;q+wR#27Tz~jI zszQ$GU7YIov>shw27GCT?VR#W3Da!0`)M$_Zy|6@euhYW{pNpu%2EvAtLAuiSDFHW zJ~Zkd^0A|2d#lp0I*`9})-@XCzQrV}5o~{z$5U=A959m=<&qOXGEGnM+eOk@>!&M{ z1*KE(WV3$z0VQ9P@ngdXhB@+w*Kl8!uDZp_LY&`7g6fXtiuI5y z6^vfjUW~5lyZ2xbdyt7E+159(if5QJukyPCqKH^NT__cCe6*E^2nuVZtArx0S@8_| zId&AooIUYvnqDo{cqYtAAvcZtkLz$w%$PepWmmpCQJz4va#6;fqnz@tt*4UW3pc?5 z|L$LWzI5$aOn*xD{|H>sCfx$U>Rc|jKvU?(r^iLIDq;IQM;~|`LXN#qo@mH$yHI2X zK{4Tcvdz(}XFir7F3Nyy525NCo6L7F1Bd%IcwzF5?PHrRCfOnS7P0>*-vejs)K(3}$O+4uPqVJA{b{m7pJf3%?T1ds{^F9FI z&bDW)(27*sRm*=_ibl+tZ3fTv++X>#@E%D?$UOL=R*tN_Yxzx$xCj55R{f)cKCz2? zdU92@9zfV z-!q}x1||}|nsTiQp{2`YzvS_m4nLMkqRaaCxyuWjS*f&3E%f<0U6x(Vad~QY*W+yb&d0f53P=#Ytr5%B8wZjT3Afqyo%YkD$I| z-V~alF9L#eB7{tTSj%)Lg|02g6A54WtCv$`AH_=l;13zq{ICJqED;sdWE)Sznx8lE(9VfnQvD>*PjI2}bu~A^k~`SglRpiTl}QLTlwenUxo=tqB*=-N5mp z<6t2qPpfvq;EZ4Td~^JC_l#;OS&{}FFlZ#*8CuXZS%=aLXZPNbht>JeE*UV{nJ&UWj3#ezs$@>#+n!?*w-BgI^bzK>*@ZORl#dk5N zK#|JHt~TBBttfIFUYHX2l?@<*mLim8pPqU?h+82YMk&WmLy}z{)n~|SXW!L32~%in z-+Zw(O>mQhkorx9m)Yl;cy4?4q9y{L1l)mh?FoC0xomlW7pBNs+tW$WQTQtsXv67{ z9Xn4l%44ONWW^dM^m-?}#ct1iek-Nb%s6w<+YRa5u-u*c zeH43(WLDdycY5x&`xw~(1`yq4-Pn?LFQA1P;J}))@>bQ(Z_kw?F=#uad*z&(yruJB z!|AKOn5rla=7B%oN0^+;on4%ehnUo8_T*>j7B8dXJZAEvsJ4D%E0{;zup|p&{C*QHW zi^v)*`3hz3QP?fHw-u=Wet1$%I+V%V-_J1R@4FxkAld03!6Aw}uYZmZ=78qU zS5thT67U!Vucnat@AsW_6Fov?2O8h;vLd9H{E7kW7bF?BK8y298GQ?~Jw+cUTP0M2 z?bg9s`XJS{fyUKA8v&Vvg@wcY?)*3A2NwiNQMi?9Zi%;1snSv%__isivcEd#-K|>w zERQUr0fbxCGrUIJ((ujc4?_&RyOkApF9uOS-1hfSyw(?=7v*eYXZ^stoA29hnd^(o z8{2KLqvolEmy%F;ZZYj6{><8PGY<;!zn2Ysm5e}o z8FwIub50p0AT}yBL2BhI&^EBRXPus-H^tC2zu=Z3)&j3SP)Gw^1{&^fzM7Apsj2%= zUW*&O(qu8eXW5CMDQQ&LN^G{aYS8_eRfnB@?pC&IsU}l|SN(8XHQIc;z9Dia1ljW$ zo4{Ewj9*FZaO3=G`w#?6T)h2fp)#_BZd0+N4fMRoJbcp!Ym9GwCep(R?Fw|_P-$AE)tH9t2(2JmILmXfE^EB0T{jAK5F~e^N zZWaMl-`2Og&Xt^X5|(g2snE|_8utdm42_d;qATw#79ulq6hk8Suk%!-6H7oKOchcw za?TW*3@`|Pj1Zy|L3_bl=6%b^NN+at+D%#u@$1%uPUsFGOQ?sOQnX1OP)Ln^FxS6Y zc~x3&u)`Y2mqHu-d|Y?7%yc^-#SE${;A* zfbjIpcPGE4}{vHptAfR^{sG=eaXmlRgf zgy1i3ejbw90#Ct~Q#mpB7{&Xc%ZtG<<{R>IGb--xAcp#B1dfMqhS~~`hQH;FQHd{j zFIjK)JCH5Hpc*dVmKtSKxq9OyB6{c+W_-f;U=uzHRpa9yp0e=3@Fu+Ht(Zct2QK5} zp$z)$2h(>Ue-=z4^|;S6k2hjEIc;;fHbP#$W5+;*rP~_gl1v9 zDX$-N3&sfyp0RBmEZ7W`55HjYNUE+FF`Fdzt!nakZ_GG@jcCYp!JPq$7L-erfVP;C zMc55VthCJS$_UteIyo)cGw0yqd}m_LU`-pby&owrp#! zXw+P|sl@jRkG^2%eSfEd(NV?opn!Nw;Dgx*zKkM1(w&me(4&O4Eu&ktokF^@b_=5$ zLqy_bPvRSm0dq`~;F=Zvyw}>#_dd31^k#4kIk>{C6JQ`@~4HRW!>#_s~q1Q(L*b%jOdM{=j^Pca{; zZW08$sNJjt9GUy$3ld$*!S|@23vG^Gjy(*4=kw-__37kieLD}|zt7mIB^OK)a!#U} z%tI%f70yyYDkycj7zIfbBjr`tTCmyZ+N-H#Q@6x$-`Tu{5iaGbU{hcBvqA>hWB&Ro{0Of-()f2Gkf;DIz$=7NkIN6?zsvK%Rzw+ zi#0i(Y_#@+^JAfEf?d*#yCK!z(c_eF4^M?f;~hvVBObp z$}VtMa^YDz=Tn!`C7jOXgR8b~msUw06h^*Qqkau>il?9c+!Uy;6F0A{_+p`&l=Gy8 zet39K$oq%Jwye^pRbKYwiDVwtTHX#e&KZmtMI9C)wZ8WQQ@LEHn@=;@7~;Dxj{<4Lw~acFbB(j=e)=zN^@DSIrMOs zWtBJ1pg-x<<=UKlUfXe8!*m;n2HBtLrCCe(Z0*G1U;*sw;R77mBU8+IDEPb#*K!qhKl_hW#Lt&7~GICia_0hWf*fMa|CN@#OhWzI52-_EBnV(*5x_$UK)sLnADB~`{ zD*er%3{3z`Vmq()U$8M@zPVu0Ftltf`S#gt)A}wVdW#CH6|)fabN%9=em^t?`Esi6JO^yc4@UeZ=SWr3g@HNBVi(}6Ky6Qy*a8uz{|2!ecZ=~>NED`q zilq{Pj6WdTGY%;%aI;9RpJZ7>=jExM@6v3FKV`I!mu_W+5HdXSN^@`hssF3_BwYvh z_|VH&Coy?3@k)v5k_4d@dY5?Tbutanm%m1dly+e*=54=h2tNZqK^~G3Uxf>!vRUha z?6W7J*l^#FCB$L)c`yNjvBY3z>m%-|-ttFjo_(F(C59ve97GgD@Ht>H3!96B_W~1$ zQFTnUcE9i_JUsJx(Z@Y(;k>z??FqREfF>5Z=F>LrtgEPFrG&9wJ<|pi%z^RKf=Vx) zU@u2l4`C117=#!`58G{`y-CE<5b)Y050EQRxLh&T!~yJJNxRLlBsFr2D*P+=gSg!3 zAbTE<&%A6uB))jqOFkmZ)|#P+#iw45fFO-1GmWVx!(rVPP{KTL)}hpfgU&Q{5)VKt zPJ%lQUM>+r1JO;}ij^@9hq9vSOXYN9-BcN|;m|T`&C4j6R4xbAJIZiX#>MJ|Jr0aT zR#|*|J>>ThrfrlzEq*t^{X(gs>6|Dhq1RBJHnLjPT^W7A6cHBJtOTVv3{ZGi6^VmI%R?SsIgtB`xLv!`!dxw zG33lQPr+GaYJ47!Vkneo)d*f4rH4PfOPDf!TlFD!7i_UK0)m|bJs)ZKCivO>KBhYi zaaZw)D4}%*?UXN*R72h(80dC9PMbZ%^F{`Nu_*ks4cl5RWxT|RFcQ5q=5kvVyA!cf zcBsnHQx_B_iU~D*M3ruU3Kuk?BSbjZGu&Bh*S=dAhfnCbTSPSn?Ra9OpubDRAkQQW zUsA45%B~>_ydbrHi2cwHM+LKvQ|PHOXj!hJEl57rhAZ&osPe%UY_ zvd7)>(!0a6f9zr``G}VI-Q6XosI-ikfq*o>$b|~oG)6Qy{J)s3O`g&BtGTuh*Vfk? zGcHyYw;p`0U3l(gW7c51HP6EG$+Hz>cv`H`tGX& z(tc=!nNKb1%ia8_YFYqe8JH)>f_;BZ!mBe`1V`f4_Q=By?wPDbeX>FgVG7pgh;$j$ z)crmshogw{1i(GjUE(Mjvzw5f(QvR}BuBqKj=jbBdk%VrYUM(z-!yxzlOYABkCmlt z8_2lwU$!l1vx371TMfH=Rr0-3l$V6%&jrX0s3`xUB(51@GoHG+a^bAB*LQF}$<_Tu zN)!altTeN7jOM%rC1asq|((euq z*Ee>9`~2zO>li!ML~7Lx|L!MQu`wLu&H%HHBFE zzdPu3d>Gv{d6iiHmal-8y&lU#3k@!=5u2gpcUP z`z_M*&gIy=0o!E{c<^}j>9a^S#?z>;#eZ2#tn+s=m>>q^xT?}XtKNY9r_I3NOCDjE z2LfCR$v?vpMGg(#eA3#opqnwM#B@7rE?0{`k6lwMHoAXBV+o%2=m-n=+a6O#N0<4(YR16=a030l0NrseE1{ zOE=iU7G)Ni7(FDg0eM{Q^{}VnWl8!#yBvDaopoFP(wv5*!S#1<1FEETv&>#pI0FU+ z{fnh_{98b1mREVxu021et9g)eLyfxP{Ibt5jk3wbPW*0U2)oB;^IS?)(7T~hVkCHK zZFb7>YJp>d_{R?lRRwfrP5lCe5v4$n+sE<%P>%{WYeeAyrCy;zF<;Xsg3XEj%ti9X z9KPeh8)L>~)h$Gjn?xr7E-eMy^P~CqrSGJh_753Dg!~e@l~YW}JK;!XJBwaKNHQrw zScfADSO|a0qWM`HoN&<2Nt$f-M*kt>uSrq>ym}$%Szv0g+*#g^GTWW_RAcR-LULlA zko#!-nRE0Ov#5&4B}?l&vR%AjWAqeNsq7dT)&hP&>46!oMM|$okR8dF@%S>O?dP(r zfBJCj?6Q-TZOp;;ZK(?|#IeAKcI>|yP@dP&E~sz%n|A*6b>kDWxX4Cx#uu@p8-r(_ z+XMt}O*Nwk-pU-Z>(TIzpbonjj?Ww7C}mgi#~&DOz|4s1HgVM8GFe=t6I{xE*Uby1 z0YpdTOoFUg5~o^9z3+eAuG4zCQ6-Vyf|nq(cM|*qg=Xf_(%4MjLlKuT1HGfLuj>TC zW7e`9qBmfUjhZB(O*a4&*>Y!e(@0hH43Fn3~=qGQPi!$=|uK z{>_c+U-TG~et2RQ9N(4ooeMcgrPQwf61tzy`nTB^Mi4N+5b2`LhfBSO?VFt z1t3rjs8&Qf`Wi3wAgu0H#PhC(STC`=j?WCaI*#4@%(;l{$`!?tO-f)B4>%ajU(IMD z8|_?XD_q~A&oa#C0~}X2P<5~%Cip9V=N(f&DP}0045dgU35WjL{WoVaoCNPo47aDU z2Z>Cdp#WJ38l4uMqkeI(ruMjV^n|@KINmePJntiGsf+4qE_PC_SH`Rxq)FYrw;th! zsaVwBF!@Iv;uT%tCtPTEmTm2+Ae%t_lNJ}Q_priiy z)~cR(RthfwzG$e)n+Xx0vE<_SKV&sarf9$Re}~Z}-lJ%jVVm!4qvKQMPsQVmpIWw4FX+q9hmb04W^Bp3H>3~e%QSA7EHHVU zZNtCybj#7bSh1$Rp;MNb;rP(+ZG};Q-O1b9gT{1yi7?|H(0~!e>9Epy@8zU)JEhcm zKK#V!cK!E8@F^S&MU3)!R@G328z-v)WY8`>I6iq5_p{h!IH-|^P_Ot0FFPHpM%Y(w zvo{w$54R`1_;;JI^HmvzA+l!*5-1C2WWv)uLfJFm6Nbup{kcGH^p`^{a}YuU zO5IJMZoIju3-qCMu+o3=z3~Wld(6bauWb%_Cd*ODz@dEnSwF_~c5{g`=5ZH@KmhmW zDSN|RM9ckR5JymwTb2Iq?sU(=>*Uopv;t0JM^dUIITNeK@#R21~|F0tV|LJRjl<)fg ze!LrmuD-nw%f&>1_`B18be7)NVE>#8`F&6h8Uk!!1&@LNf2!|INU7=7k6;-C8j*G0 zeBXm&`5cAtR(T+7W@d*9F0*gkq8)b|18jxEpjmq1Wh z%F61#%}8bxOhl#ZJT^0n0S$Vj?ct;GW6`?f)2#lV;{`=0`>@hsx%p7<=V0P!;J#Jc9Yp`rx0aY``9;7cQ(!UbC!$56*WmxRDoTq19pQK_mMF+SN zA4XwUNeW z;#|(dkeY>z%&SJAwYz}?|1waizk(d8(zIipVHH>)EQ8@r*J}*CaPUnb?(FRB0s(5R z?;GH(k_+zAXT9_>Z^1N+82n28H93w_T@%7#zJ^F@* ztc;9|;Yf|eMi75IfNLy+J4IJyB({AP7;})2 zsW!lIi!LNMI1J>yiTL>Vy1?h$6lrGp6YO+wuY{4F;gaCzybMkt;Nb{Xt*Yhi+A;QE zJd$h7N&NHq3eEwnhGo7HaeAchpoB#05Vft3PZNl*S-GA6SzCq$)dkj{|FPQ~9~)DR zUfPTqA9b{^1e13+(80BEaw=@mn4IU&iF|-=U;qYc_pumTue%-QRX&LvT{qMf-mxH!x z$({}RP+Ur_R;uPN3)}1z2*ng&%RdA$@jpIhW?8Tk2Byl4GG-<-ws&?25n8&sjCB8; zdNVUK|A4(0vj7-dqDq0QtK4dq+UL$r*;3sSt;ZS~G`_8-48a;@B8yT$S|#D*pM%z_)t5K7RQ9ebBtXHqh00L(1G8)eXBi20f0a7Q|rej=0{9ZTLu zwx-Kke*$9);H@ix8zwmH6r+ExO4PL4PX|-jBoK%7>;1RF`l6y{pJP!ozoCIxM@L6V zNlAn1aDQJ4O#R?#q|eM|wD_I#BEaf~8lj=1bF<|~c|Sj#WN$)3SX1DCAQkPQ=T^YT z!-FrFh69?XHClZ8Bv%6T=l*@e!wS!y{o(L$ROtrmAT01J2^dTvz}eiEk%*@7Yli^^a-#|?FZax)$my&X;W)|Gz z-x^JsFsy)Y43GbX2V9hnoBIw}gMMrXGqL{dc@H-zuNO}D(dp?z-&|CrdkCzBxaMYe z+|V1~_4qa-q7#OLQ4qQ%M4}n$vwoL2gI0Bh7(?^zUM*y4Lm?paj;lzK0P2KGa30Tq z?;3|=dF%n44zkkHC|^1|-Bx??LB5gVRgo{rwg?7UpQ`Mnuq7VY4U>W1A-GoIwwhKP2~D}T zf&1P!hq~UrK2nb#M7&;JUb%&Z7+|HXUu|^@DO_CeFo{k;fCxk-RRYT^Pemi9W^vwa z_rlvUF`ZmV-r}EB=v9Z?Rbg7gb@uSLd;LS#JttO9&Ew? z)C4?z(aEB^XLqs5LpxCrO3KO|@U##Jq^lIz{2`r|VPEy8>^7lgqEN_#j{~>Me4|nM zlM7Hce!@?dF)^6|_Fp({=j-J0iHR0q6WhO{kcZ#e+WKp2OKTEu(2Aezf4-PjQ#@+w ztQJbef{!1uGBPrfg1_gSKcvaDo!!iSGw(Rl(I+^yZIG zeOfjONA2WnZ3`W);AB685cqHW)IvCA$Oc4bApt;IAc556+D>`NDuvyBJ{%ps*p$oA z_NggdLLwp-+kY3A$(>BE;sx971SW;9O3BX6H@O$Nj(C)QaKwMwGs6yb7lAjrvFSvoqCV0lNQNwoZ?d{dL zmYCv0l`=M-=V;g$0hhfmU%ns^kjw;++G!u#)y!%b7!>!uT7KOYVD0c;nQ zA&?S((lD2h3f`bF42iUKBdPbhDmQLI#lt(E5^|XrUWTb}6FPcJR`7Y_pKzLJr`yF$8%H{hP%i6O^aEVxOu^|+%Lkv{NX;ug_m@jf~lcWnjYoyrh|yZik7+8`Lc3aYIZ zyeVI;cv-fuk~X=(qYBpM8oaG+1 zhAdcrv2ZN*4GzXPH;c|TxMo4>7d_+r1Y7|jE0$}*Q8hQu0UxYhaHs0_7~b1+5qWV^ znhtKjh+eRs-~h8Ko$>(6VpVXO$O8{RA8;rt$jOlwbpGz4=SumkB^_2dOq<5^LLO7l z+-z9Kg!AdUXYIz{hOD(Amo}{D>{g%R(u0G8mp3yYK+2L}o8_;|sG3ZJ{Q4gMqp*9AY3hpO z0A7Ly6|hugun0^G0kSyDRBizkW_Ph8D%98Jn%Xo>kuF$mh0<2EJbW|*iH|{5o}!h6 z866Oisstt_EU69{pePi1nAis8T{Vrheh>d_|JgK6nx>E3bMF0}Uz79w*ml{hRzYC3 z!4Qw61XD)`ALrx&r^B9ncVKU%!C+XIIU(w{rLTjD)%Vn2%4KHb%7^g#V&E(JdcFQp zY4mQuRM&)=Q2{eprEkNwVwGB*Ks0oUs`(Y`Wllm;k{=}W?s0n^RvNa6i68rry#IIZ zLQj*~yp6}>g~Y{a`|IuX&(i9YZ-k4zy?sH|UZDEPe~_YT>JnbSh!nu0j?2>B?D>C&1;5s@wmsrIO7xUc!blTq2+ z94M7aBmBLl-o4`^{ah#%{yt`}h|VIm6@V(MU2{v=umtnupT$4Wb6=&FSs1MDqvKZV zcN$HMSnJGKv;W=-i^USo;dCrd+LY7SNcE>tub($~GKME7nfEP)CKG*mdnmRae*Z_` zC~(1cuxUiYDogZWgku*uwvOVb;ETzkkkFhZ+#|{q5xYv;?si@$67%^UxMJt@_U^&* z!;M&SZp6E6I_Zol*m`|e3r?u0fTe_5fUmYpP%~P|p zd1x^{IGyr0&m&15aLt&*;rP0uWP9k`+#HVNiG8y^EN6=f+0~+L}^IH%4T7U24A^j>+B1?o=N?zq7!>A51;;B&27K zyGLb@tqTi@nlhA|Sl5SRNLuFIdCF8jf^;cr*7)LJFtRU&B|YryWFqbO#h)pw*$(aW zHxjl&T~t&=yqv^}*mD>eAYr4(t2f>Hxjq6lqxZ=ZvKaE?h)-E$d_?BsOgw+_V(!e0 zVgr}kmR!3ET_Zy91%(3x1NqhEnk7J23Dr|8?Fq$aoYA`Y^!1=l#{-*ZxE5 zSAiIRjMNDFM;=&Sw3d?EeLj#P%FBaLVMhu3N(2eL{w&5cJu@>nBxJ$#^mNVoTs$sf zGCQzuQtotQ0BRe~i