From cfa4b7785e7bb4a5c0360aa248e0f9918e203d25 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Mon, 8 Jun 2026 05:31:17 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: rbelanec/train_qnli_42_1779286680 Source: Original Platform --- .gitattributes | 36 + README.md | 81 + all_results.json | 13 + config.json | 39 + eval_results.json | 8 + generation_config.json | 12 + model.safetensors | 3 + special_tokens_map.json | 26 + tokenizer.json | 3 + tokenizer_config.json | 2069 +++++ train.yaml | 65 + train_results.json | 9 + trainer_log.jsonl | 2376 +++++ trainer_state.json | 19063 ++++++++++++++++++++++++++++++++++++++ training_args.bin | 3 + training_eval_loss.png | Bin 0 -> 47369 bytes training_loss.png | Bin 0 -> 43261 bytes 17 files changed, 23806 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 eval_results.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train.yaml create mode 100644 train_results.json create mode 100644 trainer_log.jsonl create mode 100644 trainer_state.json create mode 100644 training_args.bin create mode 100644 training_eval_loss.png create mode 100644 training_loss.png diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..c8d4a9b --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +--- +library_name: transformers +license: llama3.2 +base_model: meta-llama/Llama-3.2-1B-Instruct +tags: +- peft-factory +- freeze +- llama-factory +- generated_from_trainer +model-index: +- name: train_qnli_42_1779286680 + results: [] +--- + + + +# train_qnli_42_1779286680 + +This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) on the qnli dataset. +It achieves the following results on the evaluation set: +- Loss: 0.0523 +- Num Input Tokens Seen: 11312256 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-06 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Input Tokens Seen | +|:-------------:|:------:|:-----:|:---------------:|:-----------------:| +| 0.0929 | 0.0501 | 590 | 0.0807 | 571072 | +| 0.1054 | 0.1001 | 1180 | 0.0708 | 1136384 | +| 0.1201 | 0.1502 | 1770 | 0.0836 | 1703808 | +| 0.1436 | 0.2003 | 2360 | 0.0888 | 2266496 | +| 0.0749 | 0.2503 | 2950 | 0.0761 | 2827328 | +| 0.0141 | 0.3004 | 3540 | 0.0862 | 3399808 | +| 0.0051 | 0.3505 | 4130 | 0.0710 | 3963584 | +| 0.0782 | 0.4005 | 4720 | 0.0551 | 4530304 | +| 0.05 | 0.4506 | 5310 | 0.0634 | 5095424 | +| 0.0293 | 0.5007 | 5900 | 0.0550 | 5660352 | +| 0.0534 | 0.5507 | 6490 | 0.0558 | 6232896 | +| 0.0467 | 0.6008 | 7080 | 0.0598 | 6801984 | +| 0.0404 | 0.6509 | 7670 | 0.0556 | 7363968 | +| 0.0633 | 0.7010 | 8260 | 0.0546 | 7924800 | +| 0.0632 | 0.7510 | 8850 | 0.0540 | 8494720 | +| 0.1023 | 0.8011 | 9440 | 0.0547 | 9066048 | +| 0.0665 | 0.8512 | 10030 | 0.0526 | 9634624 | +| 0.0855 | 0.9012 | 10620 | 0.0523 | 10199424 | +| 0.004 | 0.9513 | 11210 | 0.0523 | 10764096 | + + +### Framework versions + +- Transformers 4.51.3 +- Pytorch 2.10.0+cu128 +- Datasets 4.0.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..b56f90d --- /dev/null +++ b/all_results.json @@ -0,0 +1,13 @@ +{ + "epoch": 1.0, + "eval_loss": 0.05228454992175102, + "eval_runtime": 15.7327, + "eval_samples_per_second": 665.81, + "eval_steps_per_second": 83.266, + "num_input_tokens_seen": 11312256, + "total_flos": 6.605086766609203e+16, + "train_loss": 0.0722960903008882, + "train_runtime": 1744.9763, + "train_samples_per_second": 54.023, + "train_steps_per_second": 6.753 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..08bd85b --- /dev/null +++ b/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.3", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..ad20dff --- /dev/null +++ b/eval_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.0, + "eval_loss": 0.05228454992175102, + "eval_runtime": 15.7327, + "eval_samples_per_second": 665.81, + "eval_steps_per_second": 83.266, + "num_input_tokens_seen": 11312256 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..2b8ae57 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.3" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..b2f5d3d --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7fe1fb096411094fc5340a86058ceca9a61de7648bc71f4cdd68d71da0fadff +size 4417933576 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..14daf45 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,26 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1c1d8d5 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ddc3ce0 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2069 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.yaml b/train.yaml new file mode 100644 index 0000000..eff78c2 --- /dev/null +++ b/train.yaml @@ -0,0 +1,65 @@ +seed: 42 + +### model +model_name_or_path: meta-llama/Llama-3.2-1B-Instruct +trust_remote_code: true +flash_attn: auto +use_cache: false + +### method +# Full fine-tune of every decoder block, but with the (tied) embeddings frozen. +# `finetuning_type: freeze` only trains modules whose name matches a trainable layer; +# embed_tokens / lm_head / final model.norm are "extra" modules and stay frozen unless +# listed in freeze_extra_modules. Setting freeze_trainable_layers = num_hidden_layers (16 +# for Llama-3.2-1B) makes ALL decoder blocks trainable, so this == "full FT minus +# embeddings". Because tie_word_embeddings=true, freezing embed_tokens also freezes lm_head. +# This is lever B of the embedding-amplification fix (see figures/amplification/README.md). +stage: sft +do_train: true +finetuning_type: freeze +freeze_trainable_layers: 16 +freeze_trainable_modules: all +# freeze_extra_modules: left unset -> embed_tokens, lm_head (tied), final norm stay frozen + +### dataset +dataset: qnli +template: llama3 +cutoff_len: 2048 +overwrite_cache: true +preprocessing_num_workers: 4 +dataloader_num_workers: 4 +packing: false + +### output +output_dir: saves_bts_preliminary/freeze/llama-3.2-1b-instruct/train_qnli_42_1779286680 +logging_steps: 5 +save_steps: 0.05 +overwrite_output_dir: true +save_only_model: false +plot_loss: true +include_num_input_tokens_seen: true +push_to_hub: true +push_to_hub_organization: rbelanec +load_best_model_at_end: true +save_total_limit: 1 + +### train +per_device_train_batch_size: 8 +learning_rate: 2.0e-6 +num_train_epochs: 1 +weight_decay: 1.0e-2 +lr_scheduler_type: cosine +bf16: true +ddp_timeout: 180000000 +resume_from_checkpoint: null +warmup_ratio: 0.1 +optim: adamw_torch +report_to: +- wandb +run_name: freeze_llama-3.2-1b-instruct_train_qnli_42_1779286680 + +### eval +per_device_eval_batch_size: 8 +eval_strategy: steps +eval_steps: 0.05 +val_size: 0.1 diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..7c16ff6 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "num_input_tokens_seen": 11312256, + "total_flos": 6.605086766609203e+16, + "train_loss": 0.0722960903008882, + "train_runtime": 1744.9763, + "train_samples_per_second": 54.023, + "train_steps_per_second": 6.753 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..044be33 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,2376 @@ +{"current_steps": 5, "total_steps": 11784, "loss": 0.8772, "lr": 6.785411365564037e-09, "epoch": 0.0004243041412084182, "percentage": 0.04, "elapsed_time": "0:00:00", "remaining_time": "0:38:31", "throughput": 5087.73, "total_tokens": 4992} +{"current_steps": 10, "total_steps": 11784, "loss": 0.958, "lr": 1.526717557251908e-08, "epoch": 0.0008486082824168364, "percentage": 0.08, "elapsed_time": "0:00:01", "remaining_time": "0:25:54", "throughput": 7223.24, "total_tokens": 9536} +{"current_steps": 15, "total_steps": 11784, "loss": 0.8691, "lr": 2.374893977947413e-08, "epoch": 0.0012729124236252546, "percentage": 0.13, "elapsed_time": "0:00:01", "remaining_time": "0:21:38", "throughput": 8468.76, "total_tokens": 14016} +{"current_steps": 20, "total_steps": 11784, "loss": 0.8723, "lr": 3.223070398642917e-08, "epoch": 0.0016972165648336728, "percentage": 0.17, "elapsed_time": "0:00:02", "remaining_time": "0:19:48", "throughput": 9726.9, "total_tokens": 19648} +{"current_steps": 25, "total_steps": 11784, "loss": 0.8359, "lr": 4.0712468193384224e-08, "epoch": 0.002121520706042091, "percentage": 0.21, "elapsed_time": "0:00:02", "remaining_time": "0:18:28", "throughput": 10511.73, "total_tokens": 24768} +{"current_steps": 30, "total_steps": 11784, "loss": 0.8734, "lr": 4.919423240033927e-08, "epoch": 0.0025458248472505093, "percentage": 0.25, "elapsed_time": "0:00:02", "remaining_time": "0:17:46", "throughput": 11007.6, "total_tokens": 29952} +{"current_steps": 35, "total_steps": 11784, "loss": 0.8972, "lr": 5.767599660729432e-08, "epoch": 0.0029701289884589274, "percentage": 0.3, "elapsed_time": "0:00:03", "remaining_time": "0:17:03", "throughput": 11251.94, "total_tokens": 34304} +{"current_steps": 40, "total_steps": 11784, "loss": 0.8139, "lr": 6.615776081424935e-08, "epoch": 0.0033944331296673455, "percentage": 0.34, "elapsed_time": "0:00:03", "remaining_time": "0:16:34", "throughput": 11618.27, "total_tokens": 39360} +{"current_steps": 45, "total_steps": 11784, "loss": 0.8369, "lr": 7.463952502120441e-08, "epoch": 0.0038187372708757637, "percentage": 0.38, "elapsed_time": "0:00:03", "remaining_time": "0:16:12", "throughput": 11934.21, "total_tokens": 44480} +{"current_steps": 50, "total_steps": 11784, "loss": 0.6877, "lr": 8.312128922815945e-08, "epoch": 0.004243041412084182, "percentage": 0.42, "elapsed_time": "0:00:04", "remaining_time": "0:15:52", "throughput": 12107.61, "total_tokens": 49152} +{"current_steps": 55, "total_steps": 11784, "loss": 0.5185, "lr": 9.16030534351145e-08, "epoch": 0.0046673455532926, "percentage": 0.47, "elapsed_time": "0:00:04", "remaining_time": "0:15:39", "throughput": 12192.19, "total_tokens": 53696} +{"current_steps": 60, "total_steps": 11784, "loss": 0.4927, "lr": 1.0008481764206955e-07, "epoch": 0.0050916496945010185, "percentage": 0.51, "elapsed_time": "0:00:04", "remaining_time": "0:15:25", "throughput": 12361.33, "total_tokens": 58560} +{"current_steps": 65, "total_steps": 11784, "loss": 0.4352, "lr": 1.085665818490246e-07, "epoch": 0.005515953835709436, "percentage": 0.55, "elapsed_time": "0:00:05", "remaining_time": "0:15:16", "throughput": 12550.24, "total_tokens": 63808} +{"current_steps": 70, "total_steps": 11784, "loss": 0.3313, "lr": 1.1704834605597964e-07, "epoch": 0.005940257976917855, "percentage": 0.59, "elapsed_time": "0:00:05", "remaining_time": "0:15:05", "throughput": 12588.78, "total_tokens": 68096} +{"current_steps": 75, "total_steps": 11784, "loss": 0.2376, "lr": 1.2553011026293469e-07, "epoch": 0.006364562118126273, "percentage": 0.64, "elapsed_time": "0:00:05", "remaining_time": "0:14:54", "throughput": 12646.43, "total_tokens": 72448} +{"current_steps": 80, "total_steps": 11784, "loss": 0.2135, "lr": 1.3401187446988974e-07, "epoch": 0.006788866259334691, "percentage": 0.68, "elapsed_time": "0:00:06", "remaining_time": "0:14:50", "throughput": 12867.12, "total_tokens": 78336} +{"current_steps": 85, "total_steps": 11784, "loss": 0.165, "lr": 1.4249363867684477e-07, "epoch": 0.00721317040054311, "percentage": 0.72, "elapsed_time": "0:00:06", "remaining_time": "0:14:43", "throughput": 12946.56, "total_tokens": 83072} +{"current_steps": 90, "total_steps": 11784, "loss": 0.1788, "lr": 1.509754028837998e-07, "epoch": 0.007637474541751527, "percentage": 0.76, "elapsed_time": "0:00:06", "remaining_time": "0:14:39", "throughput": 13022.88, "total_tokens": 88128} +{"current_steps": 95, "total_steps": 11784, "loss": 0.1682, "lr": 1.594571670907549e-07, "epoch": 0.008061778682959946, "percentage": 0.81, "elapsed_time": "0:00:07", "remaining_time": "0:14:33", "throughput": 13091.5, "total_tokens": 92992} +{"current_steps": 100, "total_steps": 11784, "loss": 0.16, "lr": 1.6793893129770992e-07, "epoch": 0.008486082824168364, "percentage": 0.85, "elapsed_time": "0:00:07", "remaining_time": "0:14:29", "throughput": 13184.48, "total_tokens": 98112} +{"current_steps": 105, "total_steps": 11784, "loss": 0.1406, "lr": 1.7642069550466495e-07, "epoch": 0.008910386965376781, "percentage": 0.89, "elapsed_time": "0:00:07", "remaining_time": "0:14:23", "throughput": 13227.15, "total_tokens": 102720} +{"current_steps": 110, "total_steps": 11784, "loss": 0.156, "lr": 1.8490245971162e-07, "epoch": 0.0093346911065852, "percentage": 0.93, "elapsed_time": "0:00:08", "remaining_time": "0:14:19", "throughput": 13281.54, "total_tokens": 107520} +{"current_steps": 115, "total_steps": 11784, "loss": 0.17, "lr": 1.9338422391857507e-07, "epoch": 0.009758995247793618, "percentage": 0.98, "elapsed_time": "0:00:08", "remaining_time": "0:14:14", "throughput": 13308.98, "total_tokens": 112064} +{"current_steps": 120, "total_steps": 11784, "loss": 0.1468, "lr": 2.018659881255301e-07, "epoch": 0.010183299389002037, "percentage": 1.02, "elapsed_time": "0:00:08", "remaining_time": "0:14:11", "throughput": 13381.0, "total_tokens": 117184} +{"current_steps": 125, "total_steps": 11784, "loss": 0.1524, "lr": 2.1034775233248513e-07, "epoch": 0.010607603530210456, "percentage": 1.06, "elapsed_time": "0:00:09", "remaining_time": "0:14:07", "throughput": 13402.86, "total_tokens": 121792} +{"current_steps": 130, "total_steps": 11784, "loss": 0.1493, "lr": 2.188295165394402e-07, "epoch": 0.011031907671418872, "percentage": 1.1, "elapsed_time": "0:00:09", "remaining_time": "0:14:03", "throughput": 13414.26, "total_tokens": 126272} +{"current_steps": 135, "total_steps": 11784, "loss": 0.1492, "lr": 2.2731128074639524e-07, "epoch": 0.011456211812627291, "percentage": 1.15, "elapsed_time": "0:00:09", "remaining_time": "0:14:00", "throughput": 13433.76, "total_tokens": 130880} +{"current_steps": 140, "total_steps": 11784, "loss": 0.152, "lr": 2.3579304495335027e-07, "epoch": 0.01188051595383571, "percentage": 1.19, "elapsed_time": "0:00:10", "remaining_time": "0:13:57", "throughput": 13459.57, "total_tokens": 135552} +{"current_steps": 145, "total_steps": 11784, "loss": 0.1422, "lr": 2.442748091603053e-07, "epoch": 0.012304820095044128, "percentage": 1.23, "elapsed_time": "0:00:10", "remaining_time": "0:13:56", "throughput": 13519.4, "total_tokens": 140928} +{"current_steps": 150, "total_steps": 11784, "loss": 0.1357, "lr": 2.5275657336726036e-07, "epoch": 0.012729124236252547, "percentage": 1.27, "elapsed_time": "0:00:10", "remaining_time": "0:13:55", "throughput": 13568.43, "total_tokens": 146176} +{"current_steps": 155, "total_steps": 11784, "loss": 0.1692, "lr": 2.612383375742154e-07, "epoch": 0.013153428377460964, "percentage": 1.32, "elapsed_time": "0:00:11", "remaining_time": "0:13:54", "throughput": 13639.47, "total_tokens": 151680} +{"current_steps": 160, "total_steps": 11784, "loss": 0.1577, "lr": 2.697201017811705e-07, "epoch": 0.013577732518669382, "percentage": 1.36, "elapsed_time": "0:00:11", "remaining_time": "0:13:52", "throughput": 13660.94, "total_tokens": 156480} +{"current_steps": 165, "total_steps": 11784, "loss": 0.1464, "lr": 2.782018659881255e-07, "epoch": 0.0140020366598778, "percentage": 1.4, "elapsed_time": "0:00:11", "remaining_time": "0:13:49", "throughput": 13665.42, "total_tokens": 161024} +{"current_steps": 170, "total_steps": 11784, "loss": 0.1223, "lr": 2.866836301950806e-07, "epoch": 0.01442634080108622, "percentage": 1.44, "elapsed_time": "0:00:12", "remaining_time": "0:13:47", "throughput": 13682.79, "total_tokens": 165760} +{"current_steps": 175, "total_steps": 11784, "loss": 0.1156, "lr": 2.951653944020356e-07, "epoch": 0.014850644942294636, "percentage": 1.49, "elapsed_time": "0:00:12", "remaining_time": "0:13:44", "throughput": 13669.17, "total_tokens": 169984} +{"current_steps": 180, "total_steps": 11784, "loss": 0.1584, "lr": 3.0364715860899065e-07, "epoch": 0.015274949083503055, "percentage": 1.53, "elapsed_time": "0:00:12", "remaining_time": "0:13:42", "throughput": 13672.25, "total_tokens": 174528} +{"current_steps": 185, "total_steps": 11784, "loss": 0.1117, "lr": 3.121289228159457e-07, "epoch": 0.01569925322471147, "percentage": 1.57, "elapsed_time": "0:00:13", "remaining_time": "0:13:41", "throughput": 13673.53, "total_tokens": 179136} +{"current_steps": 190, "total_steps": 11784, "loss": 0.1283, "lr": 3.206106870229007e-07, "epoch": 0.016123557365919892, "percentage": 1.61, "elapsed_time": "0:00:13", "remaining_time": "0:13:39", "throughput": 13663.61, "total_tokens": 183424} +{"current_steps": 195, "total_steps": 11784, "loss": 0.0957, "lr": 3.2909245122985577e-07, "epoch": 0.01654786150712831, "percentage": 1.65, "elapsed_time": "0:00:13", "remaining_time": "0:13:37", "throughput": 13668.55, "total_tokens": 187968} +{"current_steps": 200, "total_steps": 11784, "loss": 0.1221, "lr": 3.375742154368109e-07, "epoch": 0.01697216564833673, "percentage": 1.7, "elapsed_time": "0:00:14", "remaining_time": "0:13:36", "throughput": 13703.72, "total_tokens": 193152} +{"current_steps": 205, "total_steps": 11784, "loss": 0.1305, "lr": 3.460559796437659e-07, "epoch": 0.017396469789545146, "percentage": 1.74, "elapsed_time": "0:00:14", "remaining_time": "0:13:34", "throughput": 13701.2, "total_tokens": 197632} +{"current_steps": 210, "total_steps": 11784, "loss": 0.0971, "lr": 3.5453774385072094e-07, "epoch": 0.017820773930753563, "percentage": 1.78, "elapsed_time": "0:00:14", "remaining_time": "0:13:33", "throughput": 13712.13, "total_tokens": 202304} +{"current_steps": 215, "total_steps": 11784, "loss": 0.1392, "lr": 3.63019508057676e-07, "epoch": 0.018245078071961983, "percentage": 1.82, "elapsed_time": "0:00:15", "remaining_time": "0:13:31", "throughput": 13726.15, "total_tokens": 207040} +{"current_steps": 220, "total_steps": 11784, "loss": 0.1071, "lr": 3.71501272264631e-07, "epoch": 0.0186693822131704, "percentage": 1.87, "elapsed_time": "0:00:15", "remaining_time": "0:13:31", "throughput": 13770.74, "total_tokens": 212480} +{"current_steps": 225, "total_steps": 11784, "loss": 0.1398, "lr": 3.7998303647158606e-07, "epoch": 0.01909368635437882, "percentage": 1.91, "elapsed_time": "0:00:15", "remaining_time": "0:13:30", "throughput": 13804.01, "total_tokens": 217728} +{"current_steps": 230, "total_steps": 11784, "loss": 0.0853, "lr": 3.8846480067854107e-07, "epoch": 0.019517990495587237, "percentage": 1.95, "elapsed_time": "0:00:16", "remaining_time": "0:13:28", "throughput": 13789.05, "total_tokens": 221888} +{"current_steps": 235, "total_steps": 11784, "loss": 0.0764, "lr": 3.969465648854962e-07, "epoch": 0.019942294636795654, "percentage": 1.99, "elapsed_time": "0:00:16", "remaining_time": "0:13:27", "throughput": 13792.56, "total_tokens": 226496} +{"current_steps": 240, "total_steps": 11784, "loss": 0.1296, "lr": 4.0542832909245124e-07, "epoch": 0.020366598778004074, "percentage": 2.04, "elapsed_time": "0:00:16", "remaining_time": "0:13:25", "throughput": 13782.81, "total_tokens": 230720} +{"current_steps": 245, "total_steps": 11784, "loss": 0.2537, "lr": 4.1391009329940624e-07, "epoch": 0.02079090291921249, "percentage": 2.08, "elapsed_time": "0:00:17", "remaining_time": "0:13:24", "throughput": 13799.01, "total_tokens": 235584} +{"current_steps": 250, "total_steps": 11784, "loss": 0.0764, "lr": 4.223918575063613e-07, "epoch": 0.02121520706042091, "percentage": 2.12, "elapsed_time": "0:00:17", "remaining_time": "0:13:24", "throughput": 13834.27, "total_tokens": 241088} +{"current_steps": 255, "total_steps": 11784, "loss": 0.0627, "lr": 4.3087362171331635e-07, "epoch": 0.021639511201629328, "percentage": 2.16, "elapsed_time": "0:00:17", "remaining_time": "0:13:22", "throughput": 13831.9, "total_tokens": 245568} +{"current_steps": 260, "total_steps": 11784, "loss": 0.0585, "lr": 4.3935538592027136e-07, "epoch": 0.022063815342837745, "percentage": 2.21, "elapsed_time": "0:00:18", "remaining_time": "0:13:21", "throughput": 13839.34, "total_tokens": 250304} +{"current_steps": 265, "total_steps": 11784, "loss": 0.0933, "lr": 4.4783715012722647e-07, "epoch": 0.022488119484046165, "percentage": 2.25, "elapsed_time": "0:00:18", "remaining_time": "0:13:20", "throughput": 13852.83, "total_tokens": 255232} +{"current_steps": 270, "total_steps": 11784, "loss": 0.0946, "lr": 4.5631891433418153e-07, "epoch": 0.022912423625254582, "percentage": 2.29, "elapsed_time": "0:00:18", "remaining_time": "0:13:19", "throughput": 13855.03, "total_tokens": 259840} +{"current_steps": 275, "total_steps": 11784, "loss": 0.081, "lr": 4.6480067854113653e-07, "epoch": 0.023336727766463002, "percentage": 2.33, "elapsed_time": "0:00:19", "remaining_time": "0:13:19", "throughput": 13867.86, "total_tokens": 264768} +{"current_steps": 280, "total_steps": 11784, "loss": 0.1051, "lr": 4.732824427480916e-07, "epoch": 0.02376103190767142, "percentage": 2.38, "elapsed_time": "0:00:19", "remaining_time": "0:13:18", "throughput": 13895.51, "total_tokens": 270016} +{"current_steps": 285, "total_steps": 11784, "loss": 0.0954, "lr": 4.817642069550466e-07, "epoch": 0.024185336048879836, "percentage": 2.42, "elapsed_time": "0:00:19", "remaining_time": "0:13:17", "throughput": 13892.88, "total_tokens": 274496} +{"current_steps": 290, "total_steps": 11784, "loss": 0.0791, "lr": 4.902459711620017e-07, "epoch": 0.024609640190088256, "percentage": 2.46, "elapsed_time": "0:00:20", "remaining_time": "0:13:16", "throughput": 13902.9, "total_tokens": 279296} +{"current_steps": 295, "total_steps": 11784, "loss": 0.077, "lr": 4.987277353689568e-07, "epoch": 0.025033944331296673, "percentage": 2.5, "elapsed_time": "0:00:20", "remaining_time": "0:13:15", "throughput": 13916.03, "total_tokens": 284288} +{"current_steps": 300, "total_steps": 11784, "loss": 0.1016, "lr": 5.072094995759117e-07, "epoch": 0.025458248472505093, "percentage": 2.55, "elapsed_time": "0:00:20", "remaining_time": "0:13:14", "throughput": 13922.61, "total_tokens": 289088} +{"current_steps": 305, "total_steps": 11784, "loss": 0.1201, "lr": 5.156912637828668e-07, "epoch": 0.02588255261371351, "percentage": 2.59, "elapsed_time": "0:00:21", "remaining_time": "0:13:13", "throughput": 13921.05, "total_tokens": 293632} +{"current_steps": 310, "total_steps": 11784, "loss": 0.0812, "lr": 5.241730279898219e-07, "epoch": 0.026306856754921927, "percentage": 2.63, "elapsed_time": "0:00:21", "remaining_time": "0:13:12", "throughput": 13921.78, "total_tokens": 298176} +{"current_steps": 315, "total_steps": 11784, "loss": 0.1464, "lr": 5.326547921967769e-07, "epoch": 0.026731160896130347, "percentage": 2.67, "elapsed_time": "0:00:21", "remaining_time": "0:13:11", "throughput": 13922.8, "total_tokens": 302720} +{"current_steps": 320, "total_steps": 11784, "loss": 0.1395, "lr": 5.411365564037319e-07, "epoch": 0.027155465037338764, "percentage": 2.72, "elapsed_time": "0:00:22", "remaining_time": "0:13:10", "throughput": 13934.71, "total_tokens": 307648} +{"current_steps": 325, "total_steps": 11784, "loss": 0.0854, "lr": 5.49618320610687e-07, "epoch": 0.02757976917854718, "percentage": 2.76, "elapsed_time": "0:00:22", "remaining_time": "0:13:10", "throughput": 13952.48, "total_tokens": 312832} +{"current_steps": 330, "total_steps": 11784, "loss": 0.0932, "lr": 5.581000848176421e-07, "epoch": 0.0280040733197556, "percentage": 2.8, "elapsed_time": "0:00:22", "remaining_time": "0:13:09", "throughput": 13952.26, "total_tokens": 317376} +{"current_steps": 335, "total_steps": 11784, "loss": 0.0609, "lr": 5.66581849024597e-07, "epoch": 0.028428377460964018, "percentage": 2.84, "elapsed_time": "0:00:23", "remaining_time": "0:13:09", "throughput": 13970.84, "total_tokens": 322560} +{"current_steps": 340, "total_steps": 11784, "loss": 0.0641, "lr": 5.750636132315522e-07, "epoch": 0.02885268160217244, "percentage": 2.89, "elapsed_time": "0:00:23", "remaining_time": "0:13:08", "throughput": 13970.35, "total_tokens": 327104} +{"current_steps": 345, "total_steps": 11784, "loss": 0.0712, "lr": 5.835453774385072e-07, "epoch": 0.029276985743380855, "percentage": 2.93, "elapsed_time": "0:00:23", "remaining_time": "0:13:07", "throughput": 13982.46, "total_tokens": 332160} +{"current_steps": 350, "total_steps": 11784, "loss": 0.0986, "lr": 5.920271416454622e-07, "epoch": 0.029701289884589272, "percentage": 2.97, "elapsed_time": "0:00:24", "remaining_time": "0:13:06", "throughput": 13988.07, "total_tokens": 336960} +{"current_steps": 355, "total_steps": 11784, "loss": 0.1444, "lr": 6.005089058524173e-07, "epoch": 0.030125594025797692, "percentage": 3.01, "elapsed_time": "0:00:24", "remaining_time": "0:13:06", "throughput": 13993.35, "total_tokens": 341696} +{"current_steps": 360, "total_steps": 11784, "loss": 0.1736, "lr": 6.089906700593723e-07, "epoch": 0.03054989816700611, "percentage": 3.05, "elapsed_time": "0:00:24", "remaining_time": "0:13:05", "throughput": 14015.1, "total_tokens": 347008} +{"current_steps": 365, "total_steps": 11784, "loss": 0.1123, "lr": 6.174724342663274e-07, "epoch": 0.03097420230821453, "percentage": 3.1, "elapsed_time": "0:00:25", "remaining_time": "0:13:05", "throughput": 14033.99, "total_tokens": 352256} +{"current_steps": 370, "total_steps": 11784, "loss": 0.0541, "lr": 6.259541984732824e-07, "epoch": 0.03139850644942294, "percentage": 3.14, "elapsed_time": "0:00:25", "remaining_time": "0:13:04", "throughput": 14046.93, "total_tokens": 357312} +{"current_steps": 375, "total_steps": 11784, "loss": 0.151, "lr": 6.344359626802375e-07, "epoch": 0.03182281059063136, "percentage": 3.18, "elapsed_time": "0:00:25", "remaining_time": "0:13:03", "throughput": 14040.47, "total_tokens": 361728} +{"current_steps": 380, "total_steps": 11784, "loss": 0.1249, "lr": 6.429177268871925e-07, "epoch": 0.032247114731839784, "percentage": 3.22, "elapsed_time": "0:00:26", "remaining_time": "0:13:03", "throughput": 14047.99, "total_tokens": 366592} +{"current_steps": 385, "total_steps": 11784, "loss": 0.2083, "lr": 6.513994910941476e-07, "epoch": 0.032671418873048204, "percentage": 3.27, "elapsed_time": "0:00:26", "remaining_time": "0:13:02", "throughput": 14053.63, "total_tokens": 371392} +{"current_steps": 390, "total_steps": 11784, "loss": 0.0916, "lr": 6.598812553011026e-07, "epoch": 0.03309572301425662, "percentage": 3.31, "elapsed_time": "0:00:26", "remaining_time": "0:13:02", "throughput": 14069.94, "total_tokens": 376640} +{"current_steps": 395, "total_steps": 11784, "loss": 0.1429, "lr": 6.683630195080576e-07, "epoch": 0.03352002715546504, "percentage": 3.35, "elapsed_time": "0:00:27", "remaining_time": "0:13:01", "throughput": 14075.44, "total_tokens": 381504} +{"current_steps": 400, "total_steps": 11784, "loss": 0.1211, "lr": 6.768447837150128e-07, "epoch": 0.03394433129667346, "percentage": 3.39, "elapsed_time": "0:00:27", "remaining_time": "0:13:00", "throughput": 14068.86, "total_tokens": 385920} +{"current_steps": 405, "total_steps": 11784, "loss": 0.1086, "lr": 6.853265479219677e-07, "epoch": 0.03436863543788187, "percentage": 3.44, "elapsed_time": "0:00:27", "remaining_time": "0:12:59", "throughput": 14060.26, "total_tokens": 390272} +{"current_steps": 410, "total_steps": 11784, "loss": 0.1208, "lr": 6.938083121289228e-07, "epoch": 0.03479293957909029, "percentage": 3.48, "elapsed_time": "0:00:28", "remaining_time": "0:12:59", "throughput": 14091.23, "total_tokens": 396160} +{"current_steps": 415, "total_steps": 11784, "loss": 0.0872, "lr": 7.022900763358778e-07, "epoch": 0.03521724372029871, "percentage": 3.52, "elapsed_time": "0:00:28", "remaining_time": "0:12:59", "throughput": 14089.86, "total_tokens": 400768} +{"current_steps": 420, "total_steps": 11784, "loss": 0.0613, "lr": 7.107718405428329e-07, "epoch": 0.035641547861507125, "percentage": 3.56, "elapsed_time": "0:00:28", "remaining_time": "0:12:58", "throughput": 14090.6, "total_tokens": 405504} +{"current_steps": 425, "total_steps": 11784, "loss": 0.0373, "lr": 7.192536047497879e-07, "epoch": 0.036065852002715545, "percentage": 3.61, "elapsed_time": "0:00:29", "remaining_time": "0:12:57", "throughput": 14092.13, "total_tokens": 410176} +{"current_steps": 430, "total_steps": 11784, "loss": 0.0669, "lr": 7.277353689567429e-07, "epoch": 0.036490156143923966, "percentage": 3.65, "elapsed_time": "0:00:29", "remaining_time": "0:12:57", "throughput": 14098.22, "total_tokens": 415040} +{"current_steps": 435, "total_steps": 11784, "loss": 0.1154, "lr": 7.36217133163698e-07, "epoch": 0.036914460285132386, "percentage": 3.69, "elapsed_time": "0:00:29", "remaining_time": "0:12:56", "throughput": 14106.29, "total_tokens": 419968} +{"current_steps": 440, "total_steps": 11784, "loss": 0.2025, "lr": 7.446988973706531e-07, "epoch": 0.0373387644263408, "percentage": 3.73, "elapsed_time": "0:00:30", "remaining_time": "0:12:56", "throughput": 14110.88, "total_tokens": 424832} +{"current_steps": 445, "total_steps": 11784, "loss": 0.3266, "lr": 7.531806615776081e-07, "epoch": 0.03776306856754922, "percentage": 3.78, "elapsed_time": "0:00:30", "remaining_time": "0:12:57", "throughput": 14164.28, "total_tokens": 432064} +{"current_steps": 450, "total_steps": 11784, "loss": 0.0826, "lr": 7.616624257845632e-07, "epoch": 0.03818737270875764, "percentage": 3.82, "elapsed_time": "0:00:30", "remaining_time": "0:12:56", "throughput": 14174.06, "total_tokens": 437184} +{"current_steps": 455, "total_steps": 11784, "loss": 0.102, "lr": 7.701441899915182e-07, "epoch": 0.03861167684996605, "percentage": 3.86, "elapsed_time": "0:00:31", "remaining_time": "0:12:56", "throughput": 14187.76, "total_tokens": 442432} +{"current_steps": 460, "total_steps": 11784, "loss": 0.0454, "lr": 7.786259541984732e-07, "epoch": 0.039035980991174474, "percentage": 3.9, "elapsed_time": "0:00:31", "remaining_time": "0:12:55", "throughput": 14186.72, "total_tokens": 447040} +{"current_steps": 465, "total_steps": 11784, "loss": 0.1319, "lr": 7.871077184054283e-07, "epoch": 0.039460285132382894, "percentage": 3.95, "elapsed_time": "0:00:31", "remaining_time": "0:12:55", "throughput": 14194.74, "total_tokens": 452032} +{"current_steps": 470, "total_steps": 11784, "loss": 0.0636, "lr": 7.955894826123833e-07, "epoch": 0.03988458927359131, "percentage": 3.99, "elapsed_time": "0:00:32", "remaining_time": "0:12:54", "throughput": 14206.7, "total_tokens": 457344} +{"current_steps": 475, "total_steps": 11784, "loss": 0.1349, "lr": 8.040712468193384e-07, "epoch": 0.04030889341479973, "percentage": 4.03, "elapsed_time": "0:00:32", "remaining_time": "0:12:54", "throughput": 14213.33, "total_tokens": 462336} +{"current_steps": 480, "total_steps": 11784, "loss": 0.0904, "lr": 8.125530110262935e-07, "epoch": 0.04073319755600815, "percentage": 4.07, "elapsed_time": "0:00:32", "remaining_time": "0:12:53", "throughput": 14213.81, "total_tokens": 467072} +{"current_steps": 485, "total_steps": 11784, "loss": 0.0983, "lr": 8.210347752332485e-07, "epoch": 0.04115750169721656, "percentage": 4.12, "elapsed_time": "0:00:33", "remaining_time": "0:12:53", "throughput": 14210.51, "total_tokens": 471616} +{"current_steps": 490, "total_steps": 11784, "loss": 0.0815, "lr": 8.295165394402035e-07, "epoch": 0.04158180583842498, "percentage": 4.16, "elapsed_time": "0:00:33", "remaining_time": "0:12:52", "throughput": 14214.55, "total_tokens": 476480} +{"current_steps": 495, "total_steps": 11784, "loss": 0.0929, "lr": 8.379983036471586e-07, "epoch": 0.0420061099796334, "percentage": 4.2, "elapsed_time": "0:00:33", "remaining_time": "0:12:52", "throughput": 14220.88, "total_tokens": 481536} +{"current_steps": 500, "total_steps": 11784, "loss": 0.1216, "lr": 8.464800678541136e-07, "epoch": 0.04243041412084182, "percentage": 4.24, "elapsed_time": "0:00:34", "remaining_time": "0:12:51", "throughput": 14223.18, "total_tokens": 486336} +{"current_steps": 505, "total_steps": 11784, "loss": 0.1143, "lr": 8.549618320610686e-07, "epoch": 0.042854718262050236, "percentage": 4.29, "elapsed_time": "0:00:34", "remaining_time": "0:12:50", "throughput": 14221.95, "total_tokens": 490944} +{"current_steps": 510, "total_steps": 11784, "loss": 0.0585, "lr": 8.634435962680237e-07, "epoch": 0.043279022403258656, "percentage": 4.33, "elapsed_time": "0:00:34", "remaining_time": "0:12:50", "throughput": 14218.6, "total_tokens": 495488} +{"current_steps": 515, "total_steps": 11784, "loss": 0.1, "lr": 8.719253604749788e-07, "epoch": 0.043703326544467076, "percentage": 4.37, "elapsed_time": "0:00:35", "remaining_time": "0:12:49", "throughput": 14211.52, "total_tokens": 499840} +{"current_steps": 520, "total_steps": 11784, "loss": 0.0818, "lr": 8.804071246819338e-07, "epoch": 0.04412763068567549, "percentage": 4.41, "elapsed_time": "0:00:35", "remaining_time": "0:12:48", "throughput": 14211.93, "total_tokens": 504512} +{"current_steps": 525, "total_steps": 11784, "loss": 0.0705, "lr": 8.888888888888888e-07, "epoch": 0.04455193482688391, "percentage": 4.46, "elapsed_time": "0:00:35", "remaining_time": "0:12:48", "throughput": 14215.54, "total_tokens": 509376} +{"current_steps": 530, "total_steps": 11784, "loss": 0.07, "lr": 8.973706530958439e-07, "epoch": 0.04497623896809233, "percentage": 4.5, "elapsed_time": "0:00:36", "remaining_time": "0:12:47", "throughput": 14210.53, "total_tokens": 513856} +{"current_steps": 535, "total_steps": 11784, "loss": 0.107, "lr": 9.058524173027989e-07, "epoch": 0.045400543109300744, "percentage": 4.54, "elapsed_time": "0:00:36", "remaining_time": "0:12:47", "throughput": 14215.99, "total_tokens": 518976} +{"current_steps": 540, "total_steps": 11784, "loss": 0.0958, "lr": 9.143341815097539e-07, "epoch": 0.045824847250509164, "percentage": 4.58, "elapsed_time": "0:00:36", "remaining_time": "0:12:47", "throughput": 14224.12, "total_tokens": 524160} +{"current_steps": 545, "total_steps": 11784, "loss": 0.1709, "lr": 9.228159457167091e-07, "epoch": 0.046249151391717584, "percentage": 4.62, "elapsed_time": "0:00:37", "remaining_time": "0:12:46", "throughput": 14229.62, "total_tokens": 529152} +{"current_steps": 550, "total_steps": 11784, "loss": 0.1825, "lr": 9.312977099236641e-07, "epoch": 0.046673455532926005, "percentage": 4.67, "elapsed_time": "0:00:37", "remaining_time": "0:12:46", "throughput": 14230.4, "total_tokens": 533824} +{"current_steps": 555, "total_steps": 11784, "loss": 0.0982, "lr": 9.397794741306191e-07, "epoch": 0.04709775967413442, "percentage": 4.71, "elapsed_time": "0:00:37", "remaining_time": "0:12:45", "throughput": 14230.74, "total_tokens": 538560} +{"current_steps": 560, "total_steps": 11784, "loss": 0.1049, "lr": 9.482612383375742e-07, "epoch": 0.04752206381534284, "percentage": 4.75, "elapsed_time": "0:00:38", "remaining_time": "0:12:44", "throughput": 14221.97, "total_tokens": 542784} +{"current_steps": 565, "total_steps": 11784, "loss": 0.1076, "lr": 9.567430025445291e-07, "epoch": 0.04794636795655126, "percentage": 4.79, "elapsed_time": "0:00:38", "remaining_time": "0:12:44", "throughput": 14229.42, "total_tokens": 547840} +{"current_steps": 570, "total_steps": 11784, "loss": 0.0785, "lr": 9.652247667514842e-07, "epoch": 0.04837067209775967, "percentage": 4.84, "elapsed_time": "0:00:38", "remaining_time": "0:12:43", "throughput": 14229.03, "total_tokens": 552448} +{"current_steps": 575, "total_steps": 11784, "loss": 0.1165, "lr": 9.737065309584394e-07, "epoch": 0.04879497623896809, "percentage": 4.88, "elapsed_time": "0:00:39", "remaining_time": "0:12:43", "throughput": 14229.26, "total_tokens": 557120} +{"current_steps": 580, "total_steps": 11784, "loss": 0.1119, "lr": 9.821882951653943e-07, "epoch": 0.04921928038017651, "percentage": 4.92, "elapsed_time": "0:00:39", "remaining_time": "0:12:42", "throughput": 14224.49, "total_tokens": 561536} +{"current_steps": 585, "total_steps": 11784, "loss": 0.0704, "lr": 9.906700593723493e-07, "epoch": 0.049643584521384926, "percentage": 4.96, "elapsed_time": "0:00:39", "remaining_time": "0:12:42", "throughput": 14225.69, "total_tokens": 566336} +{"current_steps": 590, "total_steps": 11784, "loss": 0.0929, "lr": 9.991518235793044e-07, "epoch": 0.050067888662593346, "percentage": 5.01, "elapsed_time": "0:00:40", "remaining_time": "0:12:41", "throughput": 14227.18, "total_tokens": 571072} +{"current_steps": 590, "total_steps": 11784, "eval_loss": 0.08071617037057877, "epoch": 0.050067888662593346, "percentage": 5.01, "elapsed_time": "0:00:55", "remaining_time": "0:17:39", "throughput": 10228.84, "total_tokens": 571072} +{"current_steps": 595, "total_steps": 11784, "loss": 0.0317, "lr": 1.0076335877862595e-06, "epoch": 0.050492192803801766, "percentage": 5.05, "elapsed_time": "0:01:43", "remaining_time": "0:32:29", "throughput": 5558.41, "total_tokens": 576192} +{"current_steps": 600, "total_steps": 11784, "loss": 0.1199, "lr": 1.0161153519932147e-06, "epoch": 0.05091649694501019, "percentage": 5.09, "elapsed_time": "0:01:43", "remaining_time": "0:32:18", "throughput": 5586.21, "total_tokens": 580928} +{"current_steps": 605, "total_steps": 11784, "loss": 0.1215, "lr": 1.0245971162001696e-06, "epoch": 0.0513408010862186, "percentage": 5.13, "elapsed_time": "0:01:44", "remaining_time": "0:32:07", "throughput": 5614.47, "total_tokens": 585728} +{"current_steps": 610, "total_steps": 11784, "loss": 0.0926, "lr": 1.0330788804071246e-06, "epoch": 0.05176510522742702, "percentage": 5.18, "elapsed_time": "0:01:44", "remaining_time": "0:31:57", "throughput": 5646.62, "total_tokens": 591040} +{"current_steps": 615, "total_steps": 11784, "loss": 0.1686, "lr": 1.0415606446140797e-06, "epoch": 0.05218940936863544, "percentage": 5.22, "elapsed_time": "0:01:44", "remaining_time": "0:31:46", "throughput": 5672.42, "total_tokens": 595584} +{"current_steps": 620, "total_steps": 11784, "loss": 0.1433, "lr": 1.0500424088210348e-06, "epoch": 0.052613713509843854, "percentage": 5.26, "elapsed_time": "0:01:45", "remaining_time": "0:31:36", "throughput": 5700.08, "total_tokens": 600384} +{"current_steps": 625, "total_steps": 11784, "loss": 0.058, "lr": 1.0585241730279896e-06, "epoch": 0.053038017651052274, "percentage": 5.3, "elapsed_time": "0:01:45", "remaining_time": "0:31:26", "throughput": 5727.97, "total_tokens": 605248} +{"current_steps": 630, "total_steps": 11784, "loss": 0.0805, "lr": 1.0670059372349449e-06, "epoch": 0.053462321792260695, "percentage": 5.35, "elapsed_time": "0:01:45", "remaining_time": "0:31:16", "throughput": 5753.59, "total_tokens": 609856} +{"current_steps": 635, "total_steps": 11784, "loss": 0.0443, "lr": 1.0754877014419e-06, "epoch": 0.05388662593346911, "percentage": 5.39, "elapsed_time": "0:01:46", "remaining_time": "0:31:06", "throughput": 5783.56, "total_tokens": 614976} +{"current_steps": 640, "total_steps": 11784, "loss": 0.0862, "lr": 1.083969465648855e-06, "epoch": 0.05431093007467753, "percentage": 5.43, "elapsed_time": "0:01:46", "remaining_time": "0:30:57", "throughput": 5809.5, "total_tokens": 619648} +{"current_steps": 645, "total_steps": 11784, "loss": 0.0478, "lr": 1.09245122985581e-06, "epoch": 0.05473523421588595, "percentage": 5.47, "elapsed_time": "0:01:47", "remaining_time": "0:30:47", "throughput": 5840.05, "total_tokens": 624896} +{"current_steps": 650, "total_steps": 11784, "loss": 0.1452, "lr": 1.1009329940627649e-06, "epoch": 0.05515953835709436, "percentage": 5.52, "elapsed_time": "0:01:47", "remaining_time": "0:30:38", "throughput": 5866.32, "total_tokens": 629632} +{"current_steps": 655, "total_steps": 11784, "loss": 0.0974, "lr": 1.10941475826972e-06, "epoch": 0.05558384249830278, "percentage": 5.56, "elapsed_time": "0:01:47", "remaining_time": "0:30:29", "throughput": 5894.25, "total_tokens": 634624} +{"current_steps": 660, "total_steps": 11784, "loss": 0.0974, "lr": 1.1178965224766752e-06, "epoch": 0.0560081466395112, "percentage": 5.6, "elapsed_time": "0:01:48", "remaining_time": "0:30:20", "throughput": 5919.91, "total_tokens": 639360} +{"current_steps": 665, "total_steps": 11784, "loss": 0.038, "lr": 1.1263782866836303e-06, "epoch": 0.05643245078071962, "percentage": 5.64, "elapsed_time": "0:01:48", "remaining_time": "0:30:11", "throughput": 5944.99, "total_tokens": 644032} +{"current_steps": 670, "total_steps": 11784, "loss": 0.0969, "lr": 1.1348600508905853e-06, "epoch": 0.056856754921928036, "percentage": 5.69, "elapsed_time": "0:01:48", "remaining_time": "0:30:02", "throughput": 5966.36, "total_tokens": 648256} +{"current_steps": 675, "total_steps": 11784, "loss": 0.1091, "lr": 1.1433418150975402e-06, "epoch": 0.05728105906313646, "percentage": 5.73, "elapsed_time": "0:01:48", "remaining_time": "0:29:53", "throughput": 5992.28, "total_tokens": 653056} +{"current_steps": 680, "total_steps": 11784, "loss": 0.0743, "lr": 1.1518235793044952e-06, "epoch": 0.05770536320434488, "percentage": 5.77, "elapsed_time": "0:01:49", "remaining_time": "0:29:44", "throughput": 6016.63, "total_tokens": 657664} +{"current_steps": 685, "total_steps": 11784, "loss": 0.0379, "lr": 1.1603053435114503e-06, "epoch": 0.05812966734555329, "percentage": 5.81, "elapsed_time": "0:01:49", "remaining_time": "0:29:36", "throughput": 6038.61, "total_tokens": 662016} +{"current_steps": 690, "total_steps": 11784, "loss": 0.1067, "lr": 1.1687871077184053e-06, "epoch": 0.05855397148676171, "percentage": 5.86, "elapsed_time": "0:01:49", "remaining_time": "0:29:27", "throughput": 6060.17, "total_tokens": 666368} +{"current_steps": 695, "total_steps": 11784, "loss": 0.1044, "lr": 1.1772688719253606e-06, "epoch": 0.05897827562797013, "percentage": 5.9, "elapsed_time": "0:01:50", "remaining_time": "0:29:19", "throughput": 6088.71, "total_tokens": 671616} +{"current_steps": 700, "total_steps": 11784, "loss": 0.0405, "lr": 1.1857506361323155e-06, "epoch": 0.059402579769178544, "percentage": 5.94, "elapsed_time": "0:01:50", "remaining_time": "0:29:11", "throughput": 6112.62, "total_tokens": 676288} +{"current_steps": 705, "total_steps": 11784, "loss": 0.0789, "lr": 1.1942324003392705e-06, "epoch": 0.059826883910386965, "percentage": 5.98, "elapsed_time": "0:01:50", "remaining_time": "0:29:03", "throughput": 6136.24, "total_tokens": 680960} +{"current_steps": 710, "total_steps": 11784, "loss": 0.0686, "lr": 1.2027141645462256e-06, "epoch": 0.060251188051595385, "percentage": 6.03, "elapsed_time": "0:01:51", "remaining_time": "0:28:56", "throughput": 6158.19, "total_tokens": 685440} +{"current_steps": 715, "total_steps": 11784, "loss": 0.1027, "lr": 1.2111959287531806e-06, "epoch": 0.060675492192803805, "percentage": 6.07, "elapsed_time": "0:01:51", "remaining_time": "0:28:48", "throughput": 6183.16, "total_tokens": 690304} +{"current_steps": 720, "total_steps": 11784, "loss": 0.0782, "lr": 1.2196776929601355e-06, "epoch": 0.06109979633401222, "percentage": 6.11, "elapsed_time": "0:01:51", "remaining_time": "0:28:40", "throughput": 6207.24, "total_tokens": 695040} +{"current_steps": 725, "total_steps": 11784, "loss": 0.0291, "lr": 1.2281594571670907e-06, "epoch": 0.06152410047522064, "percentage": 6.15, "elapsed_time": "0:01:52", "remaining_time": "0:28:33", "throughput": 6228.44, "total_tokens": 699456} +{"current_steps": 730, "total_steps": 11784, "loss": 0.1563, "lr": 1.2366412213740458e-06, "epoch": 0.06194840461642906, "percentage": 6.19, "elapsed_time": "0:01:52", "remaining_time": "0:28:25", "throughput": 6250.99, "total_tokens": 704064} +{"current_steps": 735, "total_steps": 11784, "loss": 0.0311, "lr": 1.2451229855810009e-06, "epoch": 0.06237270875763747, "percentage": 6.24, "elapsed_time": "0:01:52", "remaining_time": "0:28:18", "throughput": 6272.56, "total_tokens": 708544} +{"current_steps": 740, "total_steps": 11784, "loss": 0.2106, "lr": 1.253604749787956e-06, "epoch": 0.06279701289884589, "percentage": 6.28, "elapsed_time": "0:01:53", "remaining_time": "0:28:10", "throughput": 6294.26, "total_tokens": 713088} +{"current_steps": 745, "total_steps": 11784, "loss": 0.1061, "lr": 1.2620865139949108e-06, "epoch": 0.0632213170400543, "percentage": 6.32, "elapsed_time": "0:01:53", "remaining_time": "0:28:03", "throughput": 6318.93, "total_tokens": 718016} +{"current_steps": 750, "total_steps": 11784, "loss": 0.042, "lr": 1.2705682782018658e-06, "epoch": 0.06364562118126273, "percentage": 6.36, "elapsed_time": "0:01:53", "remaining_time": "0:27:56", "throughput": 6342.0, "total_tokens": 722752} +{"current_steps": 755, "total_steps": 11784, "loss": 0.0744, "lr": 1.279050042408821e-06, "epoch": 0.06406992532247115, "percentage": 6.41, "elapsed_time": "0:01:54", "remaining_time": "0:27:49", "throughput": 6367.84, "total_tokens": 727872} +{"current_steps": 760, "total_steps": 11784, "loss": 0.1042, "lr": 1.2875318066157761e-06, "epoch": 0.06449422946367957, "percentage": 6.45, "elapsed_time": "0:01:54", "remaining_time": "0:27:42", "throughput": 6389.8, "total_tokens": 732480} +{"current_steps": 765, "total_steps": 11784, "loss": 0.0475, "lr": 1.2960135708227312e-06, "epoch": 0.06491853360488799, "percentage": 6.49, "elapsed_time": "0:01:54", "remaining_time": "0:27:35", "throughput": 6408.55, "total_tokens": 736704} +{"current_steps": 770, "total_steps": 11784, "loss": 0.0575, "lr": 1.304495335029686e-06, "epoch": 0.06534283774609641, "percentage": 6.53, "elapsed_time": "0:01:55", "remaining_time": "0:27:29", "throughput": 6441.48, "total_tokens": 742848} +{"current_steps": 775, "total_steps": 11784, "loss": 0.1154, "lr": 1.3129770992366411e-06, "epoch": 0.06576714188730481, "percentage": 6.58, "elapsed_time": "0:01:55", "remaining_time": "0:27:22", "throughput": 6459.82, "total_tokens": 747072} +{"current_steps": 780, "total_steps": 11784, "loss": 0.0311, "lr": 1.3214588634435962e-06, "epoch": 0.06619144602851323, "percentage": 6.62, "elapsed_time": "0:01:55", "remaining_time": "0:27:16", "throughput": 6482.11, "total_tokens": 751808} +{"current_steps": 785, "total_steps": 11784, "loss": 0.0878, "lr": 1.3299406276505512e-06, "epoch": 0.06661575016972165, "percentage": 6.66, "elapsed_time": "0:01:56", "remaining_time": "0:27:09", "throughput": 6505.95, "total_tokens": 756800} +{"current_steps": 790, "total_steps": 11784, "loss": 0.1431, "lr": 1.3384223918575063e-06, "epoch": 0.06704005431093008, "percentage": 6.7, "elapsed_time": "0:01:56", "remaining_time": "0:27:03", "throughput": 6528.21, "total_tokens": 761536} +{"current_steps": 795, "total_steps": 11784, "loss": 0.1062, "lr": 1.3469041560644613e-06, "epoch": 0.0674643584521385, "percentage": 6.75, "elapsed_time": "0:01:56", "remaining_time": "0:26:56", "throughput": 6546.68, "total_tokens": 765824} +{"current_steps": 800, "total_steps": 11784, "loss": 0.3519, "lr": 1.3553859202714164e-06, "epoch": 0.06788866259334692, "percentage": 6.79, "elapsed_time": "0:01:57", "remaining_time": "0:26:50", "throughput": 6566.18, "total_tokens": 770240} +{"current_steps": 805, "total_steps": 11784, "loss": 0.1262, "lr": 1.3638676844783715e-06, "epoch": 0.06831296673455534, "percentage": 6.83, "elapsed_time": "0:01:57", "remaining_time": "0:26:44", "throughput": 6591.0, "total_tokens": 775424} +{"current_steps": 810, "total_steps": 11784, "loss": 0.061, "lr": 1.3723494486853265e-06, "epoch": 0.06873727087576374, "percentage": 6.87, "elapsed_time": "0:01:57", "remaining_time": "0:26:38", "throughput": 6610.5, "total_tokens": 779904} +{"current_steps": 815, "total_steps": 11784, "loss": 0.0747, "lr": 1.3808312128922814e-06, "epoch": 0.06916157501697216, "percentage": 6.92, "elapsed_time": "0:01:58", "remaining_time": "0:26:32", "throughput": 6631.16, "total_tokens": 784512} +{"current_steps": 820, "total_steps": 11784, "loss": 0.078, "lr": 1.3893129770992366e-06, "epoch": 0.06958587915818058, "percentage": 6.96, "elapsed_time": "0:01:58", "remaining_time": "0:26:26", "throughput": 6653.79, "total_tokens": 789440} +{"current_steps": 825, "total_steps": 11784, "loss": 0.1098, "lr": 1.3977947413061917e-06, "epoch": 0.070010183299389, "percentage": 7.0, "elapsed_time": "0:01:58", "remaining_time": "0:26:20", "throughput": 6675.56, "total_tokens": 794240} +{"current_steps": 830, "total_steps": 11784, "loss": 0.139, "lr": 1.4062765055131467e-06, "epoch": 0.07043448744059742, "percentage": 7.04, "elapsed_time": "0:01:59", "remaining_time": "0:26:14", "throughput": 6693.91, "total_tokens": 798592} +{"current_steps": 835, "total_steps": 11784, "loss": 0.1213, "lr": 1.4147582697201018e-06, "epoch": 0.07085879158180584, "percentage": 7.09, "elapsed_time": "0:01:59", "remaining_time": "0:26:08", "throughput": 6718.06, "total_tokens": 803776} +{"current_steps": 840, "total_steps": 11784, "loss": 0.0552, "lr": 1.4232400339270566e-06, "epoch": 0.07128309572301425, "percentage": 7.13, "elapsed_time": "0:01:59", "remaining_time": "0:26:03", "throughput": 6743.12, "total_tokens": 809088} +{"current_steps": 845, "total_steps": 11784, "loss": 0.0523, "lr": 1.4317217981340117e-06, "epoch": 0.07170739986422267, "percentage": 7.17, "elapsed_time": "0:02:00", "remaining_time": "0:25:57", "throughput": 6763.99, "total_tokens": 813824} +{"current_steps": 850, "total_steps": 11784, "loss": 0.0617, "lr": 1.440203562340967e-06, "epoch": 0.07213170400543109, "percentage": 7.21, "elapsed_time": "0:02:00", "remaining_time": "0:25:51", "throughput": 6782.42, "total_tokens": 818240} +{"current_steps": 855, "total_steps": 11784, "loss": 0.1225, "lr": 1.448685326547922e-06, "epoch": 0.07255600814663951, "percentage": 7.26, "elapsed_time": "0:02:00", "remaining_time": "0:25:46", "throughput": 6798.81, "total_tokens": 822400} +{"current_steps": 860, "total_steps": 11784, "loss": 0.1247, "lr": 1.457167090754877e-06, "epoch": 0.07298031228784793, "percentage": 7.3, "elapsed_time": "0:02:01", "remaining_time": "0:25:40", "throughput": 6820.58, "total_tokens": 827328} +{"current_steps": 865, "total_steps": 11784, "loss": 0.1375, "lr": 1.465648854961832e-06, "epoch": 0.07340461642905635, "percentage": 7.34, "elapsed_time": "0:02:01", "remaining_time": "0:25:35", "throughput": 6842.66, "total_tokens": 832320} +{"current_steps": 870, "total_steps": 11784, "loss": 0.0976, "lr": 1.474130619168787e-06, "epoch": 0.07382892057026477, "percentage": 7.38, "elapsed_time": "0:02:01", "remaining_time": "0:25:30", "throughput": 6864.64, "total_tokens": 837376} +{"current_steps": 875, "total_steps": 11784, "loss": 0.0646, "lr": 1.482612383375742e-06, "epoch": 0.07425322471147318, "percentage": 7.43, "elapsed_time": "0:02:02", "remaining_time": "0:25:25", "throughput": 6890.63, "total_tokens": 843008} +{"current_steps": 880, "total_steps": 11784, "loss": 0.1057, "lr": 1.491094147582697e-06, "epoch": 0.0746775288526816, "percentage": 7.47, "elapsed_time": "0:02:02", "remaining_time": "0:25:20", "throughput": 6910.11, "total_tokens": 847680} +{"current_steps": 885, "total_steps": 11784, "loss": 0.0703, "lr": 1.4995759117896522e-06, "epoch": 0.07510183299389002, "percentage": 7.51, "elapsed_time": "0:02:03", "remaining_time": "0:25:14", "throughput": 6929.08, "total_tokens": 852288} +{"current_steps": 890, "total_steps": 11784, "loss": 0.0832, "lr": 1.5080576759966072e-06, "epoch": 0.07552613713509844, "percentage": 7.55, "elapsed_time": "0:02:03", "remaining_time": "0:25:09", "throughput": 6950.65, "total_tokens": 857280} +{"current_steps": 895, "total_steps": 11784, "loss": 0.0415, "lr": 1.5165394402035623e-06, "epoch": 0.07595044127630686, "percentage": 7.6, "elapsed_time": "0:02:03", "remaining_time": "0:25:04", "throughput": 6970.53, "total_tokens": 862080} +{"current_steps": 900, "total_steps": 11784, "loss": 0.1667, "lr": 1.5250212044105173e-06, "epoch": 0.07637474541751528, "percentage": 7.64, "elapsed_time": "0:02:03", "remaining_time": "0:24:59", "throughput": 6988.95, "total_tokens": 866624} +{"current_steps": 905, "total_steps": 11784, "loss": 0.0798, "lr": 1.5335029686174724e-06, "epoch": 0.07679904955872369, "percentage": 7.68, "elapsed_time": "0:02:04", "remaining_time": "0:24:54", "throughput": 7008.39, "total_tokens": 871360} +{"current_steps": 910, "total_steps": 11784, "loss": 0.1418, "lr": 1.5419847328244272e-06, "epoch": 0.0772233536999321, "percentage": 7.72, "elapsed_time": "0:02:04", "remaining_time": "0:24:50", "throughput": 7037.77, "total_tokens": 877632} +{"current_steps": 915, "total_steps": 11784, "loss": 0.0659, "lr": 1.5504664970313825e-06, "epoch": 0.07764765784114053, "percentage": 7.76, "elapsed_time": "0:02:05", "remaining_time": "0:24:45", "throughput": 7051.66, "total_tokens": 881600} +{"current_steps": 920, "total_steps": 11784, "loss": 0.0591, "lr": 1.5589482612383376e-06, "epoch": 0.07807196198234895, "percentage": 7.81, "elapsed_time": "0:02:05", "remaining_time": "0:24:40", "throughput": 7071.3, "total_tokens": 886400} +{"current_steps": 925, "total_steps": 11784, "loss": 0.0906, "lr": 1.5674300254452926e-06, "epoch": 0.07849626612355737, "percentage": 7.85, "elapsed_time": "0:02:05", "remaining_time": "0:24:35", "throughput": 7088.17, "total_tokens": 890880} +{"current_steps": 930, "total_steps": 11784, "loss": 0.076, "lr": 1.5759117896522477e-06, "epoch": 0.07892057026476579, "percentage": 7.89, "elapsed_time": "0:02:06", "remaining_time": "0:24:30", "throughput": 7107.92, "total_tokens": 895744} +{"current_steps": 935, "total_steps": 11784, "loss": 0.1215, "lr": 1.5843935538592025e-06, "epoch": 0.07934487440597421, "percentage": 7.93, "elapsed_time": "0:02:06", "remaining_time": "0:24:26", "throughput": 7124.84, "total_tokens": 900224} +{"current_steps": 940, "total_steps": 11784, "loss": 0.0687, "lr": 1.5928753180661576e-06, "epoch": 0.07976917854718261, "percentage": 7.98, "elapsed_time": "0:02:06", "remaining_time": "0:24:21", "throughput": 7145.43, "total_tokens": 905280} +{"current_steps": 945, "total_steps": 11784, "loss": 0.0859, "lr": 1.6013570822731128e-06, "epoch": 0.08019348268839104, "percentage": 8.02, "elapsed_time": "0:02:07", "remaining_time": "0:24:17", "throughput": 7166.31, "total_tokens": 910336} +{"current_steps": 950, "total_steps": 11784, "loss": 0.0632, "lr": 1.609838846480068e-06, "epoch": 0.08061778682959946, "percentage": 8.06, "elapsed_time": "0:02:07", "remaining_time": "0:24:12", "throughput": 7189.26, "total_tokens": 915776} +{"current_steps": 955, "total_steps": 11784, "loss": 0.0857, "lr": 1.618320610687023e-06, "epoch": 0.08104209097080788, "percentage": 8.1, "elapsed_time": "0:02:07", "remaining_time": "0:24:08", "throughput": 7207.46, "total_tokens": 920512} +{"current_steps": 960, "total_steps": 11784, "loss": 0.1331, "lr": 1.6268023748939778e-06, "epoch": 0.0814663951120163, "percentage": 8.15, "elapsed_time": "0:02:08", "remaining_time": "0:24:03", "throughput": 7223.94, "total_tokens": 924992} +{"current_steps": 965, "total_steps": 11784, "loss": 0.1138, "lr": 1.6352841391009329e-06, "epoch": 0.08189069925322472, "percentage": 8.19, "elapsed_time": "0:02:08", "remaining_time": "0:23:59", "throughput": 7242.45, "total_tokens": 929792} +{"current_steps": 970, "total_steps": 11784, "loss": 0.1123, "lr": 1.643765903307888e-06, "epoch": 0.08231500339443312, "percentage": 8.23, "elapsed_time": "0:02:08", "remaining_time": "0:23:54", "throughput": 7258.27, "total_tokens": 934208} +{"current_steps": 975, "total_steps": 11784, "loss": 0.0767, "lr": 1.652247667514843e-06, "epoch": 0.08273930753564154, "percentage": 8.27, "elapsed_time": "0:02:09", "remaining_time": "0:23:50", "throughput": 7274.01, "total_tokens": 938624} +{"current_steps": 980, "total_steps": 11784, "loss": 0.0286, "lr": 1.660729431721798e-06, "epoch": 0.08316361167684996, "percentage": 8.32, "elapsed_time": "0:02:09", "remaining_time": "0:23:46", "throughput": 7290.47, "total_tokens": 943168} +{"current_steps": 985, "total_steps": 11784, "loss": 0.0942, "lr": 1.669211195928753e-06, "epoch": 0.08358791581805838, "percentage": 8.36, "elapsed_time": "0:02:09", "remaining_time": "0:23:42", "throughput": 7309.21, "total_tokens": 948032} +{"current_steps": 990, "total_steps": 11784, "loss": 0.0874, "lr": 1.6776929601357082e-06, "epoch": 0.0840122199592668, "percentage": 8.4, "elapsed_time": "0:02:10", "remaining_time": "0:23:38", "throughput": 7335.87, "total_tokens": 954176} +{"current_steps": 995, "total_steps": 11784, "loss": 0.0855, "lr": 1.6861747243426632e-06, "epoch": 0.08443652410047522, "percentage": 8.44, "elapsed_time": "0:02:10", "remaining_time": "0:23:33", "throughput": 7353.48, "total_tokens": 958912} +{"current_steps": 1000, "total_steps": 11784, "loss": 0.1609, "lr": 1.6946564885496183e-06, "epoch": 0.08486082824168364, "percentage": 8.49, "elapsed_time": "0:02:10", "remaining_time": "0:23:29", "throughput": 7368.25, "total_tokens": 963264} +{"current_steps": 1005, "total_steps": 11784, "loss": 0.0747, "lr": 1.7031382527565731e-06, "epoch": 0.08528513238289205, "percentage": 8.53, "elapsed_time": "0:02:11", "remaining_time": "0:23:25", "throughput": 7387.3, "total_tokens": 968256} +{"current_steps": 1010, "total_steps": 11784, "loss": 0.1322, "lr": 1.7116200169635284e-06, "epoch": 0.08570943652410047, "percentage": 8.57, "elapsed_time": "0:02:11", "remaining_time": "0:23:21", "throughput": 7401.92, "total_tokens": 972608} +{"current_steps": 1015, "total_steps": 11784, "loss": 0.0859, "lr": 1.7201017811704834e-06, "epoch": 0.08613374066530889, "percentage": 8.61, "elapsed_time": "0:02:11", "remaining_time": "0:23:17", "throughput": 7422.31, "total_tokens": 977856} +{"current_steps": 1020, "total_steps": 11784, "loss": 0.0987, "lr": 1.7285835453774385e-06, "epoch": 0.08655804480651731, "percentage": 8.66, "elapsed_time": "0:02:12", "remaining_time": "0:23:13", "throughput": 7440.24, "total_tokens": 982720} +{"current_steps": 1025, "total_steps": 11784, "loss": 0.1228, "lr": 1.7370653095843936e-06, "epoch": 0.08698234894772573, "percentage": 8.7, "elapsed_time": "0:02:12", "remaining_time": "0:23:09", "throughput": 7457.84, "total_tokens": 987584} +{"current_steps": 1030, "total_steps": 11784, "loss": 0.082, "lr": 1.7455470737913484e-06, "epoch": 0.08740665308893415, "percentage": 8.74, "elapsed_time": "0:02:12", "remaining_time": "0:23:06", "throughput": 7475.61, "total_tokens": 992448} +{"current_steps": 1035, "total_steps": 11784, "loss": 0.1068, "lr": 1.7540288379983035e-06, "epoch": 0.08783095723014257, "percentage": 8.78, "elapsed_time": "0:02:13", "remaining_time": "0:23:02", "throughput": 7492.6, "total_tokens": 997184} +{"current_steps": 1040, "total_steps": 11784, "loss": 0.0649, "lr": 1.7625106022052587e-06, "epoch": 0.08825526137135098, "percentage": 8.83, "elapsed_time": "0:02:13", "remaining_time": "0:22:58", "throughput": 7512.38, "total_tokens": 1002432} +{"current_steps": 1045, "total_steps": 11784, "loss": 0.0748, "lr": 1.7709923664122138e-06, "epoch": 0.0886795655125594, "percentage": 8.87, "elapsed_time": "0:02:13", "remaining_time": "0:22:54", "throughput": 7529.95, "total_tokens": 1007360} +{"current_steps": 1050, "total_steps": 11784, "loss": 0.0598, "lr": 1.7794741306191686e-06, "epoch": 0.08910386965376782, "percentage": 8.91, "elapsed_time": "0:02:14", "remaining_time": "0:22:51", "throughput": 7545.17, "total_tokens": 1011968} +{"current_steps": 1055, "total_steps": 11784, "loss": 0.0976, "lr": 1.7879558948261237e-06, "epoch": 0.08952817379497624, "percentage": 8.95, "elapsed_time": "0:02:14", "remaining_time": "0:22:47", "throughput": 7562.64, "total_tokens": 1016896} +{"current_steps": 1060, "total_steps": 11784, "loss": 0.1005, "lr": 1.7964376590330787e-06, "epoch": 0.08995247793618466, "percentage": 9.0, "elapsed_time": "0:02:14", "remaining_time": "0:22:43", "throughput": 7581.0, "total_tokens": 1021952} +{"current_steps": 1065, "total_steps": 11784, "loss": 0.1977, "lr": 1.8049194232400338e-06, "epoch": 0.09037678207739308, "percentage": 9.04, "elapsed_time": "0:02:15", "remaining_time": "0:22:40", "throughput": 7596.22, "total_tokens": 1026560} +{"current_steps": 1070, "total_steps": 11784, "loss": 0.0821, "lr": 1.813401187446989e-06, "epoch": 0.09080108621860149, "percentage": 9.08, "elapsed_time": "0:02:15", "remaining_time": "0:22:36", "throughput": 7612.78, "total_tokens": 1031360} +{"current_steps": 1075, "total_steps": 11784, "loss": 0.1014, "lr": 1.821882951653944e-06, "epoch": 0.09122539035980991, "percentage": 9.12, "elapsed_time": "0:02:15", "remaining_time": "0:22:33", "throughput": 7631.25, "total_tokens": 1036480} +{"current_steps": 1080, "total_steps": 11784, "loss": 0.0861, "lr": 1.830364715860899e-06, "epoch": 0.09164969450101833, "percentage": 9.16, "elapsed_time": "0:02:16", "remaining_time": "0:22:29", "throughput": 7646.08, "total_tokens": 1041024} +{"current_steps": 1085, "total_steps": 11784, "loss": 0.0588, "lr": 1.838846480067854e-06, "epoch": 0.09207399864222675, "percentage": 9.21, "elapsed_time": "0:02:16", "remaining_time": "0:22:25", "throughput": 7659.19, "total_tokens": 1045312} +{"current_steps": 1090, "total_steps": 11784, "loss": 0.0759, "lr": 1.847328244274809e-06, "epoch": 0.09249830278343517, "percentage": 9.25, "elapsed_time": "0:02:16", "remaining_time": "0:22:22", "throughput": 7676.21, "total_tokens": 1050240} +{"current_steps": 1095, "total_steps": 11784, "loss": 0.09, "lr": 1.8558100084817641e-06, "epoch": 0.09292260692464359, "percentage": 9.29, "elapsed_time": "0:02:17", "remaining_time": "0:22:19", "throughput": 7696.44, "total_tokens": 1055744} +{"current_steps": 1100, "total_steps": 11784, "loss": 0.073, "lr": 1.864291772688719e-06, "epoch": 0.09334691106585201, "percentage": 9.33, "elapsed_time": "0:02:17", "remaining_time": "0:22:15", "throughput": 7711.27, "total_tokens": 1060352} +{"current_steps": 1105, "total_steps": 11784, "loss": 0.1046, "lr": 1.8727735368956743e-06, "epoch": 0.09377121520706042, "percentage": 9.38, "elapsed_time": "0:02:17", "remaining_time": "0:22:12", "throughput": 7729.03, "total_tokens": 1065472} +{"current_steps": 1110, "total_steps": 11784, "loss": 0.0767, "lr": 1.8812553011026293e-06, "epoch": 0.09419551934826884, "percentage": 9.42, "elapsed_time": "0:02:18", "remaining_time": "0:22:08", "throughput": 7744.45, "total_tokens": 1070144} +{"current_steps": 1115, "total_steps": 11784, "loss": 0.0741, "lr": 1.8897370653095844e-06, "epoch": 0.09461982348947726, "percentage": 9.46, "elapsed_time": "0:02:18", "remaining_time": "0:22:05", "throughput": 7758.86, "total_tokens": 1074688} +{"current_steps": 1120, "total_steps": 11784, "loss": 0.0479, "lr": 1.8982188295165394e-06, "epoch": 0.09504412763068568, "percentage": 9.5, "elapsed_time": "0:02:18", "remaining_time": "0:22:01", "throughput": 7771.78, "total_tokens": 1079040} +{"current_steps": 1125, "total_steps": 11784, "loss": 0.0952, "lr": 1.9067005937234943e-06, "epoch": 0.0954684317718941, "percentage": 9.55, "elapsed_time": "0:02:19", "remaining_time": "0:21:58", "throughput": 7785.03, "total_tokens": 1083456} +{"current_steps": 1130, "total_steps": 11784, "loss": 0.1757, "lr": 1.9151823579304493e-06, "epoch": 0.09589273591310252, "percentage": 9.59, "elapsed_time": "0:02:19", "remaining_time": "0:21:55", "throughput": 7799.64, "total_tokens": 1088064} +{"current_steps": 1135, "total_steps": 11784, "loss": 0.0502, "lr": 1.9236641221374044e-06, "epoch": 0.09631704005431092, "percentage": 9.63, "elapsed_time": "0:02:19", "remaining_time": "0:21:51", "throughput": 7813.47, "total_tokens": 1092544} +{"current_steps": 1140, "total_steps": 11784, "loss": 0.0995, "lr": 1.9321458863443595e-06, "epoch": 0.09674134419551934, "percentage": 9.67, "elapsed_time": "0:02:20", "remaining_time": "0:21:48", "throughput": 7831.43, "total_tokens": 1097792} +{"current_steps": 1145, "total_steps": 11784, "loss": 0.1066, "lr": 1.9406276505513145e-06, "epoch": 0.09716564833672776, "percentage": 9.72, "elapsed_time": "0:02:20", "remaining_time": "0:21:45", "throughput": 7848.96, "total_tokens": 1102912} +{"current_steps": 1150, "total_steps": 11784, "loss": 0.0254, "lr": 1.9491094147582696e-06, "epoch": 0.09758995247793618, "percentage": 9.76, "elapsed_time": "0:02:20", "remaining_time": "0:21:42", "throughput": 7865.12, "total_tokens": 1107840} +{"current_steps": 1155, "total_steps": 11784, "loss": 0.1516, "lr": 1.9575911789652246e-06, "epoch": 0.0980142566191446, "percentage": 9.8, "elapsed_time": "0:02:21", "remaining_time": "0:21:39", "throughput": 7879.36, "total_tokens": 1112448} +{"current_steps": 1160, "total_steps": 11784, "loss": 0.1223, "lr": 1.9660729431721797e-06, "epoch": 0.09843856076035302, "percentage": 9.84, "elapsed_time": "0:02:21", "remaining_time": "0:21:36", "throughput": 7894.37, "total_tokens": 1117248} +{"current_steps": 1165, "total_steps": 11784, "loss": 0.089, "lr": 1.9745547073791347e-06, "epoch": 0.09886286490156145, "percentage": 9.89, "elapsed_time": "0:02:21", "remaining_time": "0:21:33", "throughput": 7909.14, "total_tokens": 1121984} +{"current_steps": 1170, "total_steps": 11784, "loss": 0.0988, "lr": 1.98303647158609e-06, "epoch": 0.09928716904276985, "percentage": 9.93, "elapsed_time": "0:02:22", "remaining_time": "0:21:30", "throughput": 7925.64, "total_tokens": 1127040} +{"current_steps": 1175, "total_steps": 11784, "loss": 0.0578, "lr": 1.991518235793045e-06, "epoch": 0.09971147318397827, "percentage": 9.97, "elapsed_time": "0:02:22", "remaining_time": "0:21:26", "throughput": 7941.13, "total_tokens": 1131904} +{"current_steps": 1180, "total_steps": 11784, "loss": 0.1054, "lr": 2e-06, "epoch": 0.10013577732518669, "percentage": 10.01, "elapsed_time": "0:02:22", "remaining_time": "0:21:23", "throughput": 7954.32, "total_tokens": 1136384} +{"current_steps": 1180, "total_steps": 11784, "eval_loss": 0.07076410949230194, "epoch": 0.10013577732518669, "percentage": 10.01, "elapsed_time": "0:02:38", "remaining_time": "0:23:45", "throughput": 7163.83, "total_tokens": 1136384} +{"current_steps": 1185, "total_steps": 11784, "loss": 0.0543, "lr": 1.999998903046209e-06, "epoch": 0.10056008146639511, "percentage": 10.06, "elapsed_time": "0:03:25", "remaining_time": "0:30:38", "throughput": 5549.27, "total_tokens": 1140864} +{"current_steps": 1190, "total_steps": 11784, "loss": 0.1416, "lr": 1.999995612187243e-06, "epoch": 0.10098438560760353, "percentage": 10.1, "elapsed_time": "0:03:25", "remaining_time": "0:30:33", "throughput": 5562.43, "total_tokens": 1145408} +{"current_steps": 1195, "total_steps": 11784, "loss": 0.1497, "lr": 1.9999901274303226e-06, "epoch": 0.10140868974881195, "percentage": 10.14, "elapsed_time": "0:03:26", "remaining_time": "0:30:27", "throughput": 5577.56, "total_tokens": 1150400} +{"current_steps": 1200, "total_steps": 11784, "loss": 0.1094, "lr": 1.9999824487874795e-06, "epoch": 0.10183299389002037, "percentage": 10.18, "elapsed_time": "0:03:26", "remaining_time": "0:30:22", "throughput": 5590.36, "total_tokens": 1154880} +{"current_steps": 1205, "total_steps": 11784, "loss": 0.1046, "lr": 1.999972576275561e-06, "epoch": 0.10225729803122878, "percentage": 10.23, "elapsed_time": "0:03:26", "remaining_time": "0:30:16", "throughput": 5604.03, "total_tokens": 1159552} +{"current_steps": 1210, "total_steps": 11784, "loss": 0.0262, "lr": 1.999960509916226e-06, "epoch": 0.1026816021724372, "percentage": 10.27, "elapsed_time": "0:03:27", "remaining_time": "0:30:11", "throughput": 5620.01, "total_tokens": 1164800} +{"current_steps": 1215, "total_steps": 11784, "loss": 0.0621, "lr": 1.9999462497359463e-06, "epoch": 0.10310590631364562, "percentage": 10.31, "elapsed_time": "0:03:27", "remaining_time": "0:30:05", "throughput": 5637.12, "total_tokens": 1170304} +{"current_steps": 1220, "total_steps": 11784, "loss": 0.0834, "lr": 1.999929795766009e-06, "epoch": 0.10353021045485404, "percentage": 10.35, "elapsed_time": "0:03:27", "remaining_time": "0:30:00", "throughput": 5650.95, "total_tokens": 1175040} +{"current_steps": 1225, "total_steps": 11784, "loss": 0.0045, "lr": 1.999911148042511e-06, "epoch": 0.10395451459606246, "percentage": 10.4, "elapsed_time": "0:03:28", "remaining_time": "0:29:55", "throughput": 5666.88, "total_tokens": 1180288} +{"current_steps": 1230, "total_steps": 11784, "loss": 0.097, "lr": 1.999890306606365e-06, "epoch": 0.10437881873727088, "percentage": 10.44, "elapsed_time": "0:03:28", "remaining_time": "0:29:49", "throughput": 5680.81, "total_tokens": 1185088} +{"current_steps": 1235, "total_steps": 11784, "loss": 0.0987, "lr": 1.9998672715032944e-06, "epoch": 0.10480312287847929, "percentage": 10.48, "elapsed_time": "0:03:28", "remaining_time": "0:29:44", "throughput": 5693.17, "total_tokens": 1189504} +{"current_steps": 1240, "total_steps": 11784, "loss": 0.1065, "lr": 1.999842042783836e-06, "epoch": 0.10522742701968771, "percentage": 10.52, "elapsed_time": "0:03:29", "remaining_time": "0:29:39", "throughput": 5707.04, "total_tokens": 1194304} +{"current_steps": 1245, "total_steps": 11784, "loss": 0.0156, "lr": 1.99981462050334e-06, "epoch": 0.10565173116089613, "percentage": 10.57, "elapsed_time": "0:03:29", "remaining_time": "0:29:34", "throughput": 5720.28, "total_tokens": 1198976} +{"current_steps": 1250, "total_steps": 11784, "loss": 0.0797, "lr": 1.999785004721968e-06, "epoch": 0.10607603530210455, "percentage": 10.61, "elapsed_time": "0:03:29", "remaining_time": "0:29:29", "throughput": 5732.92, "total_tokens": 1203520} +{"current_steps": 1255, "total_steps": 11784, "loss": 0.0947, "lr": 1.9997531955046936e-06, "epoch": 0.10650033944331297, "percentage": 10.65, "elapsed_time": "0:03:30", "remaining_time": "0:29:23", "throughput": 5744.51, "total_tokens": 1207808} +{"current_steps": 1260, "total_steps": 11784, "loss": 0.0938, "lr": 1.9997191929213044e-06, "epoch": 0.10692464358452139, "percentage": 10.69, "elapsed_time": "0:03:30", "remaining_time": "0:29:19", "throughput": 5759.73, "total_tokens": 1212992} +{"current_steps": 1265, "total_steps": 11784, "loss": 0.1488, "lr": 1.999682997046398e-06, "epoch": 0.10734894772572981, "percentage": 10.73, "elapsed_time": "0:03:30", "remaining_time": "0:29:13", "throughput": 5771.49, "total_tokens": 1217344} +{"current_steps": 1270, "total_steps": 11784, "loss": 0.0821, "lr": 1.9996446079593855e-06, "epoch": 0.10777325186693822, "percentage": 10.78, "elapsed_time": "0:03:31", "remaining_time": "0:29:08", "throughput": 5784.93, "total_tokens": 1222080} +{"current_steps": 1275, "total_steps": 11784, "loss": 0.1393, "lr": 1.999604025744489e-06, "epoch": 0.10819755600814664, "percentage": 10.82, "elapsed_time": "0:03:31", "remaining_time": "0:29:03", "throughput": 5797.92, "total_tokens": 1226752} +{"current_steps": 1280, "total_steps": 11784, "loss": 0.1702, "lr": 1.9995612504907414e-06, "epoch": 0.10862186014935506, "percentage": 10.86, "elapsed_time": "0:03:31", "remaining_time": "0:28:59", "throughput": 5812.39, "total_tokens": 1231808} +{"current_steps": 1285, "total_steps": 11784, "loss": 0.1218, "lr": 1.999516282291988e-06, "epoch": 0.10904616429056348, "percentage": 10.9, "elapsed_time": "0:03:32", "remaining_time": "0:28:54", "throughput": 5824.86, "total_tokens": 1236352} +{"current_steps": 1290, "total_steps": 11784, "loss": 0.0983, "lr": 1.9994691212468853e-06, "epoch": 0.1094704684317719, "percentage": 10.95, "elapsed_time": "0:03:32", "remaining_time": "0:28:49", "throughput": 5838.11, "total_tokens": 1241088} +{"current_steps": 1295, "total_steps": 11784, "loss": 0.0506, "lr": 1.9994197674588997e-06, "epoch": 0.10989477257298032, "percentage": 10.99, "elapsed_time": "0:03:32", "remaining_time": "0:28:44", "throughput": 5853.3, "total_tokens": 1246336} +{"current_steps": 1300, "total_steps": 11784, "loss": 0.1021, "lr": 1.999368221036309e-06, "epoch": 0.11031907671418872, "percentage": 11.03, "elapsed_time": "0:03:33", "remaining_time": "0:28:39", "throughput": 5868.82, "total_tokens": 1251648} +{"current_steps": 1305, "total_steps": 11784, "loss": 0.0848, "lr": 1.9993144820922015e-06, "epoch": 0.11074338085539714, "percentage": 11.07, "elapsed_time": "0:03:33", "remaining_time": "0:28:35", "throughput": 5882.1, "total_tokens": 1256448} +{"current_steps": 1310, "total_steps": 11784, "loss": 0.096, "lr": 1.9992585507444757e-06, "epoch": 0.11116768499660556, "percentage": 11.12, "elapsed_time": "0:03:33", "remaining_time": "0:28:30", "throughput": 5895.06, "total_tokens": 1261184} +{"current_steps": 1315, "total_steps": 11784, "loss": 0.1, "lr": 1.999200427115839e-06, "epoch": 0.11159198913781398, "percentage": 11.16, "elapsed_time": "0:03:34", "remaining_time": "0:28:25", "throughput": 5909.56, "total_tokens": 1266304} +{"current_steps": 1320, "total_steps": 11784, "loss": 0.0415, "lr": 1.99914011133381e-06, "epoch": 0.1120162932790224, "percentage": 11.2, "elapsed_time": "0:03:34", "remaining_time": "0:28:21", "throughput": 5921.61, "total_tokens": 1270848} +{"current_steps": 1325, "total_steps": 11784, "loss": 0.0318, "lr": 1.999077603530716e-06, "epoch": 0.11244059742023083, "percentage": 11.24, "elapsed_time": "0:03:34", "remaining_time": "0:28:16", "throughput": 5934.9, "total_tokens": 1275712} +{"current_steps": 1330, "total_steps": 11784, "loss": 0.0425, "lr": 1.999012903843693e-06, "epoch": 0.11286490156143925, "percentage": 11.29, "elapsed_time": "0:03:35", "remaining_time": "0:28:12", "throughput": 5945.86, "total_tokens": 1280000} +{"current_steps": 1335, "total_steps": 11784, "loss": 0.0826, "lr": 1.9989460124146854e-06, "epoch": 0.11328920570264765, "percentage": 11.33, "elapsed_time": "0:03:35", "remaining_time": "0:28:07", "throughput": 5961.5, "total_tokens": 1285440} +{"current_steps": 1340, "total_steps": 11784, "loss": 0.0835, "lr": 1.998876929390448e-06, "epoch": 0.11371350984385607, "percentage": 11.37, "elapsed_time": "0:03:35", "remaining_time": "0:28:03", "throughput": 5974.19, "total_tokens": 1290176} +{"current_steps": 1345, "total_steps": 11784, "loss": 0.108, "lr": 1.9988056549225423e-06, "epoch": 0.11413781398506449, "percentage": 11.41, "elapsed_time": "0:03:36", "remaining_time": "0:27:58", "throughput": 5986.76, "total_tokens": 1294912} +{"current_steps": 1350, "total_steps": 11784, "loss": 0.0703, "lr": 1.9987321891673375e-06, "epoch": 0.11456211812627291, "percentage": 11.46, "elapsed_time": "0:03:36", "remaining_time": "0:27:54", "throughput": 5997.36, "total_tokens": 1299136} +{"current_steps": 1355, "total_steps": 11784, "loss": 0.1112, "lr": 1.9986565322860116e-06, "epoch": 0.11498642226748133, "percentage": 11.5, "elapsed_time": "0:03:36", "remaining_time": "0:27:49", "throughput": 6010.21, "total_tokens": 1303936} +{"current_steps": 1360, "total_steps": 11784, "loss": 0.045, "lr": 1.9985786844445474e-06, "epoch": 0.11541072640868975, "percentage": 11.54, "elapsed_time": "0:03:37", "remaining_time": "0:27:45", "throughput": 6023.86, "total_tokens": 1308928} +{"current_steps": 1365, "total_steps": 11784, "loss": 0.0518, "lr": 1.9984986458137366e-06, "epoch": 0.11583503054989816, "percentage": 11.58, "elapsed_time": "0:03:37", "remaining_time": "0:27:41", "throughput": 6036.62, "total_tokens": 1313728} +{"current_steps": 1370, "total_steps": 11784, "loss": 0.062, "lr": 1.998416416569177e-06, "epoch": 0.11625933469110658, "percentage": 11.63, "elapsed_time": "0:03:37", "remaining_time": "0:27:36", "throughput": 6048.82, "total_tokens": 1318400} +{"current_steps": 1375, "total_steps": 11784, "loss": 0.0946, "lr": 1.9983319968912714e-06, "epoch": 0.116683638832315, "percentage": 11.67, "elapsed_time": "0:03:38", "remaining_time": "0:27:32", "throughput": 6059.82, "total_tokens": 1322752} +{"current_steps": 1380, "total_steps": 11784, "loss": 0.0354, "lr": 1.9982453869652286e-06, "epoch": 0.11710794297352342, "percentage": 11.71, "elapsed_time": "0:03:38", "remaining_time": "0:27:28", "throughput": 6072.63, "total_tokens": 1327552} +{"current_steps": 1385, "total_steps": 11784, "loss": 0.084, "lr": 1.9981565869810637e-06, "epoch": 0.11753224711473184, "percentage": 11.75, "elapsed_time": "0:03:38", "remaining_time": "0:27:23", "throughput": 6082.95, "total_tokens": 1331776} +{"current_steps": 1390, "total_steps": 11784, "loss": 0.0591, "lr": 1.998065597133594e-06, "epoch": 0.11795655125594026, "percentage": 11.8, "elapsed_time": "0:03:39", "remaining_time": "0:27:19", "throughput": 6093.81, "total_tokens": 1336128} +{"current_steps": 1395, "total_steps": 11784, "loss": 0.1068, "lr": 1.9979724176224447e-06, "epoch": 0.11838085539714868, "percentage": 11.84, "elapsed_time": "0:03:39", "remaining_time": "0:27:15", "throughput": 6105.87, "total_tokens": 1340800} +{"current_steps": 1400, "total_steps": 11784, "loss": 0.0982, "lr": 1.997877048652042e-06, "epoch": 0.11880515953835709, "percentage": 11.88, "elapsed_time": "0:03:39", "remaining_time": "0:27:11", "throughput": 6117.61, "total_tokens": 1345408} +{"current_steps": 1405, "total_steps": 11784, "loss": 0.12, "lr": 1.9977794904316163e-06, "epoch": 0.11922946367956551, "percentage": 11.92, "elapsed_time": "0:03:40", "remaining_time": "0:27:07", "throughput": 6130.12, "total_tokens": 1350208} +{"current_steps": 1410, "total_steps": 11784, "loss": 0.0689, "lr": 1.9976797431752023e-06, "epoch": 0.11965376782077393, "percentage": 11.97, "elapsed_time": "0:03:40", "remaining_time": "0:27:02", "throughput": 6141.13, "total_tokens": 1354624} +{"current_steps": 1415, "total_steps": 11784, "loss": 0.0574, "lr": 1.9975778071016357e-06, "epoch": 0.12007807196198235, "percentage": 12.01, "elapsed_time": "0:03:40", "remaining_time": "0:26:58", "throughput": 6152.79, "total_tokens": 1359232} +{"current_steps": 1420, "total_steps": 11784, "loss": 0.0794, "lr": 1.997473682434555e-06, "epoch": 0.12050237610319077, "percentage": 12.05, "elapsed_time": "0:03:41", "remaining_time": "0:26:54", "throughput": 6164.78, "total_tokens": 1363904} +{"current_steps": 1425, "total_steps": 11784, "loss": 0.0577, "lr": 1.9973673694023998e-06, "epoch": 0.12092668024439919, "percentage": 12.09, "elapsed_time": "0:03:41", "remaining_time": "0:26:50", "throughput": 6176.19, "total_tokens": 1368448} +{"current_steps": 1430, "total_steps": 11784, "loss": 0.099, "lr": 1.997258868238411e-06, "epoch": 0.12135098438560761, "percentage": 12.14, "elapsed_time": "0:03:41", "remaining_time": "0:26:46", "throughput": 6187.08, "total_tokens": 1372864} +{"current_steps": 1435, "total_steps": 11784, "loss": 0.0979, "lr": 1.997148179180631e-06, "epoch": 0.12177528852681602, "percentage": 12.18, "elapsed_time": "0:03:42", "remaining_time": "0:26:42", "throughput": 6200.39, "total_tokens": 1377920} +{"current_steps": 1440, "total_steps": 11784, "loss": 0.0951, "lr": 1.9970353024719003e-06, "epoch": 0.12219959266802444, "percentage": 12.22, "elapsed_time": "0:03:42", "remaining_time": "0:26:38", "throughput": 6211.7, "total_tokens": 1382464} +{"current_steps": 1445, "total_steps": 11784, "loss": 0.065, "lr": 1.9969202383598605e-06, "epoch": 0.12262389680923286, "percentage": 12.26, "elapsed_time": "0:03:42", "remaining_time": "0:26:34", "throughput": 6223.2, "total_tokens": 1387072} +{"current_steps": 1450, "total_steps": 11784, "loss": 0.0363, "lr": 1.996802987096952e-06, "epoch": 0.12304820095044128, "percentage": 12.3, "elapsed_time": "0:03:43", "remaining_time": "0:26:30", "throughput": 6233.92, "total_tokens": 1391488} +{"current_steps": 1455, "total_steps": 11784, "loss": 0.1148, "lr": 1.9966835489404123e-06, "epoch": 0.1234725050916497, "percentage": 12.35, "elapsed_time": "0:03:43", "remaining_time": "0:26:27", "throughput": 6250.5, "total_tokens": 1397440} +{"current_steps": 1460, "total_steps": 11784, "loss": 0.0559, "lr": 1.996561924152278e-06, "epoch": 0.12389680923285812, "percentage": 12.39, "elapsed_time": "0:03:43", "remaining_time": "0:26:23", "throughput": 6261.86, "total_tokens": 1402048} +{"current_steps": 1465, "total_steps": 11784, "loss": 0.0275, "lr": 1.996438112999383e-06, "epoch": 0.12432111337406652, "percentage": 12.43, "elapsed_time": "0:03:44", "remaining_time": "0:26:19", "throughput": 6273.79, "total_tokens": 1406784} +{"current_steps": 1470, "total_steps": 11784, "loss": 0.1324, "lr": 1.9963121157533573e-06, "epoch": 0.12474541751527495, "percentage": 12.47, "elapsed_time": "0:03:44", "remaining_time": "0:26:15", "throughput": 6284.81, "total_tokens": 1411328} +{"current_steps": 1475, "total_steps": 11784, "loss": 0.1638, "lr": 1.9961839326906272e-06, "epoch": 0.12516972165648338, "percentage": 12.52, "elapsed_time": "0:03:44", "remaining_time": "0:26:11", "throughput": 6296.09, "total_tokens": 1415936} +{"current_steps": 1480, "total_steps": 11784, "loss": 0.1479, "lr": 1.9960535640924146e-06, "epoch": 0.12559402579769177, "percentage": 12.56, "elapsed_time": "0:03:45", "remaining_time": "0:26:08", "throughput": 6310.02, "total_tokens": 1421248} +{"current_steps": 1485, "total_steps": 11784, "loss": 0.0392, "lr": 1.995921010244736e-06, "epoch": 0.1260183299389002, "percentage": 12.6, "elapsed_time": "0:03:45", "remaining_time": "0:26:04", "throughput": 6320.77, "total_tokens": 1425728} +{"current_steps": 1490, "total_steps": 11784, "loss": 0.0857, "lr": 1.9957862714384025e-06, "epoch": 0.1264426340801086, "percentage": 12.64, "elapsed_time": "0:03:45", "remaining_time": "0:26:00", "throughput": 6335.25, "total_tokens": 1431296} +{"current_steps": 1495, "total_steps": 11784, "loss": 0.0819, "lr": 1.9956493479690188e-06, "epoch": 0.12686693822131703, "percentage": 12.69, "elapsed_time": "0:03:46", "remaining_time": "0:25:57", "throughput": 6347.37, "total_tokens": 1436160} +{"current_steps": 1500, "total_steps": 11784, "loss": 0.1003, "lr": 1.9955102401369814e-06, "epoch": 0.12729124236252545, "percentage": 12.73, "elapsed_time": "0:03:46", "remaining_time": "0:25:53", "throughput": 6359.24, "total_tokens": 1440960} +{"current_steps": 1505, "total_steps": 11784, "loss": 0.0611, "lr": 1.9953689482474806e-06, "epoch": 0.12771554650373387, "percentage": 12.77, "elapsed_time": "0:03:46", "remaining_time": "0:25:49", "throughput": 6371.05, "total_tokens": 1445760} +{"current_steps": 1510, "total_steps": 11784, "loss": 0.0144, "lr": 1.995225472610498e-06, "epoch": 0.1281398506449423, "percentage": 12.81, "elapsed_time": "0:03:47", "remaining_time": "0:25:46", "throughput": 6383.33, "total_tokens": 1450688} +{"current_steps": 1515, "total_steps": 11784, "loss": 0.1675, "lr": 1.9950798135408057e-06, "epoch": 0.12856415478615071, "percentage": 12.86, "elapsed_time": "0:03:47", "remaining_time": "0:25:42", "throughput": 6395.36, "total_tokens": 1455552} +{"current_steps": 1520, "total_steps": 11784, "loss": 0.0808, "lr": 1.994931971357966e-06, "epoch": 0.12898845892735913, "percentage": 12.9, "elapsed_time": "0:03:47", "remaining_time": "0:25:39", "throughput": 6407.35, "total_tokens": 1460416} +{"current_steps": 1525, "total_steps": 11784, "loss": 0.0717, "lr": 1.9947819463863316e-06, "epoch": 0.12941276306856755, "percentage": 12.94, "elapsed_time": "0:03:48", "remaining_time": "0:25:35", "throughput": 6423.43, "total_tokens": 1466432} +{"current_steps": 1530, "total_steps": 11784, "loss": 0.0989, "lr": 1.9946297389550432e-06, "epoch": 0.12983706720977597, "percentage": 12.98, "elapsed_time": "0:03:48", "remaining_time": "0:25:32", "throughput": 6435.15, "total_tokens": 1471232} +{"current_steps": 1535, "total_steps": 11784, "loss": 0.0587, "lr": 1.9944753493980292e-06, "epoch": 0.1302613713509844, "percentage": 13.03, "elapsed_time": "0:03:48", "remaining_time": "0:25:28", "throughput": 6447.27, "total_tokens": 1476160} +{"current_steps": 1540, "total_steps": 11784, "loss": 0.0755, "lr": 1.9943187780540062e-06, "epoch": 0.13068567549219282, "percentage": 13.07, "elapsed_time": "0:03:49", "remaining_time": "0:25:25", "throughput": 6459.49, "total_tokens": 1481152} +{"current_steps": 1545, "total_steps": 11784, "loss": 0.1668, "lr": 1.994160025266478e-06, "epoch": 0.13110997963340124, "percentage": 13.11, "elapsed_time": "0:03:49", "remaining_time": "0:25:21", "throughput": 6472.42, "total_tokens": 1486336} +{"current_steps": 1550, "total_steps": 11784, "loss": 0.0588, "lr": 1.9939990913837327e-06, "epoch": 0.13153428377460963, "percentage": 13.15, "elapsed_time": "0:03:49", "remaining_time": "0:25:18", "throughput": 6484.4, "total_tokens": 1491264} +{"current_steps": 1555, "total_steps": 11784, "loss": 0.0672, "lr": 1.993835976758845e-06, "epoch": 0.13195858791581805, "percentage": 13.2, "elapsed_time": "0:03:50", "remaining_time": "0:25:14", "throughput": 6494.34, "total_tokens": 1495680} +{"current_steps": 1560, "total_steps": 11784, "loss": 0.1687, "lr": 1.993670681749673e-06, "epoch": 0.13238289205702647, "percentage": 13.24, "elapsed_time": "0:03:50", "remaining_time": "0:25:11", "throughput": 6509.18, "total_tokens": 1501376} +{"current_steps": 1565, "total_steps": 11784, "loss": 0.1089, "lr": 1.9935032067188587e-06, "epoch": 0.1328071961982349, "percentage": 13.28, "elapsed_time": "0:03:50", "remaining_time": "0:25:08", "throughput": 6520.65, "total_tokens": 1506176} +{"current_steps": 1570, "total_steps": 11784, "loss": 0.0749, "lr": 1.993333552033827e-06, "epoch": 0.1332315003394433, "percentage": 13.32, "elapsed_time": "0:03:51", "remaining_time": "0:25:05", "throughput": 6535.15, "total_tokens": 1511808} +{"current_steps": 1575, "total_steps": 11784, "loss": 0.0406, "lr": 1.9931617180667844e-06, "epoch": 0.13365580448065173, "percentage": 13.37, "elapsed_time": "0:03:51", "remaining_time": "0:25:01", "throughput": 6546.4, "total_tokens": 1516608} +{"current_steps": 1580, "total_steps": 11784, "loss": 0.0988, "lr": 1.992987705194719e-06, "epoch": 0.13408010862186015, "percentage": 13.41, "elapsed_time": "0:03:51", "remaining_time": "0:24:58", "throughput": 6557.28, "total_tokens": 1521280} +{"current_steps": 1585, "total_steps": 11784, "loss": 0.0683, "lr": 1.9928115137993983e-06, "epoch": 0.13450441276306857, "percentage": 13.45, "elapsed_time": "0:03:52", "remaining_time": "0:24:54", "throughput": 6568.55, "total_tokens": 1526080} +{"current_steps": 1590, "total_steps": 11784, "loss": 0.0429, "lr": 1.9926331442673703e-06, "epoch": 0.134928716904277, "percentage": 13.49, "elapsed_time": "0:03:52", "remaining_time": "0:24:51", "throughput": 6580.05, "total_tokens": 1530944} +{"current_steps": 1595, "total_steps": 11784, "loss": 0.1098, "lr": 1.992452596989962e-06, "epoch": 0.1353530210454854, "percentage": 13.54, "elapsed_time": "0:03:53", "remaining_time": "0:24:48", "throughput": 6593.12, "total_tokens": 1536256} +{"current_steps": 1600, "total_steps": 11784, "loss": 0.0842, "lr": 1.9922698723632763e-06, "epoch": 0.13577732518669383, "percentage": 13.58, "elapsed_time": "0:03:53", "remaining_time": "0:24:45", "throughput": 6603.56, "total_tokens": 1540864} +{"current_steps": 1605, "total_steps": 11784, "loss": 0.077, "lr": 1.992084970788195e-06, "epoch": 0.13620162932790225, "percentage": 13.62, "elapsed_time": "0:03:53", "remaining_time": "0:24:42", "throughput": 6613.79, "total_tokens": 1545536} +{"current_steps": 1610, "total_steps": 11784, "loss": 0.1246, "lr": 1.991897892670375e-06, "epoch": 0.13662593346911067, "percentage": 13.66, "elapsed_time": "0:03:54", "remaining_time": "0:24:38", "throughput": 6624.21, "total_tokens": 1550144} +{"current_steps": 1615, "total_steps": 11784, "loss": 0.0509, "lr": 1.9917086384202475e-06, "epoch": 0.13705023761031906, "percentage": 13.71, "elapsed_time": "0:03:54", "remaining_time": "0:24:35", "throughput": 6634.06, "total_tokens": 1554624} +{"current_steps": 1620, "total_steps": 11784, "loss": 0.1169, "lr": 1.9915172084530195e-06, "epoch": 0.13747454175152748, "percentage": 13.75, "elapsed_time": "0:03:54", "remaining_time": "0:24:32", "throughput": 6644.12, "total_tokens": 1559168} +{"current_steps": 1625, "total_steps": 11784, "loss": 0.0867, "lr": 1.9913236031886707e-06, "epoch": 0.1378988458927359, "percentage": 13.79, "elapsed_time": "0:03:55", "remaining_time": "0:24:29", "throughput": 6655.41, "total_tokens": 1564032} +{"current_steps": 1630, "total_steps": 11784, "loss": 0.0813, "lr": 1.9911278230519533e-06, "epoch": 0.13832315003394433, "percentage": 13.83, "elapsed_time": "0:03:55", "remaining_time": "0:24:25", "throughput": 6666.75, "total_tokens": 1568896} +{"current_steps": 1635, "total_steps": 11784, "loss": 0.0779, "lr": 1.9909298684723905e-06, "epoch": 0.13874745417515275, "percentage": 13.87, "elapsed_time": "0:03:55", "remaining_time": "0:24:22", "throughput": 6678.34, "total_tokens": 1573888} +{"current_steps": 1640, "total_steps": 11784, "loss": 0.0649, "lr": 1.9907297398842764e-06, "epoch": 0.13917175831636117, "percentage": 13.92, "elapsed_time": "0:03:55", "remaining_time": "0:24:19", "throughput": 6688.6, "total_tokens": 1578496} +{"current_steps": 1645, "total_steps": 11784, "loss": 0.0418, "lr": 1.9905274377266744e-06, "epoch": 0.1395960624575696, "percentage": 13.96, "elapsed_time": "0:03:56", "remaining_time": "0:24:16", "throughput": 6698.8, "total_tokens": 1583104} +{"current_steps": 1650, "total_steps": 11784, "loss": 0.1031, "lr": 1.9903229624434174e-06, "epoch": 0.140020366598778, "percentage": 14.0, "elapsed_time": "0:03:56", "remaining_time": "0:24:13", "throughput": 6708.7, "total_tokens": 1587648} +{"current_steps": 1655, "total_steps": 11784, "loss": 0.1513, "lr": 1.9901163144831047e-06, "epoch": 0.14044467073998643, "percentage": 14.04, "elapsed_time": "0:03:57", "remaining_time": "0:24:10", "throughput": 6722.24, "total_tokens": 1593216} +{"current_steps": 1660, "total_steps": 11784, "loss": 0.0057, "lr": 1.989907494299103e-06, "epoch": 0.14086897488119485, "percentage": 14.09, "elapsed_time": "0:03:57", "remaining_time": "0:24:07", "throughput": 6733.76, "total_tokens": 1598208} +{"current_steps": 1665, "total_steps": 11784, "loss": 0.0057, "lr": 1.989696502349545e-06, "epoch": 0.14129327902240327, "percentage": 14.13, "elapsed_time": "0:03:57", "remaining_time": "0:24:04", "throughput": 6743.43, "total_tokens": 1602688} +{"current_steps": 1670, "total_steps": 11784, "loss": 0.1691, "lr": 1.9894833390973266e-06, "epoch": 0.1417175831636117, "percentage": 14.17, "elapsed_time": "0:03:57", "remaining_time": "0:24:01", "throughput": 6751.57, "total_tokens": 1606784} +{"current_steps": 1675, "total_steps": 11784, "loss": 0.1757, "lr": 1.9892680050101085e-06, "epoch": 0.1421418873048201, "percentage": 14.21, "elapsed_time": "0:03:58", "remaining_time": "0:23:58", "throughput": 6762.33, "total_tokens": 1611584} +{"current_steps": 1680, "total_steps": 11784, "loss": 0.094, "lr": 1.9890505005603146e-06, "epoch": 0.1425661914460285, "percentage": 14.26, "elapsed_time": "0:03:58", "remaining_time": "0:23:55", "throughput": 6773.74, "total_tokens": 1616576} +{"current_steps": 1685, "total_steps": 11784, "loss": 0.0994, "lr": 1.9888308262251284e-06, "epoch": 0.14299049558723692, "percentage": 14.3, "elapsed_time": "0:03:58", "remaining_time": "0:23:52", "throughput": 6784.63, "total_tokens": 1621440} +{"current_steps": 1690, "total_steps": 11784, "loss": 0.071, "lr": 1.9886089824864956e-06, "epoch": 0.14341479972844534, "percentage": 14.34, "elapsed_time": "0:03:59", "remaining_time": "0:23:49", "throughput": 6795.72, "total_tokens": 1626368} +{"current_steps": 1695, "total_steps": 11784, "loss": 0.0566, "lr": 1.9883849698311213e-06, "epoch": 0.14383910386965376, "percentage": 14.38, "elapsed_time": "0:03:59", "remaining_time": "0:23:46", "throughput": 6804.77, "total_tokens": 1630784} +{"current_steps": 1700, "total_steps": 11784, "loss": 0.0815, "lr": 1.988158788750468e-06, "epoch": 0.14426340801086218, "percentage": 14.43, "elapsed_time": "0:03:59", "remaining_time": "0:23:43", "throughput": 6815.96, "total_tokens": 1635776} +{"current_steps": 1705, "total_steps": 11784, "loss": 0.0967, "lr": 1.9879304397407566e-06, "epoch": 0.1446877121520706, "percentage": 14.47, "elapsed_time": "0:04:00", "remaining_time": "0:23:40", "throughput": 6826.13, "total_tokens": 1640448} +{"current_steps": 1710, "total_steps": 11784, "loss": 0.0206, "lr": 1.987699923302963e-06, "epoch": 0.14511201629327902, "percentage": 14.51, "elapsed_time": "0:04:00", "remaining_time": "0:23:37", "throughput": 6837.14, "total_tokens": 1645440} +{"current_steps": 1715, "total_steps": 11784, "loss": 0.0811, "lr": 1.9874672399428195e-06, "epoch": 0.14553632043448744, "percentage": 14.55, "elapsed_time": "0:04:00", "remaining_time": "0:23:34", "throughput": 6846.67, "total_tokens": 1649984} +{"current_steps": 1720, "total_steps": 11784, "loss": 0.1235, "lr": 1.9872323901708116e-06, "epoch": 0.14596062457569586, "percentage": 14.6, "elapsed_time": "0:04:01", "remaining_time": "0:23:32", "throughput": 6856.83, "total_tokens": 1654720} +{"current_steps": 1725, "total_steps": 11784, "loss": 0.1061, "lr": 1.9869953745021785e-06, "epoch": 0.14638492871690428, "percentage": 14.64, "elapsed_time": "0:04:01", "remaining_time": "0:23:29", "throughput": 6867.69, "total_tokens": 1659648} +{"current_steps": 1730, "total_steps": 11784, "loss": 0.0567, "lr": 1.9867561934569103e-06, "epoch": 0.1468092328581127, "percentage": 14.68, "elapsed_time": "0:04:02", "remaining_time": "0:23:26", "throughput": 6879.57, "total_tokens": 1664896} +{"current_steps": 1735, "total_steps": 11784, "loss": 0.0482, "lr": 1.9865148475597475e-06, "epoch": 0.14723353699932112, "percentage": 14.72, "elapsed_time": "0:04:02", "remaining_time": "0:23:23", "throughput": 6889.46, "total_tokens": 1669568} +{"current_steps": 1740, "total_steps": 11784, "loss": 0.0588, "lr": 1.986271337340182e-06, "epoch": 0.14765784114052954, "percentage": 14.77, "elapsed_time": "0:04:02", "remaining_time": "0:23:20", "throughput": 6899.95, "total_tokens": 1674432} +{"current_steps": 1745, "total_steps": 11784, "loss": 0.0265, "lr": 1.9860256633324513e-06, "epoch": 0.14808214528173794, "percentage": 14.81, "elapsed_time": "0:04:02", "remaining_time": "0:23:17", "throughput": 6908.34, "total_tokens": 1678720} +{"current_steps": 1750, "total_steps": 11784, "loss": 0.0692, "lr": 1.9857778260755426e-06, "epoch": 0.14850644942294636, "percentage": 14.85, "elapsed_time": "0:04:03", "remaining_time": "0:23:15", "throughput": 6919.77, "total_tokens": 1683904} +{"current_steps": 1755, "total_steps": 11784, "loss": 0.0717, "lr": 1.9855278261131876e-06, "epoch": 0.14893075356415478, "percentage": 14.89, "elapsed_time": "0:04:03", "remaining_time": "0:23:12", "throughput": 6931.03, "total_tokens": 1689024} +{"current_steps": 1760, "total_steps": 11784, "loss": 0.0851, "lr": 1.985275663993863e-06, "epoch": 0.1493550577053632, "percentage": 14.94, "elapsed_time": "0:04:04", "remaining_time": "0:23:09", "throughput": 6940.51, "total_tokens": 1693632} +{"current_steps": 1765, "total_steps": 11784, "loss": 0.0532, "lr": 1.9850213402707888e-06, "epoch": 0.14977936184657162, "percentage": 14.98, "elapsed_time": "0:04:04", "remaining_time": "0:23:07", "throughput": 6949.95, "total_tokens": 1698304} +{"current_steps": 1770, "total_steps": 11784, "loss": 0.1201, "lr": 1.9847648555019286e-06, "epoch": 0.15020366598778004, "percentage": 15.02, "elapsed_time": "0:04:04", "remaining_time": "0:23:04", "throughput": 6962.22, "total_tokens": 1703808} +{"current_steps": 1770, "total_steps": 11784, "eval_loss": 0.0835869163274765, "epoch": 0.15020366598778004, "percentage": 15.02, "elapsed_time": "0:04:20", "remaining_time": "0:24:35", "throughput": 6533.39, "total_tokens": 1703808} +{"current_steps": 1775, "total_steps": 11784, "loss": 0.0634, "lr": 1.9845062102499858e-06, "epoch": 0.15062797012898846, "percentage": 15.06, "elapsed_time": "0:04:47", "remaining_time": "0:27:00", "throughput": 5946.61, "total_tokens": 1708992} +{"current_steps": 1780, "total_steps": 11784, "loss": 0.0769, "lr": 1.9842454050824043e-06, "epoch": 0.15105227427019688, "percentage": 15.11, "elapsed_time": "0:04:47", "remaining_time": "0:26:57", "throughput": 5955.87, "total_tokens": 1713600} +{"current_steps": 1785, "total_steps": 11784, "loss": 0.0963, "lr": 1.9839824405713663e-06, "epoch": 0.1514765784114053, "percentage": 15.15, "elapsed_time": "0:04:48", "remaining_time": "0:26:53", "throughput": 5965.1, "total_tokens": 1718208} +{"current_steps": 1790, "total_steps": 11784, "loss": 0.1394, "lr": 1.983717317293792e-06, "epoch": 0.15190088255261372, "percentage": 15.19, "elapsed_time": "0:04:48", "remaining_time": "0:26:50", "throughput": 5973.51, "total_tokens": 1722560} +{"current_steps": 1795, "total_steps": 11784, "loss": 0.0263, "lr": 1.983450035831337e-06, "epoch": 0.15232518669382214, "percentage": 15.23, "elapsed_time": "0:04:48", "remaining_time": "0:26:46", "throughput": 5983.09, "total_tokens": 1727296} +{"current_steps": 1800, "total_steps": 11784, "loss": 0.0809, "lr": 1.983180596770392e-06, "epoch": 0.15274949083503056, "percentage": 15.27, "elapsed_time": "0:04:49", "remaining_time": "0:26:43", "throughput": 5994.36, "total_tokens": 1732608} +{"current_steps": 1805, "total_steps": 11784, "loss": 0.0569, "lr": 1.982909000702082e-06, "epoch": 0.15317379497623898, "percentage": 15.32, "elapsed_time": "0:04:49", "remaining_time": "0:26:39", "throughput": 6003.66, "total_tokens": 1737280} +{"current_steps": 1810, "total_steps": 11784, "loss": 0.0948, "lr": 1.982635248222264e-06, "epoch": 0.15359809911744737, "percentage": 15.36, "elapsed_time": "0:04:49", "remaining_time": "0:26:36", "throughput": 6011.41, "total_tokens": 1741440} +{"current_steps": 1815, "total_steps": 11784, "loss": 0.0485, "lr": 1.982359339931524e-06, "epoch": 0.1540224032586558, "percentage": 15.4, "elapsed_time": "0:04:50", "remaining_time": "0:26:32", "throughput": 6020.94, "total_tokens": 1746176} +{"current_steps": 1820, "total_steps": 11784, "loss": 0.1267, "lr": 1.9820812764351804e-06, "epoch": 0.1544467073998642, "percentage": 15.44, "elapsed_time": "0:04:50", "remaining_time": "0:26:29", "throughput": 6032.65, "total_tokens": 1751680} +{"current_steps": 1825, "total_steps": 11784, "loss": 0.0524, "lr": 1.981801058343279e-06, "epoch": 0.15487101154107263, "percentage": 15.49, "elapsed_time": "0:04:50", "remaining_time": "0:26:26", "throughput": 6041.99, "total_tokens": 1756416} +{"current_steps": 1830, "total_steps": 11784, "loss": 0.0919, "lr": 1.981518686270592e-06, "epoch": 0.15529531568228105, "percentage": 15.53, "elapsed_time": "0:04:51", "remaining_time": "0:26:23", "throughput": 6050.78, "total_tokens": 1760960} +{"current_steps": 1835, "total_steps": 11784, "loss": 0.0884, "lr": 1.9812341608366183e-06, "epoch": 0.15571961982348947, "percentage": 15.57, "elapsed_time": "0:04:51", "remaining_time": "0:26:19", "throughput": 6061.69, "total_tokens": 1766208} +{"current_steps": 1840, "total_steps": 11784, "loss": 0.0528, "lr": 1.980947482665579e-06, "epoch": 0.1561439239646979, "percentage": 15.61, "elapsed_time": "0:04:51", "remaining_time": "0:26:16", "throughput": 6072.01, "total_tokens": 1771264} +{"current_steps": 1845, "total_steps": 11784, "loss": 0.15, "lr": 1.980658652386421e-06, "epoch": 0.15656822810590632, "percentage": 15.66, "elapsed_time": "0:04:52", "remaining_time": "0:26:13", "throughput": 6081.92, "total_tokens": 1776192} +{"current_steps": 1850, "total_steps": 11784, "loss": 0.0842, "lr": 1.9803676706328102e-06, "epoch": 0.15699253224711474, "percentage": 15.7, "elapsed_time": "0:04:52", "remaining_time": "0:26:09", "throughput": 6091.49, "total_tokens": 1780992} +{"current_steps": 1855, "total_steps": 11784, "loss": 0.0473, "lr": 1.980074538043134e-06, "epoch": 0.15741683638832316, "percentage": 15.74, "elapsed_time": "0:04:52", "remaining_time": "0:26:06", "throughput": 6099.81, "total_tokens": 1785408} +{"current_steps": 1860, "total_steps": 11784, "loss": 0.1532, "lr": 1.9797792552604985e-06, "epoch": 0.15784114052953158, "percentage": 15.78, "elapsed_time": "0:04:53", "remaining_time": "0:26:03", "throughput": 6109.25, "total_tokens": 1790208} +{"current_steps": 1865, "total_steps": 11784, "loss": 0.0137, "lr": 1.9794818229327266e-06, "epoch": 0.15826544467074, "percentage": 15.83, "elapsed_time": "0:04:53", "remaining_time": "0:26:00", "throughput": 6119.45, "total_tokens": 1795264} +{"current_steps": 1870, "total_steps": 11784, "loss": 0.0572, "lr": 1.9791822417123576e-06, "epoch": 0.15868974881194842, "percentage": 15.87, "elapsed_time": "0:04:53", "remaining_time": "0:25:57", "throughput": 6128.9, "total_tokens": 1800064} +{"current_steps": 1875, "total_steps": 11784, "loss": 0.05, "lr": 1.9788805122566445e-06, "epoch": 0.1591140529531568, "percentage": 15.91, "elapsed_time": "0:04:54", "remaining_time": "0:25:53", "throughput": 6137.75, "total_tokens": 1804672} +{"current_steps": 1880, "total_steps": 11784, "loss": 0.1075, "lr": 1.9785766352275538e-06, "epoch": 0.15953835709436523, "percentage": 15.95, "elapsed_time": "0:04:54", "remaining_time": "0:25:50", "throughput": 6146.9, "total_tokens": 1809408} +{"current_steps": 1885, "total_steps": 11784, "loss": 0.1561, "lr": 1.9782706112917643e-06, "epoch": 0.15996266123557365, "percentage": 16.0, "elapsed_time": "0:04:54", "remaining_time": "0:25:47", "throughput": 6155.06, "total_tokens": 1813824} +{"current_steps": 1890, "total_steps": 11784, "loss": 0.0392, "lr": 1.977962441120664e-06, "epoch": 0.16038696537678207, "percentage": 16.04, "elapsed_time": "0:04:55", "remaining_time": "0:25:44", "throughput": 6162.99, "total_tokens": 1818176} +{"current_steps": 1895, "total_steps": 11784, "loss": 0.0622, "lr": 1.9776521253903492e-06, "epoch": 0.1608112695179905, "percentage": 16.08, "elapsed_time": "0:04:55", "remaining_time": "0:25:41", "throughput": 6171.76, "total_tokens": 1822784} +{"current_steps": 1900, "total_steps": 11784, "loss": 0.1414, "lr": 1.9773396647816246e-06, "epoch": 0.1612355736591989, "percentage": 16.12, "elapsed_time": "0:04:55", "remaining_time": "0:25:38", "throughput": 6180.91, "total_tokens": 1827520} +{"current_steps": 1905, "total_steps": 11784, "loss": 0.0911, "lr": 1.97702505998e-06, "epoch": 0.16165987780040733, "percentage": 16.17, "elapsed_time": "0:04:55", "remaining_time": "0:25:35", "throughput": 6190.07, "total_tokens": 1832256} +{"current_steps": 1910, "total_steps": 11784, "loss": 0.0821, "lr": 1.976708311675688e-06, "epoch": 0.16208418194161575, "percentage": 16.21, "elapsed_time": "0:04:56", "remaining_time": "0:25:31", "throughput": 6198.79, "total_tokens": 1836864} +{"current_steps": 1915, "total_steps": 11784, "loss": 0.0317, "lr": 1.976389420563607e-06, "epoch": 0.16250848608282417, "percentage": 16.25, "elapsed_time": "0:04:56", "remaining_time": "0:25:28", "throughput": 6206.91, "total_tokens": 1841280} +{"current_steps": 1920, "total_steps": 11784, "loss": 0.0848, "lr": 1.9760683873433734e-06, "epoch": 0.1629327902240326, "percentage": 16.29, "elapsed_time": "0:04:56", "remaining_time": "0:25:25", "throughput": 6216.16, "total_tokens": 1846080} +{"current_steps": 1925, "total_steps": 11784, "loss": 0.0373, "lr": 1.9757452127193043e-06, "epoch": 0.163357094365241, "percentage": 16.34, "elapsed_time": "0:04:57", "remaining_time": "0:25:22", "throughput": 6225.27, "total_tokens": 1850816} +{"current_steps": 1930, "total_steps": 11784, "loss": 0.0922, "lr": 1.9754198974004156e-06, "epoch": 0.16378139850644943, "percentage": 16.38, "elapsed_time": "0:04:57", "remaining_time": "0:25:19", "throughput": 6233.27, "total_tokens": 1855232} +{"current_steps": 1935, "total_steps": 11784, "loss": 0.0689, "lr": 1.975092442100419e-06, "epoch": 0.16420570264765785, "percentage": 16.42, "elapsed_time": "0:04:57", "remaining_time": "0:25:16", "throughput": 6242.81, "total_tokens": 1860160} +{"current_steps": 1940, "total_steps": 11784, "loss": 0.0229, "lr": 1.9747628475377204e-06, "epoch": 0.16463000678886625, "percentage": 16.46, "elapsed_time": "0:04:58", "remaining_time": "0:25:13", "throughput": 6252.16, "total_tokens": 1865024} +{"current_steps": 1945, "total_steps": 11784, "loss": 0.0846, "lr": 1.9744311144354208e-06, "epoch": 0.16505431093007467, "percentage": 16.51, "elapsed_time": "0:04:58", "remaining_time": "0:25:10", "throughput": 6261.47, "total_tokens": 1869888} +{"current_steps": 1950, "total_steps": 11784, "loss": 0.1164, "lr": 1.9740972435213112e-06, "epoch": 0.16547861507128309, "percentage": 16.55, "elapsed_time": "0:04:58", "remaining_time": "0:25:07", "throughput": 6270.33, "total_tokens": 1874624} +{"current_steps": 1955, "total_steps": 11784, "loss": 0.066, "lr": 1.973761235527874e-06, "epoch": 0.1659029192124915, "percentage": 16.59, "elapsed_time": "0:04:59", "remaining_time": "0:25:04", "throughput": 6278.66, "total_tokens": 1879168} +{"current_steps": 1960, "total_steps": 11784, "loss": 0.1811, "lr": 1.9734230911922795e-06, "epoch": 0.16632722335369993, "percentage": 16.63, "elapsed_time": "0:04:59", "remaining_time": "0:25:01", "throughput": 6288.09, "total_tokens": 1884096} +{"current_steps": 1965, "total_steps": 11784, "loss": 0.0921, "lr": 1.9730828112563852e-06, "epoch": 0.16675152749490835, "percentage": 16.68, "elapsed_time": "0:04:59", "remaining_time": "0:24:58", "throughput": 6296.91, "total_tokens": 1888832} +{"current_steps": 1970, "total_steps": 11784, "loss": 0.0428, "lr": 1.972740396466734e-06, "epoch": 0.16717583163611677, "percentage": 16.72, "elapsed_time": "0:05:00", "remaining_time": "0:24:55", "throughput": 6306.11, "total_tokens": 1893696} +{"current_steps": 1975, "total_steps": 11784, "loss": 0.0128, "lr": 1.972395847574552e-06, "epoch": 0.1676001357773252, "percentage": 16.76, "elapsed_time": "0:05:00", "remaining_time": "0:24:53", "throughput": 6314.11, "total_tokens": 1898176} +{"current_steps": 1980, "total_steps": 11784, "loss": 0.092, "lr": 1.972049165335747e-06, "epoch": 0.1680244399185336, "percentage": 16.8, "elapsed_time": "0:05:00", "remaining_time": "0:24:50", "throughput": 6322.38, "total_tokens": 1902720} +{"current_steps": 1985, "total_steps": 11784, "loss": 0.0494, "lr": 1.9717003505109094e-06, "epoch": 0.16844874405974203, "percentage": 16.84, "elapsed_time": "0:05:01", "remaining_time": "0:24:47", "throughput": 6331.3, "total_tokens": 1907520} +{"current_steps": 1990, "total_steps": 11784, "loss": 0.0955, "lr": 1.9713494038653054e-06, "epoch": 0.16887304820095045, "percentage": 16.89, "elapsed_time": "0:05:01", "remaining_time": "0:24:44", "throughput": 6339.31, "total_tokens": 1912000} +{"current_steps": 1995, "total_steps": 11784, "loss": 0.0437, "lr": 1.97099632616888e-06, "epoch": 0.16929735234215887, "percentage": 16.93, "elapsed_time": "0:05:01", "remaining_time": "0:24:41", "throughput": 6346.5, "total_tokens": 1916224} +{"current_steps": 2000, "total_steps": 11784, "loss": 0.0532, "lr": 1.9706411181962534e-06, "epoch": 0.1697216564833673, "percentage": 16.97, "elapsed_time": "0:05:02", "remaining_time": "0:24:38", "throughput": 6355.04, "total_tokens": 1920896} +{"current_steps": 2005, "total_steps": 11784, "loss": 0.0502, "lr": 1.970283780726718e-06, "epoch": 0.1701459606245757, "percentage": 17.01, "elapsed_time": "0:05:02", "remaining_time": "0:24:35", "throughput": 6362.85, "total_tokens": 1925312} +{"current_steps": 2010, "total_steps": 11784, "loss": 0.095, "lr": 1.9699243145442397e-06, "epoch": 0.1705702647657841, "percentage": 17.06, "elapsed_time": "0:05:02", "remaining_time": "0:24:32", "throughput": 6371.16, "total_tokens": 1929920} +{"current_steps": 2015, "total_steps": 11784, "loss": 0.0817, "lr": 1.9695627204374544e-06, "epoch": 0.17099456890699252, "percentage": 17.1, "elapsed_time": "0:05:03", "remaining_time": "0:24:30", "throughput": 6379.98, "total_tokens": 1934720} +{"current_steps": 2020, "total_steps": 11784, "loss": 0.0332, "lr": 1.969198999199666e-06, "epoch": 0.17141887304820094, "percentage": 17.14, "elapsed_time": "0:05:03", "remaining_time": "0:24:27", "throughput": 6389.09, "total_tokens": 1939584} +{"current_steps": 2025, "total_steps": 11784, "loss": 0.125, "lr": 1.968833151628845e-06, "epoch": 0.17184317718940936, "percentage": 17.18, "elapsed_time": "0:05:03", "remaining_time": "0:24:24", "throughput": 6398.43, "total_tokens": 1944576} +{"current_steps": 2030, "total_steps": 11784, "loss": 0.1032, "lr": 1.968465178527628e-06, "epoch": 0.17226748133061778, "percentage": 17.23, "elapsed_time": "0:05:04", "remaining_time": "0:24:21", "throughput": 6405.93, "total_tokens": 1948928} +{"current_steps": 2035, "total_steps": 11784, "loss": 0.0727, "lr": 1.9680950807033124e-06, "epoch": 0.1726917854718262, "percentage": 17.27, "elapsed_time": "0:05:04", "remaining_time": "0:24:19", "throughput": 6414.32, "total_tokens": 1953600} +{"current_steps": 2040, "total_steps": 11784, "loss": 0.0892, "lr": 1.96772285896786e-06, "epoch": 0.17311608961303462, "percentage": 17.31, "elapsed_time": "0:05:04", "remaining_time": "0:24:16", "throughput": 6423.66, "total_tokens": 1958592} +{"current_steps": 2045, "total_steps": 11784, "loss": 0.042, "lr": 1.9673485141378904e-06, "epoch": 0.17354039375424304, "percentage": 17.35, "elapsed_time": "0:05:05", "remaining_time": "0:24:13", "throughput": 6430.56, "total_tokens": 1962752} +{"current_steps": 2050, "total_steps": 11784, "loss": 0.1337, "lr": 1.9669720470346817e-06, "epoch": 0.17396469789545146, "percentage": 17.4, "elapsed_time": "0:05:05", "remaining_time": "0:24:10", "throughput": 6438.92, "total_tokens": 1967424} +{"current_steps": 2055, "total_steps": 11784, "loss": 0.0393, "lr": 1.966593458484168e-06, "epoch": 0.17438900203665988, "percentage": 17.44, "elapsed_time": "0:05:05", "remaining_time": "0:24:08", "throughput": 6449.11, "total_tokens": 1972736} +{"current_steps": 2060, "total_steps": 11784, "loss": 0.0351, "lr": 1.9662127493169367e-06, "epoch": 0.1748133061778683, "percentage": 17.48, "elapsed_time": "0:05:06", "remaining_time": "0:24:05", "throughput": 6457.37, "total_tokens": 1977408} +{"current_steps": 2065, "total_steps": 11784, "loss": 0.044, "lr": 1.96582992036823e-06, "epoch": 0.17523761031907673, "percentage": 17.52, "elapsed_time": "0:05:06", "remaining_time": "0:24:02", "throughput": 6465.5, "total_tokens": 1982016} +{"current_steps": 2070, "total_steps": 11784, "loss": 0.114, "lr": 1.9654449724779387e-06, "epoch": 0.17566191446028515, "percentage": 17.57, "elapsed_time": "0:05:06", "remaining_time": "0:24:00", "throughput": 6475.71, "total_tokens": 1987392} +{"current_steps": 2075, "total_steps": 11784, "loss": 0.0802, "lr": 1.965057906490602e-06, "epoch": 0.17608621860149354, "percentage": 17.61, "elapsed_time": "0:05:07", "remaining_time": "0:23:57", "throughput": 6483.88, "total_tokens": 1992064} +{"current_steps": 2080, "total_steps": 11784, "loss": 0.0644, "lr": 1.964668723255408e-06, "epoch": 0.17651052274270196, "percentage": 17.65, "elapsed_time": "0:05:07", "remaining_time": "0:23:54", "throughput": 6493.15, "total_tokens": 1997120} +{"current_steps": 2085, "total_steps": 11784, "loss": 0.0501, "lr": 1.964277423626188e-06, "epoch": 0.17693482688391038, "percentage": 17.69, "elapsed_time": "0:05:07", "remaining_time": "0:23:52", "throughput": 6501.07, "total_tokens": 2001664} +{"current_steps": 2090, "total_steps": 11784, "loss": 0.0941, "lr": 1.9638840084614178e-06, "epoch": 0.1773591310251188, "percentage": 17.74, "elapsed_time": "0:05:08", "remaining_time": "0:23:49", "throughput": 6509.3, "total_tokens": 2006336} +{"current_steps": 2095, "total_steps": 11784, "loss": 0.0483, "lr": 1.963488478624214e-06, "epoch": 0.17778343516632722, "percentage": 17.78, "elapsed_time": "0:05:08", "remaining_time": "0:23:47", "throughput": 6518.16, "total_tokens": 2011264} +{"current_steps": 2100, "total_steps": 11784, "loss": 0.0896, "lr": 1.9630908349823315e-06, "epoch": 0.17820773930753564, "percentage": 17.82, "elapsed_time": "0:05:08", "remaining_time": "0:23:44", "throughput": 6525.64, "total_tokens": 2015680} +{"current_steps": 2105, "total_steps": 11784, "loss": 0.1457, "lr": 1.9626910784081647e-06, "epoch": 0.17863204344874406, "percentage": 17.86, "elapsed_time": "0:05:09", "remaining_time": "0:23:41", "throughput": 6533.75, "total_tokens": 2020352} +{"current_steps": 2110, "total_steps": 11784, "loss": 0.0603, "lr": 1.9622892097787426e-06, "epoch": 0.17905634758995248, "percentage": 17.91, "elapsed_time": "0:05:09", "remaining_time": "0:23:39", "throughput": 6541.31, "total_tokens": 2024832} +{"current_steps": 2115, "total_steps": 11784, "loss": 0.0261, "lr": 1.961885229975727e-06, "epoch": 0.1794806517311609, "percentage": 17.95, "elapsed_time": "0:05:09", "remaining_time": "0:23:36", "throughput": 6548.54, "total_tokens": 2029184} +{"current_steps": 2120, "total_steps": 11784, "loss": 0.0787, "lr": 1.9614791398854133e-06, "epoch": 0.17990495587236932, "percentage": 17.99, "elapsed_time": "0:05:10", "remaining_time": "0:23:34", "throughput": 6557.18, "total_tokens": 2034048} +{"current_steps": 2125, "total_steps": 11784, "loss": 0.0453, "lr": 1.9610709403987244e-06, "epoch": 0.18032926001357774, "percentage": 18.03, "elapsed_time": "0:05:10", "remaining_time": "0:23:31", "throughput": 6566.63, "total_tokens": 2039232} +{"current_steps": 2130, "total_steps": 11784, "loss": 0.0774, "lr": 1.9606606324112134e-06, "epoch": 0.18075356415478616, "percentage": 18.08, "elapsed_time": "0:05:10", "remaining_time": "0:23:28", "throughput": 6574.21, "total_tokens": 2043712} +{"current_steps": 2135, "total_steps": 11784, "loss": 0.1347, "lr": 1.9602482168230576e-06, "epoch": 0.18117786829599458, "percentage": 18.12, "elapsed_time": "0:05:11", "remaining_time": "0:23:26", "throughput": 6582.78, "total_tokens": 2048576} +{"current_steps": 2140, "total_steps": 11784, "loss": 0.071, "lr": 1.9598336945390584e-06, "epoch": 0.18160217243720297, "percentage": 18.16, "elapsed_time": "0:05:11", "remaining_time": "0:23:23", "throughput": 6591.36, "total_tokens": 2053440} +{"current_steps": 2145, "total_steps": 11784, "loss": 0.0639, "lr": 1.95941706646864e-06, "epoch": 0.1820264765784114, "percentage": 18.2, "elapsed_time": "0:05:11", "remaining_time": "0:23:21", "throughput": 6599.86, "total_tokens": 2058304} +{"current_steps": 2150, "total_steps": 11784, "loss": 0.0581, "lr": 1.9589983335258457e-06, "epoch": 0.18245078071961982, "percentage": 18.25, "elapsed_time": "0:05:12", "remaining_time": "0:23:18", "throughput": 6607.17, "total_tokens": 2062720} +{"current_steps": 2155, "total_steps": 11784, "loss": 0.0886, "lr": 1.9585774966293365e-06, "epoch": 0.18287508486082824, "percentage": 18.29, "elapsed_time": "0:05:12", "remaining_time": "0:23:16", "throughput": 6614.86, "total_tokens": 2067264} +{"current_steps": 2160, "total_steps": 11784, "loss": 0.0804, "lr": 1.95815455670239e-06, "epoch": 0.18329938900203666, "percentage": 18.33, "elapsed_time": "0:05:12", "remaining_time": "0:23:13", "throughput": 6622.35, "total_tokens": 2071744} +{"current_steps": 2165, "total_steps": 11784, "loss": 0.0776, "lr": 1.957729514672897e-06, "epoch": 0.18372369314324508, "percentage": 18.37, "elapsed_time": "0:05:13", "remaining_time": "0:23:11", "throughput": 6630.17, "total_tokens": 2076352} +{"current_steps": 2170, "total_steps": 11784, "loss": 0.0654, "lr": 1.957302371473361e-06, "epoch": 0.1841479972844535, "percentage": 18.41, "elapsed_time": "0:05:13", "remaining_time": "0:23:08", "throughput": 6638.21, "total_tokens": 2081088} +{"current_steps": 2175, "total_steps": 11784, "loss": 0.0651, "lr": 1.9568731280408945e-06, "epoch": 0.18457230142566192, "percentage": 18.46, "elapsed_time": "0:05:13", "remaining_time": "0:23:06", "throughput": 6646.05, "total_tokens": 2085760} +{"current_steps": 2180, "total_steps": 11784, "loss": 0.0981, "lr": 1.956441785317217e-06, "epoch": 0.18499660556687034, "percentage": 18.5, "elapsed_time": "0:05:14", "remaining_time": "0:23:04", "throughput": 6654.36, "total_tokens": 2090624} +{"current_steps": 2185, "total_steps": 11784, "loss": 0.0606, "lr": 1.9560083442486565e-06, "epoch": 0.18542090970807876, "percentage": 18.54, "elapsed_time": "0:05:14", "remaining_time": "0:23:01", "throughput": 6663.94, "total_tokens": 2095936} +{"current_steps": 2190, "total_steps": 11784, "loss": 0.081, "lr": 1.955572805786141e-06, "epoch": 0.18584521384928718, "percentage": 18.58, "elapsed_time": "0:05:14", "remaining_time": "0:22:59", "throughput": 6671.74, "total_tokens": 2100608} +{"current_steps": 2195, "total_steps": 11784, "loss": 0.0897, "lr": 1.9551351708852015e-06, "epoch": 0.1862695179904956, "percentage": 18.63, "elapsed_time": "0:05:15", "remaining_time": "0:22:56", "throughput": 6681.11, "total_tokens": 2105856} +{"current_steps": 2200, "total_steps": 11784, "loss": 0.0448, "lr": 1.9546954405059697e-06, "epoch": 0.18669382213170402, "percentage": 18.67, "elapsed_time": "0:05:15", "remaining_time": "0:22:54", "throughput": 6688.81, "total_tokens": 2110464} +{"current_steps": 2205, "total_steps": 11784, "loss": 0.1086, "lr": 1.954253615613173e-06, "epoch": 0.1871181262729124, "percentage": 18.71, "elapsed_time": "0:05:15", "remaining_time": "0:22:52", "throughput": 6697.97, "total_tokens": 2115648} +{"current_steps": 2210, "total_steps": 11784, "loss": 0.043, "lr": 1.9538096971761343e-06, "epoch": 0.18754243041412083, "percentage": 18.75, "elapsed_time": "0:05:16", "remaining_time": "0:22:49", "throughput": 6705.54, "total_tokens": 2120256} +{"current_steps": 2215, "total_steps": 11784, "loss": 0.1077, "lr": 1.9533636861687696e-06, "epoch": 0.18796673455532925, "percentage": 18.8, "elapsed_time": "0:05:16", "remaining_time": "0:22:47", "throughput": 6712.6, "total_tokens": 2124672} +{"current_steps": 2220, "total_steps": 11784, "loss": 0.0675, "lr": 1.9529155835695855e-06, "epoch": 0.18839103869653767, "percentage": 18.84, "elapsed_time": "0:05:16", "remaining_time": "0:22:45", "throughput": 6720.22, "total_tokens": 2129344} +{"current_steps": 2225, "total_steps": 11784, "loss": 0.0677, "lr": 1.952465390361678e-06, "epoch": 0.1888153428377461, "percentage": 18.88, "elapsed_time": "0:05:17", "remaining_time": "0:22:42", "throughput": 6728.78, "total_tokens": 2134336} +{"current_steps": 2230, "total_steps": 11784, "loss": 0.0558, "lr": 1.95201310753273e-06, "epoch": 0.1892396469789545, "percentage": 18.92, "elapsed_time": "0:05:17", "remaining_time": "0:22:40", "throughput": 6737.66, "total_tokens": 2139456} +{"current_steps": 2235, "total_steps": 11784, "loss": 0.1059, "lr": 1.9515587360750068e-06, "epoch": 0.18966395112016293, "percentage": 18.97, "elapsed_time": "0:05:17", "remaining_time": "0:22:38", "throughput": 6746.69, "total_tokens": 2144640} +{"current_steps": 2240, "total_steps": 11784, "loss": 0.1091, "lr": 1.9511022769853586e-06, "epoch": 0.19008825526137135, "percentage": 19.01, "elapsed_time": "0:05:18", "remaining_time": "0:22:35", "throughput": 6755.62, "total_tokens": 2149760} +{"current_steps": 2245, "total_steps": 11784, "loss": 0.1657, "lr": 1.9506437312652144e-06, "epoch": 0.19051255940257977, "percentage": 19.05, "elapsed_time": "0:05:18", "remaining_time": "0:22:33", "throughput": 6765.3, "total_tokens": 2155200} +{"current_steps": 2250, "total_steps": 11784, "loss": 0.1252, "lr": 1.9501830999205806e-06, "epoch": 0.1909368635437882, "percentage": 19.09, "elapsed_time": "0:05:18", "remaining_time": "0:22:31", "throughput": 6772.91, "total_tokens": 2159872} +{"current_steps": 2255, "total_steps": 11784, "loss": 0.0864, "lr": 1.9497203839620398e-06, "epoch": 0.1913611676849966, "percentage": 19.14, "elapsed_time": "0:05:19", "remaining_time": "0:22:28", "throughput": 6780.58, "total_tokens": 2164544} +{"current_steps": 2260, "total_steps": 11784, "loss": 0.0384, "lr": 1.9492555844047483e-06, "epoch": 0.19178547182620503, "percentage": 19.18, "elapsed_time": "0:05:19", "remaining_time": "0:22:26", "throughput": 6789.9, "total_tokens": 2169856} +{"current_steps": 2265, "total_steps": 11784, "loss": 0.0385, "lr": 1.9487887022684334e-06, "epoch": 0.19220977596741345, "percentage": 19.22, "elapsed_time": "0:05:19", "remaining_time": "0:22:24", "throughput": 6797.11, "total_tokens": 2174400} +{"current_steps": 2270, "total_steps": 11784, "loss": 0.0915, "lr": 1.9483197385773913e-06, "epoch": 0.19263408010862185, "percentage": 19.26, "elapsed_time": "0:05:20", "remaining_time": "0:22:22", "throughput": 6804.98, "total_tokens": 2179200} +{"current_steps": 2275, "total_steps": 11784, "loss": 0.0774, "lr": 1.947848694360485e-06, "epoch": 0.19305838424983027, "percentage": 19.31, "elapsed_time": "0:05:20", "remaining_time": "0:22:19", "throughput": 6814.82, "total_tokens": 2184768} +{"current_steps": 2280, "total_steps": 11784, "loss": 0.0947, "lr": 1.947375570651142e-06, "epoch": 0.1934826883910387, "percentage": 19.35, "elapsed_time": "0:05:20", "remaining_time": "0:22:17", "throughput": 6823.33, "total_tokens": 2189824} +{"current_steps": 2285, "total_steps": 11784, "loss": 0.0549, "lr": 1.9469003684873514e-06, "epoch": 0.1939069925322471, "percentage": 19.39, "elapsed_time": "0:05:21", "remaining_time": "0:22:15", "throughput": 6831.43, "total_tokens": 2194752} +{"current_steps": 2290, "total_steps": 11784, "loss": 0.1116, "lr": 1.946423088911664e-06, "epoch": 0.19433129667345553, "percentage": 19.43, "elapsed_time": "0:05:21", "remaining_time": "0:22:13", "throughput": 6839.25, "total_tokens": 2199552} +{"current_steps": 2295, "total_steps": 11784, "loss": 0.1976, "lr": 1.9459437329711865e-06, "epoch": 0.19475560081466395, "percentage": 19.48, "elapsed_time": "0:05:21", "remaining_time": "0:22:11", "throughput": 6846.84, "total_tokens": 2204288} +{"current_steps": 2300, "total_steps": 11784, "loss": 0.0683, "lr": 1.945462301717581e-06, "epoch": 0.19517990495587237, "percentage": 19.52, "elapsed_time": "0:05:22", "remaining_time": "0:22:08", "throughput": 6855.25, "total_tokens": 2209344} +{"current_steps": 2305, "total_steps": 11784, "loss": 0.0837, "lr": 1.944978796207064e-06, "epoch": 0.1956042090970808, "percentage": 19.56, "elapsed_time": "0:05:22", "remaining_time": "0:22:06", "throughput": 6863.19, "total_tokens": 2214208} +{"current_steps": 2310, "total_steps": 11784, "loss": 0.0974, "lr": 1.9444932175004017e-06, "epoch": 0.1960285132382892, "percentage": 19.6, "elapsed_time": "0:05:22", "remaining_time": "0:22:04", "throughput": 6869.95, "total_tokens": 2218624} +{"current_steps": 2315, "total_steps": 11784, "loss": 0.0717, "lr": 1.9440055666629087e-06, "epoch": 0.19645281737949763, "percentage": 19.65, "elapsed_time": "0:05:23", "remaining_time": "0:22:02", "throughput": 6878.83, "total_tokens": 2223872} +{"current_steps": 2320, "total_steps": 11784, "loss": 0.0729, "lr": 1.943515844764446e-06, "epoch": 0.19687712152070605, "percentage": 19.69, "elapsed_time": "0:05:23", "remaining_time": "0:22:00", "throughput": 6884.98, "total_tokens": 2228096} +{"current_steps": 2325, "total_steps": 11784, "loss": 0.1151, "lr": 1.943024052879418e-06, "epoch": 0.19730142566191447, "percentage": 19.73, "elapsed_time": "0:05:23", "remaining_time": "0:21:57", "throughput": 6891.29, "total_tokens": 2232384} +{"current_steps": 2330, "total_steps": 11784, "loss": 0.0336, "lr": 1.9425301920867703e-06, "epoch": 0.1977257298031229, "percentage": 19.77, "elapsed_time": "0:05:24", "remaining_time": "0:21:55", "throughput": 6898.91, "total_tokens": 2237184} +{"current_steps": 2335, "total_steps": 11784, "loss": 0.1116, "lr": 1.942034263469989e-06, "epoch": 0.19815003394433128, "percentage": 19.82, "elapsed_time": "0:05:24", "remaining_time": "0:21:53", "throughput": 6906.67, "total_tokens": 2242048} +{"current_steps": 2340, "total_steps": 11784, "loss": 0.0395, "lr": 1.941536268117095e-06, "epoch": 0.1985743380855397, "percentage": 19.86, "elapsed_time": "0:05:24", "remaining_time": "0:21:51", "throughput": 6915.03, "total_tokens": 2247104} +{"current_steps": 2345, "total_steps": 11784, "loss": 0.089, "lr": 1.9410362071206436e-06, "epoch": 0.19899864222674812, "percentage": 19.9, "elapsed_time": "0:05:25", "remaining_time": "0:21:49", "throughput": 6922.54, "total_tokens": 2251840} +{"current_steps": 2350, "total_steps": 11784, "loss": 0.1879, "lr": 1.9405340815777232e-06, "epoch": 0.19942294636795654, "percentage": 19.94, "elapsed_time": "0:05:25", "remaining_time": "0:21:47", "throughput": 6930.73, "total_tokens": 2256832} +{"current_steps": 2355, "total_steps": 11784, "loss": 0.0277, "lr": 1.9400298925899505e-06, "epoch": 0.19984725050916496, "percentage": 19.98, "elapsed_time": "0:05:25", "remaining_time": "0:21:45", "throughput": 6937.03, "total_tokens": 2261120} +{"current_steps": 2360, "total_steps": 11784, "loss": 0.1436, "lr": 1.939523641263469e-06, "epoch": 0.20027155465037338, "percentage": 20.03, "elapsed_time": "0:05:26", "remaining_time": "0:21:42", "throughput": 6946.07, "total_tokens": 2266496} +{"current_steps": 2360, "total_steps": 11784, "eval_loss": 0.0887659341096878, "epoch": 0.20027155465037338, "percentage": 20.03, "elapsed_time": "0:05:42", "remaining_time": "0:22:46", "throughput": 6621.92, "total_tokens": 2266496} +{"current_steps": 2365, "total_steps": 11784, "loss": 0.0249, "lr": 1.9390153287089485e-06, "epoch": 0.2006958587915818, "percentage": 20.07, "elapsed_time": "0:06:13", "remaining_time": "0:24:47", "throughput": 6079.46, "total_tokens": 2271040} +{"current_steps": 2370, "total_steps": 11784, "loss": 0.1026, "lr": 1.938504956041579e-06, "epoch": 0.20112016293279023, "percentage": 20.11, "elapsed_time": "0:06:13", "remaining_time": "0:24:45", "throughput": 6087.47, "total_tokens": 2276096} +{"current_steps": 2375, "total_steps": 11784, "loss": 0.0498, "lr": 1.937992524381071e-06, "epoch": 0.20154446707399865, "percentage": 20.15, "elapsed_time": "0:06:14", "remaining_time": "0:24:42", "throughput": 6093.17, "total_tokens": 2280192} +{"current_steps": 2380, "total_steps": 11784, "loss": 0.0528, "lr": 1.9374780348516525e-06, "epoch": 0.20196877121520707, "percentage": 20.2, "elapsed_time": "0:06:14", "remaining_time": "0:24:39", "throughput": 6099.85, "total_tokens": 2284672} +{"current_steps": 2385, "total_steps": 11784, "loss": 0.1108, "lr": 1.9369614885820657e-06, "epoch": 0.20239307535641549, "percentage": 20.24, "elapsed_time": "0:06:14", "remaining_time": "0:24:37", "throughput": 6107.72, "total_tokens": 2289664} +{"current_steps": 2390, "total_steps": 11784, "loss": 0.0978, "lr": 1.9364428867055655e-06, "epoch": 0.2028173794976239, "percentage": 20.28, "elapsed_time": "0:06:15", "remaining_time": "0:24:34", "throughput": 6116.28, "total_tokens": 2294976} +{"current_steps": 2395, "total_steps": 11784, "loss": 0.0427, "lr": 1.935922230359916e-06, "epoch": 0.20324168363883233, "percentage": 20.32, "elapsed_time": "0:06:15", "remaining_time": "0:24:32", "throughput": 6123.23, "total_tokens": 2299584} +{"current_steps": 2400, "total_steps": 11784, "loss": 0.067, "lr": 1.9353995206873898e-06, "epoch": 0.20366598778004075, "percentage": 20.37, "elapsed_time": "0:06:15", "remaining_time": "0:24:29", "throughput": 6130.43, "total_tokens": 2304320} +{"current_steps": 2405, "total_steps": 11784, "loss": 0.1673, "lr": 1.9348747588347637e-06, "epoch": 0.20409029192124914, "percentage": 20.41, "elapsed_time": "0:06:16", "remaining_time": "0:24:27", "throughput": 6137.38, "total_tokens": 2308928} +{"current_steps": 2410, "total_steps": 11784, "loss": 0.0387, "lr": 1.9343479459533157e-06, "epoch": 0.20451459606245756, "percentage": 20.45, "elapsed_time": "0:06:16", "remaining_time": "0:24:24", "throughput": 6143.66, "total_tokens": 2313280} +{"current_steps": 2415, "total_steps": 11784, "loss": 0.1093, "lr": 1.933819083198826e-06, "epoch": 0.20493890020366598, "percentage": 20.49, "elapsed_time": "0:06:16", "remaining_time": "0:24:22", "throughput": 6151.73, "total_tokens": 2318400} +{"current_steps": 2420, "total_steps": 11784, "loss": 0.0704, "lr": 1.9332881717315694e-06, "epoch": 0.2053632043448744, "percentage": 20.54, "elapsed_time": "0:06:17", "remaining_time": "0:24:19", "throughput": 6160.2, "total_tokens": 2323712} +{"current_steps": 2425, "total_steps": 11784, "loss": 0.0282, "lr": 1.9327552127163172e-06, "epoch": 0.20578750848608282, "percentage": 20.58, "elapsed_time": "0:06:17", "remaining_time": "0:24:17", "throughput": 6165.99, "total_tokens": 2327936} +{"current_steps": 2430, "total_steps": 11784, "loss": 0.0612, "lr": 1.932220207322332e-06, "epoch": 0.20621181262729124, "percentage": 20.62, "elapsed_time": "0:06:17", "remaining_time": "0:24:14", "throughput": 6172.07, "total_tokens": 2332224} +{"current_steps": 2435, "total_steps": 11784, "loss": 0.0718, "lr": 1.931683156723366e-06, "epoch": 0.20663611676849966, "percentage": 20.66, "elapsed_time": "0:06:18", "remaining_time": "0:24:12", "throughput": 6178.6, "total_tokens": 2336704} +{"current_steps": 2440, "total_steps": 11784, "loss": 0.0876, "lr": 1.9311440620976595e-06, "epoch": 0.20706042090970808, "percentage": 20.71, "elapsed_time": "0:06:18", "remaining_time": "0:24:09", "throughput": 6186.74, "total_tokens": 2341888} +{"current_steps": 2445, "total_steps": 11784, "loss": 0.0747, "lr": 1.930602924627935e-06, "epoch": 0.2074847250509165, "percentage": 20.75, "elapsed_time": "0:06:18", "remaining_time": "0:24:07", "throughput": 6193.38, "total_tokens": 2346432} +{"current_steps": 2450, "total_steps": 11784, "loss": 0.0499, "lr": 1.930059745501399e-06, "epoch": 0.20790902919212492, "percentage": 20.79, "elapsed_time": "0:06:19", "remaining_time": "0:24:04", "throughput": 6199.25, "total_tokens": 2350656} +{"current_steps": 2455, "total_steps": 11784, "loss": 0.0438, "lr": 1.9295145259097362e-06, "epoch": 0.20833333333333334, "percentage": 20.83, "elapsed_time": "0:06:19", "remaining_time": "0:24:02", "throughput": 6206.31, "total_tokens": 2355392} +{"current_steps": 2460, "total_steps": 11784, "loss": 0.0241, "lr": 1.9289672670491076e-06, "epoch": 0.20875763747454176, "percentage": 20.88, "elapsed_time": "0:06:19", "remaining_time": "0:23:59", "throughput": 6213.79, "total_tokens": 2360320} +{"current_steps": 2465, "total_steps": 11784, "loss": 0.0112, "lr": 1.928417970120149e-06, "epoch": 0.20918194161575018, "percentage": 20.92, "elapsed_time": "0:06:20", "remaining_time": "0:23:57", "throughput": 6221.03, "total_tokens": 2365120} +{"current_steps": 2470, "total_steps": 11784, "loss": 0.1338, "lr": 1.9278666363279664e-06, "epoch": 0.20960624575695858, "percentage": 20.96, "elapsed_time": "0:06:20", "remaining_time": "0:23:54", "throughput": 6228.18, "total_tokens": 2369920} +{"current_steps": 2475, "total_steps": 11784, "loss": 0.0943, "lr": 1.9273132668821363e-06, "epoch": 0.210030549898167, "percentage": 21.0, "elapsed_time": "0:06:20", "remaining_time": "0:23:52", "throughput": 6235.6, "total_tokens": 2374848} +{"current_steps": 2480, "total_steps": 11784, "loss": 0.0583, "lr": 1.926757862996699e-06, "epoch": 0.21045485403937542, "percentage": 21.05, "elapsed_time": "0:06:21", "remaining_time": "0:23:50", "throughput": 6243.6, "total_tokens": 2380032} +{"current_steps": 2485, "total_steps": 11784, "loss": 0.1324, "lr": 1.92620042589016e-06, "epoch": 0.21087915818058384, "percentage": 21.09, "elapsed_time": "0:06:21", "remaining_time": "0:23:47", "throughput": 6250.44, "total_tokens": 2384704} +{"current_steps": 2490, "total_steps": 11784, "loss": 0.0661, "lr": 1.9256409567854847e-06, "epoch": 0.21130346232179226, "percentage": 21.13, "elapsed_time": "0:06:21", "remaining_time": "0:23:45", "throughput": 6257.75, "total_tokens": 2389568} +{"current_steps": 2495, "total_steps": 11784, "loss": 0.1469, "lr": 1.9250794569100963e-06, "epoch": 0.21172776646300068, "percentage": 21.17, "elapsed_time": "0:06:22", "remaining_time": "0:23:42", "throughput": 6265.28, "total_tokens": 2394560} +{"current_steps": 2500, "total_steps": 11784, "loss": 0.0676, "lr": 1.9245159274958737e-06, "epoch": 0.2121520706042091, "percentage": 21.22, "elapsed_time": "0:06:22", "remaining_time": "0:23:40", "throughput": 6272.1, "total_tokens": 2399232} +{"current_steps": 2505, "total_steps": 11784, "loss": 0.1362, "lr": 1.9239503697791487e-06, "epoch": 0.21257637474541752, "percentage": 21.26, "elapsed_time": "0:06:22", "remaining_time": "0:23:38", "throughput": 6279.2, "total_tokens": 2404032} +{"current_steps": 2510, "total_steps": 11784, "loss": 0.0744, "lr": 1.9233827850007024e-06, "epoch": 0.21300067888662594, "percentage": 21.3, "elapsed_time": "0:06:23", "remaining_time": "0:23:35", "throughput": 6285.7, "total_tokens": 2408576} +{"current_steps": 2515, "total_steps": 11784, "loss": 0.0727, "lr": 1.9228131744057633e-06, "epoch": 0.21342498302783436, "percentage": 21.34, "elapsed_time": "0:06:23", "remaining_time": "0:23:33", "throughput": 6292.88, "total_tokens": 2413440} +{"current_steps": 2520, "total_steps": 11784, "loss": 0.0503, "lr": 1.922241539244005e-06, "epoch": 0.21384928716904278, "percentage": 21.38, "elapsed_time": "0:06:23", "remaining_time": "0:23:31", "throughput": 6298.64, "total_tokens": 2417664} +{"current_steps": 2525, "total_steps": 11784, "loss": 0.0807, "lr": 1.921667880769541e-06, "epoch": 0.2142735913102512, "percentage": 21.43, "elapsed_time": "0:06:24", "remaining_time": "0:23:28", "throughput": 6305.7, "total_tokens": 2422464} +{"current_steps": 2530, "total_steps": 11784, "loss": 0.039, "lr": 1.921092200240926e-06, "epoch": 0.21469789545145962, "percentage": 21.47, "elapsed_time": "0:06:24", "remaining_time": "0:23:26", "throughput": 6312.58, "total_tokens": 2427200} +{"current_steps": 2535, "total_steps": 11784, "loss": 0.0421, "lr": 1.9205144989211495e-06, "epoch": 0.215122199592668, "percentage": 21.51, "elapsed_time": "0:06:24", "remaining_time": "0:23:24", "throughput": 6318.72, "total_tokens": 2431616} +{"current_steps": 2540, "total_steps": 11784, "loss": 0.0555, "lr": 1.919934778077635e-06, "epoch": 0.21554650373387643, "percentage": 21.55, "elapsed_time": "0:06:25", "remaining_time": "0:23:21", "throughput": 6325.55, "total_tokens": 2436352} +{"current_steps": 2545, "total_steps": 11784, "loss": 0.0429, "lr": 1.9193530389822362e-06, "epoch": 0.21597080787508485, "percentage": 21.6, "elapsed_time": "0:06:25", "remaining_time": "0:23:19", "throughput": 6331.52, "total_tokens": 2440704} +{"current_steps": 2550, "total_steps": 11784, "loss": 0.0246, "lr": 1.918769282911235e-06, "epoch": 0.21639511201629327, "percentage": 21.64, "elapsed_time": "0:06:25", "remaining_time": "0:23:17", "throughput": 6340.2, "total_tokens": 2446272} +{"current_steps": 2555, "total_steps": 11784, "loss": 0.0731, "lr": 1.9181835111453383e-06, "epoch": 0.2168194161575017, "percentage": 21.68, "elapsed_time": "0:06:26", "remaining_time": "0:23:14", "throughput": 6345.82, "total_tokens": 2450496} +{"current_steps": 2560, "total_steps": 11784, "loss": 0.0483, "lr": 1.9175957249696755e-06, "epoch": 0.2172437202987101, "percentage": 21.72, "elapsed_time": "0:06:26", "remaining_time": "0:23:12", "throughput": 6353.06, "total_tokens": 2455424} +{"current_steps": 2565, "total_steps": 11784, "loss": 0.0535, "lr": 1.9170059256737946e-06, "epoch": 0.21766802443991853, "percentage": 21.77, "elapsed_time": "0:06:26", "remaining_time": "0:23:10", "throughput": 6360.28, "total_tokens": 2460352} +{"current_steps": 2570, "total_steps": 11784, "loss": 0.1311, "lr": 1.9164141145516613e-06, "epoch": 0.21809232858112695, "percentage": 21.81, "elapsed_time": "0:06:27", "remaining_time": "0:23:08", "throughput": 6366.66, "total_tokens": 2464896} +{"current_steps": 2575, "total_steps": 11784, "loss": 0.1178, "lr": 1.915820292901654e-06, "epoch": 0.21851663272233537, "percentage": 21.85, "elapsed_time": "0:06:27", "remaining_time": "0:23:05", "throughput": 6373.85, "total_tokens": 2469824} +{"current_steps": 2580, "total_steps": 11784, "loss": 0.1612, "lr": 1.915224462026563e-06, "epoch": 0.2189409368635438, "percentage": 21.89, "elapsed_time": "0:06:27", "remaining_time": "0:23:03", "throughput": 6379.74, "total_tokens": 2474176} +{"current_steps": 2585, "total_steps": 11784, "loss": 0.1505, "lr": 1.9146266232335854e-06, "epoch": 0.21936524100475221, "percentage": 21.94, "elapsed_time": "0:06:28", "remaining_time": "0:23:01", "throughput": 6387.25, "total_tokens": 2479232} +{"current_steps": 2590, "total_steps": 11784, "loss": 0.1221, "lr": 1.914026777834325e-06, "epoch": 0.21978954514596064, "percentage": 21.98, "elapsed_time": "0:06:28", "remaining_time": "0:22:59", "throughput": 6393.23, "total_tokens": 2483648} +{"current_steps": 2595, "total_steps": 11784, "loss": 0.0517, "lr": 1.9134249271447872e-06, "epoch": 0.22021384928716906, "percentage": 22.02, "elapsed_time": "0:06:28", "remaining_time": "0:22:56", "throughput": 6399.19, "total_tokens": 2488064} +{"current_steps": 2600, "total_steps": 11784, "loss": 0.0604, "lr": 1.9128210724853765e-06, "epoch": 0.22063815342837745, "percentage": 22.06, "elapsed_time": "0:06:29", "remaining_time": "0:22:54", "throughput": 6406.73, "total_tokens": 2493184} +{"current_steps": 2605, "total_steps": 11784, "loss": 0.094, "lr": 1.912215215180894e-06, "epoch": 0.22106245756958587, "percentage": 22.11, "elapsed_time": "0:06:29", "remaining_time": "0:22:52", "throughput": 6413.64, "total_tokens": 2498048} +{"current_steps": 2610, "total_steps": 11784, "loss": 0.1097, "lr": 1.9116073565605347e-06, "epoch": 0.2214867617107943, "percentage": 22.15, "elapsed_time": "0:06:29", "remaining_time": "0:22:50", "throughput": 6423.12, "total_tokens": 2504064} +{"current_steps": 2615, "total_steps": 11784, "loss": 0.0698, "lr": 1.9109974979578847e-06, "epoch": 0.2219110658520027, "percentage": 22.19, "elapsed_time": "0:06:30", "remaining_time": "0:22:48", "throughput": 6429.81, "total_tokens": 2508800} +{"current_steps": 2620, "total_steps": 11784, "loss": 0.0328, "lr": 1.9103856407109172e-06, "epoch": 0.22233536999321113, "percentage": 22.23, "elapsed_time": "0:06:30", "remaining_time": "0:22:45", "throughput": 6435.88, "total_tokens": 2513280} +{"current_steps": 2625, "total_steps": 11784, "loss": 0.0484, "lr": 1.9097717861619907e-06, "epoch": 0.22275967413441955, "percentage": 22.28, "elapsed_time": "0:06:30", "remaining_time": "0:22:43", "throughput": 6442.68, "total_tokens": 2518080} +{"current_steps": 2630, "total_steps": 11784, "loss": 0.0655, "lr": 1.9091559356578445e-06, "epoch": 0.22318397827562797, "percentage": 22.32, "elapsed_time": "0:06:31", "remaining_time": "0:22:41", "throughput": 6449.05, "total_tokens": 2522688} +{"current_steps": 2635, "total_steps": 11784, "loss": 0.0933, "lr": 1.9085380905495985e-06, "epoch": 0.2236082824168364, "percentage": 22.36, "elapsed_time": "0:06:31", "remaining_time": "0:22:39", "throughput": 6454.81, "total_tokens": 2527040} +{"current_steps": 2640, "total_steps": 11784, "loss": 0.0743, "lr": 1.9079182521927475e-06, "epoch": 0.2240325865580448, "percentage": 22.4, "elapsed_time": "0:06:31", "remaining_time": "0:22:37", "throughput": 6460.98, "total_tokens": 2531584} +{"current_steps": 2645, "total_steps": 11784, "loss": 0.1144, "lr": 1.9072964219471594e-06, "epoch": 0.22445689069925323, "percentage": 22.45, "elapsed_time": "0:06:32", "remaining_time": "0:22:35", "throughput": 6467.62, "total_tokens": 2536384} +{"current_steps": 2650, "total_steps": 11784, "loss": 0.0821, "lr": 1.9066726011770724e-06, "epoch": 0.22488119484046165, "percentage": 22.49, "elapsed_time": "0:06:32", "remaining_time": "0:22:32", "throughput": 6473.41, "total_tokens": 2540800} +{"current_steps": 2655, "total_steps": 11784, "loss": 0.02, "lr": 1.906046791251092e-06, "epoch": 0.22530549898167007, "percentage": 22.53, "elapsed_time": "0:06:32", "remaining_time": "0:22:30", "throughput": 6480.1, "total_tokens": 2545600} +{"current_steps": 2660, "total_steps": 11784, "loss": 0.1331, "lr": 1.9054189935421868e-06, "epoch": 0.2257298031228785, "percentage": 22.57, "elapsed_time": "0:06:33", "remaining_time": "0:22:28", "throughput": 6487.13, "total_tokens": 2550528} +{"current_steps": 2665, "total_steps": 11784, "loss": 0.0412, "lr": 1.9047892094276871e-06, "epoch": 0.22615410726408688, "percentage": 22.62, "elapsed_time": "0:06:33", "remaining_time": "0:22:26", "throughput": 6493.89, "total_tokens": 2555328} +{"current_steps": 2670, "total_steps": 11784, "loss": 0.0844, "lr": 1.9041574402892813e-06, "epoch": 0.2265784114052953, "percentage": 22.66, "elapsed_time": "0:06:33", "remaining_time": "0:22:24", "throughput": 6501.03, "total_tokens": 2560320} +{"current_steps": 2675, "total_steps": 11784, "loss": 0.0467, "lr": 1.903523687513012e-06, "epoch": 0.22700271554650372, "percentage": 22.7, "elapsed_time": "0:06:34", "remaining_time": "0:22:22", "throughput": 6507.03, "total_tokens": 2564800} +{"current_steps": 2680, "total_steps": 11784, "loss": 0.0676, "lr": 1.902887952489275e-06, "epoch": 0.22742701968771215, "percentage": 22.74, "elapsed_time": "0:06:34", "remaining_time": "0:22:20", "throughput": 6513.8, "total_tokens": 2569664} +{"current_steps": 2685, "total_steps": 11784, "loss": 0.0301, "lr": 1.9022502366128132e-06, "epoch": 0.22785132382892057, "percentage": 22.79, "elapsed_time": "0:06:34", "remaining_time": "0:22:18", "throughput": 6520.74, "total_tokens": 2574592} +{"current_steps": 2690, "total_steps": 11784, "loss": 0.1049, "lr": 1.9016105412827173e-06, "epoch": 0.22827562797012899, "percentage": 22.83, "elapsed_time": "0:06:35", "remaining_time": "0:22:15", "throughput": 6527.26, "total_tokens": 2579328} +{"current_steps": 2695, "total_steps": 11784, "loss": 0.0181, "lr": 1.9009688679024189e-06, "epoch": 0.2286999321113374, "percentage": 22.87, "elapsed_time": "0:06:35", "remaining_time": "0:22:13", "throughput": 6535.54, "total_tokens": 2584896} +{"current_steps": 2700, "total_steps": 11784, "loss": 0.1308, "lr": 1.9003252178796907e-06, "epoch": 0.22912423625254583, "percentage": 22.91, "elapsed_time": "0:06:35", "remaining_time": "0:22:11", "throughput": 6541.76, "total_tokens": 2589504} +{"current_steps": 2705, "total_steps": 11784, "loss": 0.0851, "lr": 1.8996795926266412e-06, "epoch": 0.22954854039375425, "percentage": 22.95, "elapsed_time": "0:06:36", "remaining_time": "0:22:09", "throughput": 6548.34, "total_tokens": 2594304} +{"current_steps": 2710, "total_steps": 11784, "loss": 0.0292, "lr": 1.899031993559712e-06, "epoch": 0.22997284453496267, "percentage": 23.0, "elapsed_time": "0:06:36", "remaining_time": "0:22:07", "throughput": 6554.2, "total_tokens": 2598784} +{"current_steps": 2715, "total_steps": 11784, "loss": 0.0709, "lr": 1.8983824220996764e-06, "epoch": 0.2303971486761711, "percentage": 23.04, "elapsed_time": "0:06:36", "remaining_time": "0:22:05", "throughput": 6561.04, "total_tokens": 2603712} +{"current_steps": 2720, "total_steps": 11784, "loss": 0.0578, "lr": 1.8977308796716338e-06, "epoch": 0.2308214528173795, "percentage": 23.08, "elapsed_time": "0:06:37", "remaining_time": "0:22:03", "throughput": 6567.22, "total_tokens": 2608320} +{"current_steps": 2725, "total_steps": 11784, "loss": 0.0302, "lr": 1.897077367705008e-06, "epoch": 0.23124575695858793, "percentage": 23.12, "elapsed_time": "0:06:37", "remaining_time": "0:22:01", "throughput": 6574.05, "total_tokens": 2613248} +{"current_steps": 2730, "total_steps": 11784, "loss": 0.0637, "lr": 1.896421887633544e-06, "epoch": 0.23167006109979632, "percentage": 23.17, "elapsed_time": "0:06:37", "remaining_time": "0:21:59", "throughput": 6579.65, "total_tokens": 2617664} +{"current_steps": 2735, "total_steps": 11784, "loss": 0.0716, "lr": 1.8957644408953044e-06, "epoch": 0.23209436524100474, "percentage": 23.21, "elapsed_time": "0:06:38", "remaining_time": "0:21:57", "throughput": 6585.13, "total_tokens": 2622016} +{"current_steps": 2740, "total_steps": 11784, "loss": 0.0438, "lr": 1.8951050289326664e-06, "epoch": 0.23251866938221316, "percentage": 23.25, "elapsed_time": "0:06:38", "remaining_time": "0:21:55", "throughput": 6590.71, "total_tokens": 2626368} +{"current_steps": 2745, "total_steps": 11784, "loss": 0.032, "lr": 1.8944436531923193e-06, "epoch": 0.23294297352342158, "percentage": 23.29, "elapsed_time": "0:06:38", "remaining_time": "0:21:53", "throughput": 6596.23, "total_tokens": 2630720} +{"current_steps": 2750, "total_steps": 11784, "loss": 0.1554, "lr": 1.8937803151252603e-06, "epoch": 0.23336727766463, "percentage": 23.34, "elapsed_time": "0:06:39", "remaining_time": "0:21:51", "throughput": 6602.57, "total_tokens": 2635456} +{"current_steps": 2755, "total_steps": 11784, "loss": 0.1055, "lr": 1.8931150161867915e-06, "epoch": 0.23379158180583842, "percentage": 23.38, "elapsed_time": "0:06:39", "remaining_time": "0:21:49", "throughput": 6607.96, "total_tokens": 2639744} +{"current_steps": 2760, "total_steps": 11784, "loss": 0.0627, "lr": 1.8924477578365177e-06, "epoch": 0.23421588594704684, "percentage": 23.42, "elapsed_time": "0:06:39", "remaining_time": "0:21:47", "throughput": 6613.04, "total_tokens": 2643904} +{"current_steps": 2765, "total_steps": 11784, "loss": 0.0403, "lr": 1.8917785415383415e-06, "epoch": 0.23464019008825526, "percentage": 23.46, "elapsed_time": "0:06:40", "remaining_time": "0:21:45", "throughput": 6619.64, "total_tokens": 2648768} +{"current_steps": 2770, "total_steps": 11784, "loss": 0.0271, "lr": 1.8911073687604622e-06, "epoch": 0.23506449422946368, "percentage": 23.51, "elapsed_time": "0:06:40", "remaining_time": "0:21:43", "throughput": 6626.9, "total_tokens": 2653952} +{"current_steps": 2775, "total_steps": 11784, "loss": 0.0031, "lr": 1.8904342409753703e-06, "epoch": 0.2354887983706721, "percentage": 23.55, "elapsed_time": "0:06:40", "remaining_time": "0:21:41", "throughput": 6632.58, "total_tokens": 2658432} +{"current_steps": 2780, "total_steps": 11784, "loss": 0.054, "lr": 1.8897591596598464e-06, "epoch": 0.23591310251188052, "percentage": 23.59, "elapsed_time": "0:06:41", "remaining_time": "0:21:39", "throughput": 6638.89, "total_tokens": 2663168} +{"current_steps": 2785, "total_steps": 11784, "loss": 0.1288, "lr": 1.8890821262949564e-06, "epoch": 0.23633740665308894, "percentage": 23.63, "elapsed_time": "0:06:41", "remaining_time": "0:21:37", "throughput": 6645.53, "total_tokens": 2668096} +{"current_steps": 2790, "total_steps": 11784, "loss": 0.1241, "lr": 1.8884031423660488e-06, "epoch": 0.23676171079429736, "percentage": 23.68, "elapsed_time": "0:06:41", "remaining_time": "0:21:35", "throughput": 6651.28, "total_tokens": 2672576} +{"current_steps": 2795, "total_steps": 11784, "loss": 0.0832, "lr": 1.8877222093627517e-06, "epoch": 0.23718601493550576, "percentage": 23.72, "elapsed_time": "0:06:42", "remaining_time": "0:21:33", "throughput": 6659.66, "total_tokens": 2678336} +{"current_steps": 2800, "total_steps": 11784, "loss": 0.1352, "lr": 1.8870393287789694e-06, "epoch": 0.23761031907671418, "percentage": 23.76, "elapsed_time": "0:06:42", "remaining_time": "0:21:31", "throughput": 6668.45, "total_tokens": 2684288} +{"current_steps": 2805, "total_steps": 11784, "loss": 0.0542, "lr": 1.8863545021128781e-06, "epoch": 0.2380346232179226, "percentage": 23.8, "elapsed_time": "0:06:42", "remaining_time": "0:21:29", "throughput": 6673.97, "total_tokens": 2688704} +{"current_steps": 2810, "total_steps": 11784, "loss": 0.0608, "lr": 1.885667730866925e-06, "epoch": 0.23845892735913102, "percentage": 23.85, "elapsed_time": "0:06:43", "remaining_time": "0:21:27", "throughput": 6679.41, "total_tokens": 2693056} +{"current_steps": 2815, "total_steps": 11784, "loss": 0.0844, "lr": 1.884979016547822e-06, "epoch": 0.23888323150033944, "percentage": 23.89, "elapsed_time": "0:06:43", "remaining_time": "0:21:25", "throughput": 6684.52, "total_tokens": 2697280} +{"current_steps": 2820, "total_steps": 11784, "loss": 0.0769, "lr": 1.8842883606665457e-06, "epoch": 0.23930753564154786, "percentage": 23.93, "elapsed_time": "0:06:43", "remaining_time": "0:21:23", "throughput": 6689.44, "total_tokens": 2701440} +{"current_steps": 2825, "total_steps": 11784, "loss": 0.0935, "lr": 1.88359576473833e-06, "epoch": 0.23973183978275628, "percentage": 23.97, "elapsed_time": "0:06:44", "remaining_time": "0:21:21", "throughput": 6697.88, "total_tokens": 2707264} +{"current_steps": 2830, "total_steps": 11784, "loss": 0.1271, "lr": 1.8829012302826674e-06, "epoch": 0.2401561439239647, "percentage": 24.02, "elapsed_time": "0:06:44", "remaining_time": "0:21:19", "throughput": 6703.38, "total_tokens": 2711680} +{"current_steps": 2835, "total_steps": 11784, "loss": 0.0232, "lr": 1.8822047588233017e-06, "epoch": 0.24058044806517312, "percentage": 24.06, "elapsed_time": "0:06:44", "remaining_time": "0:21:17", "throughput": 6709.41, "total_tokens": 2716352} +{"current_steps": 2840, "total_steps": 11784, "loss": 0.0673, "lr": 1.881506351888227e-06, "epoch": 0.24100475220638154, "percentage": 24.1, "elapsed_time": "0:06:45", "remaining_time": "0:21:16", "throughput": 6715.42, "total_tokens": 2721024} +{"current_steps": 2845, "total_steps": 11784, "loss": 0.0675, "lr": 1.8808060110096839e-06, "epoch": 0.24142905634758996, "percentage": 24.14, "elapsed_time": "0:06:45", "remaining_time": "0:21:14", "throughput": 6721.4, "total_tokens": 2725696} +{"current_steps": 2850, "total_steps": 11784, "loss": 0.0625, "lr": 1.8801037377241553e-06, "epoch": 0.24185336048879838, "percentage": 24.19, "elapsed_time": "0:06:45", "remaining_time": "0:21:12", "throughput": 6729.42, "total_tokens": 2731328} +{"current_steps": 2855, "total_steps": 11784, "loss": 0.115, "lr": 1.879399533572364e-06, "epoch": 0.2422776646300068, "percentage": 24.23, "elapsed_time": "0:06:46", "remaining_time": "0:21:10", "throughput": 6735.82, "total_tokens": 2736192} +{"current_steps": 2860, "total_steps": 11784, "loss": 0.0828, "lr": 1.8786934000992688e-06, "epoch": 0.24270196877121522, "percentage": 24.27, "elapsed_time": "0:06:46", "remaining_time": "0:21:08", "throughput": 6741.17, "total_tokens": 2740544} +{"current_steps": 2865, "total_steps": 11784, "loss": 0.0668, "lr": 1.877985338854061e-06, "epoch": 0.2431262729124236, "percentage": 24.31, "elapsed_time": "0:06:46", "remaining_time": "0:21:06", "throughput": 6747.66, "total_tokens": 2745472} +{"current_steps": 2870, "total_steps": 11784, "loss": 0.0126, "lr": 1.877275351390162e-06, "epoch": 0.24355057705363203, "percentage": 24.36, "elapsed_time": "0:06:47", "remaining_time": "0:21:04", "throughput": 6754.48, "total_tokens": 2750528} +{"current_steps": 2875, "total_steps": 11784, "loss": 0.0341, "lr": 1.8765634392652183e-06, "epoch": 0.24397488119484045, "percentage": 24.4, "elapsed_time": "0:06:47", "remaining_time": "0:21:02", "throughput": 6760.69, "total_tokens": 2755328} +{"current_steps": 2880, "total_steps": 11784, "loss": 0.0878, "lr": 1.8758496040410998e-06, "epoch": 0.24439918533604887, "percentage": 24.44, "elapsed_time": "0:06:47", "remaining_time": "0:21:01", "throughput": 6766.86, "total_tokens": 2760128} +{"current_steps": 2885, "total_steps": 11784, "loss": 0.0316, "lr": 1.8751338472838942e-06, "epoch": 0.2448234894772573, "percentage": 24.48, "elapsed_time": "0:06:48", "remaining_time": "0:20:59", "throughput": 6773.12, "total_tokens": 2764992} +{"current_steps": 2890, "total_steps": 11784, "loss": 0.0563, "lr": 1.8744161705639065e-06, "epoch": 0.24524779361846571, "percentage": 24.52, "elapsed_time": "0:06:48", "remaining_time": "0:20:57", "throughput": 6778.5, "total_tokens": 2769408} +{"current_steps": 2895, "total_steps": 11784, "loss": 0.0999, "lr": 1.8736965754556526e-06, "epoch": 0.24567209775967414, "percentage": 24.57, "elapsed_time": "0:06:48", "remaining_time": "0:20:55", "throughput": 6784.91, "total_tokens": 2774336} +{"current_steps": 2900, "total_steps": 11784, "loss": 0.1851, "lr": 1.8729750635378578e-06, "epoch": 0.24609640190088256, "percentage": 24.61, "elapsed_time": "0:06:49", "remaining_time": "0:20:53", "throughput": 6791.06, "total_tokens": 2779136} +{"current_steps": 2905, "total_steps": 11784, "loss": 0.0546, "lr": 1.872251636393453e-06, "epoch": 0.24652070604209098, "percentage": 24.65, "elapsed_time": "0:06:49", "remaining_time": "0:20:51", "throughput": 6797.94, "total_tokens": 2784256} +{"current_steps": 2910, "total_steps": 11784, "loss": 0.1097, "lr": 1.8715262956095694e-06, "epoch": 0.2469450101832994, "percentage": 24.69, "elapsed_time": "0:06:49", "remaining_time": "0:20:49", "throughput": 6803.69, "total_tokens": 2788864} +{"current_steps": 2915, "total_steps": 11784, "loss": 0.0549, "lr": 1.8707990427775386e-06, "epoch": 0.24736931432450782, "percentage": 24.74, "elapsed_time": "0:06:50", "remaining_time": "0:20:48", "throughput": 6808.76, "total_tokens": 2793152} +{"current_steps": 2920, "total_steps": 11784, "loss": 0.0782, "lr": 1.870069879492886e-06, "epoch": 0.24779361846571624, "percentage": 24.78, "elapsed_time": "0:06:50", "remaining_time": "0:20:46", "throughput": 6815.31, "total_tokens": 2798144} +{"current_steps": 2925, "total_steps": 11784, "loss": 0.0769, "lr": 1.869338807355328e-06, "epoch": 0.24821792260692466, "percentage": 24.82, "elapsed_time": "0:06:50", "remaining_time": "0:20:44", "throughput": 6821.78, "total_tokens": 2803136} +{"current_steps": 2930, "total_steps": 11784, "loss": 0.0514, "lr": 1.8686058279687699e-06, "epoch": 0.24864222674813305, "percentage": 24.86, "elapsed_time": "0:06:51", "remaining_time": "0:20:42", "throughput": 6827.51, "total_tokens": 2807744} +{"current_steps": 2935, "total_steps": 11784, "loss": 0.038, "lr": 1.8678709429413e-06, "epoch": 0.24906653088934147, "percentage": 24.91, "elapsed_time": "0:06:51", "remaining_time": "0:20:40", "throughput": 6834.29, "total_tokens": 2812928} +{"current_steps": 2940, "total_steps": 11784, "loss": 0.0578, "lr": 1.867134153885189e-06, "epoch": 0.2494908350305499, "percentage": 24.95, "elapsed_time": "0:06:51", "remaining_time": "0:20:39", "throughput": 6839.84, "total_tokens": 2817536} +{"current_steps": 2945, "total_steps": 11784, "loss": 0.0818, "lr": 1.8663954624168832e-06, "epoch": 0.2499151391717583, "percentage": 24.99, "elapsed_time": "0:06:52", "remaining_time": "0:20:37", "throughput": 6846.82, "total_tokens": 2822784} +{"current_steps": 2950, "total_steps": 11784, "loss": 0.0749, "lr": 1.8656548701570039e-06, "epoch": 0.25033944331296676, "percentage": 25.03, "elapsed_time": "0:06:52", "remaining_time": "0:20:35", "throughput": 6852.35, "total_tokens": 2827328} +{"current_steps": 2950, "total_steps": 11784, "eval_loss": 0.07611989974975586, "epoch": 0.25033944331296676, "percentage": 25.03, "elapsed_time": "0:07:08", "remaining_time": "0:21:23", "throughput": 6598.04, "total_tokens": 2827328} +{"current_steps": 2955, "total_steps": 11784, "loss": 0.0738, "lr": 1.864912378730342e-06, "epoch": 0.2507637474541752, "percentage": 25.08, "elapsed_time": "0:07:34", "remaining_time": "0:22:39", "throughput": 6225.05, "total_tokens": 2832128} +{"current_steps": 2960, "total_steps": 11784, "loss": 0.149, "lr": 1.8641679897658551e-06, "epoch": 0.25118805159538354, "percentage": 25.12, "elapsed_time": "0:07:35", "remaining_time": "0:22:37", "throughput": 6232.76, "total_tokens": 2837824} +{"current_steps": 2965, "total_steps": 11784, "loss": 0.063, "lr": 1.8634217048966633e-06, "epoch": 0.25161235573659196, "percentage": 25.16, "elapsed_time": "0:07:35", "remaining_time": "0:22:35", "throughput": 6238.01, "total_tokens": 2842240} +{"current_steps": 2970, "total_steps": 11784, "loss": 0.0512, "lr": 1.8626735257600475e-06, "epoch": 0.2520366598778004, "percentage": 25.2, "elapsed_time": "0:07:35", "remaining_time": "0:22:33", "throughput": 6243.64, "total_tokens": 2846848} +{"current_steps": 2975, "total_steps": 11784, "loss": 0.0923, "lr": 1.8619234539974429e-06, "epoch": 0.2524609640190088, "percentage": 25.25, "elapsed_time": "0:07:36", "remaining_time": "0:22:31", "throughput": 6249.07, "total_tokens": 2851392} +{"current_steps": 2980, "total_steps": 11784, "loss": 0.0373, "lr": 1.8611714912544376e-06, "epoch": 0.2528852681602172, "percentage": 25.29, "elapsed_time": "0:07:36", "remaining_time": "0:22:29", "throughput": 6254.02, "total_tokens": 2855680} +{"current_steps": 2985, "total_steps": 11784, "loss": 0.0624, "lr": 1.860417639180769e-06, "epoch": 0.25330957230142565, "percentage": 25.33, "elapsed_time": "0:07:36", "remaining_time": "0:22:26", "throughput": 6260.09, "total_tokens": 2860544} +{"current_steps": 2990, "total_steps": 11784, "loss": 0.0577, "lr": 1.8596618994303183e-06, "epoch": 0.25373387644263407, "percentage": 25.37, "elapsed_time": "0:07:37", "remaining_time": "0:22:24", "throughput": 6265.64, "total_tokens": 2865152} +{"current_steps": 2995, "total_steps": 11784, "loss": 0.066, "lr": 1.858904273661109e-06, "epoch": 0.2541581805838425, "percentage": 25.42, "elapsed_time": "0:07:37", "remaining_time": "0:22:22", "throughput": 6271.93, "total_tokens": 2870144} +{"current_steps": 3000, "total_steps": 11784, "loss": 0.0492, "lr": 1.8581447635353019e-06, "epoch": 0.2545824847250509, "percentage": 25.46, "elapsed_time": "0:07:37", "remaining_time": "0:22:20", "throughput": 6278.33, "total_tokens": 2875200} +{"current_steps": 3005, "total_steps": 11784, "loss": 0.1037, "lr": 1.8573833707191918e-06, "epoch": 0.2550067888662593, "percentage": 25.5, "elapsed_time": "0:07:38", "remaining_time": "0:22:18", "throughput": 6284.59, "total_tokens": 2880192} +{"current_steps": 3010, "total_steps": 11784, "loss": 0.1198, "lr": 1.8566200968832044e-06, "epoch": 0.25543109300746775, "percentage": 25.54, "elapsed_time": "0:07:38", "remaining_time": "0:22:16", "throughput": 6290.17, "total_tokens": 2884800} +{"current_steps": 3015, "total_steps": 11784, "loss": 0.0739, "lr": 1.855854943701892e-06, "epoch": 0.25585539714867617, "percentage": 25.59, "elapsed_time": "0:07:38", "remaining_time": "0:22:14", "throughput": 6297.18, "total_tokens": 2890176} +{"current_steps": 3020, "total_steps": 11784, "loss": 0.1248, "lr": 1.85508791285393e-06, "epoch": 0.2562797012898846, "percentage": 25.63, "elapsed_time": "0:07:39", "remaining_time": "0:22:12", "throughput": 6303.16, "total_tokens": 2895040} +{"current_steps": 3025, "total_steps": 11784, "loss": 0.0693, "lr": 1.8543190060221125e-06, "epoch": 0.256704005431093, "percentage": 25.67, "elapsed_time": "0:07:39", "remaining_time": "0:22:10", "throughput": 6308.94, "total_tokens": 2899776} +{"current_steps": 3030, "total_steps": 11784, "loss": 0.0601, "lr": 1.853548224893351e-06, "epoch": 0.25712830957230143, "percentage": 25.71, "elapsed_time": "0:07:39", "remaining_time": "0:22:08", "throughput": 6313.9, "total_tokens": 2904064} +{"current_steps": 3035, "total_steps": 11784, "loss": 0.0395, "lr": 1.8527755711586678e-06, "epoch": 0.25755261371350985, "percentage": 25.76, "elapsed_time": "0:07:40", "remaining_time": "0:22:06", "throughput": 6319.68, "total_tokens": 2908800} +{"current_steps": 3040, "total_steps": 11784, "loss": 0.0432, "lr": 1.8520010465131935e-06, "epoch": 0.25797691785471827, "percentage": 25.8, "elapsed_time": "0:07:40", "remaining_time": "0:22:04", "throughput": 6324.84, "total_tokens": 2913216} +{"current_steps": 3045, "total_steps": 11784, "loss": 0.0755, "lr": 1.8512246526561636e-06, "epoch": 0.2584012219959267, "percentage": 25.84, "elapsed_time": "0:07:40", "remaining_time": "0:22:02", "throughput": 6329.72, "total_tokens": 2917504} +{"current_steps": 3050, "total_steps": 11784, "loss": 0.1289, "lr": 1.8504463912909149e-06, "epoch": 0.2588255261371351, "percentage": 25.88, "elapsed_time": "0:07:41", "remaining_time": "0:22:00", "throughput": 6336.42, "total_tokens": 2922752} +{"current_steps": 3055, "total_steps": 11784, "loss": 0.0354, "lr": 1.8496662641248807e-06, "epoch": 0.25924983027834353, "percentage": 25.92, "elapsed_time": "0:07:41", "remaining_time": "0:21:58", "throughput": 6343.08, "total_tokens": 2928000} +{"current_steps": 3060, "total_steps": 11784, "loss": 0.0658, "lr": 1.8488842728695874e-06, "epoch": 0.25967413441955195, "percentage": 25.97, "elapsed_time": "0:07:41", "remaining_time": "0:21:56", "throughput": 6348.82, "total_tokens": 2932736} +{"current_steps": 3065, "total_steps": 11784, "loss": 0.1191, "lr": 1.8481004192406525e-06, "epoch": 0.26009843856076037, "percentage": 26.01, "elapsed_time": "0:07:42", "remaining_time": "0:21:55", "throughput": 6354.85, "total_tokens": 2937664} +{"current_steps": 3070, "total_steps": 11784, "loss": 0.1139, "lr": 1.8473147049577773e-06, "epoch": 0.2605227427019688, "percentage": 26.05, "elapsed_time": "0:07:42", "remaining_time": "0:21:53", "throughput": 6361.27, "total_tokens": 2942784} +{"current_steps": 3075, "total_steps": 11784, "loss": 0.1196, "lr": 1.8465271317447474e-06, "epoch": 0.2609470468431772, "percentage": 26.09, "elapsed_time": "0:07:42", "remaining_time": "0:21:51", "throughput": 6367.55, "total_tokens": 2947840} +{"current_steps": 3080, "total_steps": 11784, "loss": 0.0741, "lr": 1.845737701329425e-06, "epoch": 0.26137135098438563, "percentage": 26.14, "elapsed_time": "0:07:43", "remaining_time": "0:21:49", "throughput": 6372.99, "total_tokens": 2952448} +{"current_steps": 3085, "total_steps": 11784, "loss": 0.0241, "lr": 1.8449464154437475e-06, "epoch": 0.26179565512559405, "percentage": 26.18, "elapsed_time": "0:07:43", "remaining_time": "0:21:47", "throughput": 6378.49, "total_tokens": 2957120} +{"current_steps": 3090, "total_steps": 11784, "loss": 0.1096, "lr": 1.8441532758237233e-06, "epoch": 0.26221995926680247, "percentage": 26.22, "elapsed_time": "0:07:43", "remaining_time": "0:21:45", "throughput": 6383.91, "total_tokens": 2961728} +{"current_steps": 3095, "total_steps": 11784, "loss": 0.0875, "lr": 1.8433582842094273e-06, "epoch": 0.26264426340801084, "percentage": 26.26, "elapsed_time": "0:07:44", "remaining_time": "0:21:43", "throughput": 6389.07, "total_tokens": 2966208} +{"current_steps": 3100, "total_steps": 11784, "loss": 0.0347, "lr": 1.8425614423449974e-06, "epoch": 0.26306856754921926, "percentage": 26.31, "elapsed_time": "0:07:44", "remaining_time": "0:21:41", "throughput": 6397.15, "total_tokens": 2972288} +{"current_steps": 3105, "total_steps": 11784, "loss": 0.0902, "lr": 1.8417627519786313e-06, "epoch": 0.2634928716904277, "percentage": 26.35, "elapsed_time": "0:07:44", "remaining_time": "0:21:39", "throughput": 6401.83, "total_tokens": 2976512} +{"current_steps": 3110, "total_steps": 11784, "loss": 0.0626, "lr": 1.840962214862582e-06, "epoch": 0.2639171758316361, "percentage": 26.39, "elapsed_time": "0:07:45", "remaining_time": "0:21:37", "throughput": 6407.44, "total_tokens": 2981248} +{"current_steps": 3115, "total_steps": 11784, "loss": 0.0862, "lr": 1.8401598327531533e-06, "epoch": 0.2643414799728445, "percentage": 26.43, "elapsed_time": "0:07:45", "remaining_time": "0:21:35", "throughput": 6412.6, "total_tokens": 2985728} +{"current_steps": 3120, "total_steps": 11784, "loss": 0.0586, "lr": 1.839355607410698e-06, "epoch": 0.26476578411405294, "percentage": 26.48, "elapsed_time": "0:07:45", "remaining_time": "0:21:33", "throughput": 6417.44, "total_tokens": 2990144} +{"current_steps": 3125, "total_steps": 11784, "loss": 0.0669, "lr": 1.8385495405996119e-06, "epoch": 0.26519008825526136, "percentage": 26.52, "elapsed_time": "0:07:46", "remaining_time": "0:21:31", "throughput": 6422.41, "total_tokens": 2994560} +{"current_steps": 3130, "total_steps": 11784, "loss": 0.0625, "lr": 1.8377416340883312e-06, "epoch": 0.2656143923964698, "percentage": 26.56, "elapsed_time": "0:07:46", "remaining_time": "0:21:30", "throughput": 6428.34, "total_tokens": 2999488} +{"current_steps": 3135, "total_steps": 11784, "loss": 0.0634, "lr": 1.836931889649328e-06, "epoch": 0.2660386965376782, "percentage": 26.6, "elapsed_time": "0:07:46", "remaining_time": "0:21:28", "throughput": 6433.68, "total_tokens": 3004096} +{"current_steps": 3140, "total_steps": 11784, "loss": 0.1258, "lr": 1.8361203090591068e-06, "epoch": 0.2664630006788866, "percentage": 26.65, "elapsed_time": "0:07:47", "remaining_time": "0:21:26", "throughput": 6438.65, "total_tokens": 3008512} +{"current_steps": 3145, "total_steps": 11784, "loss": 0.0708, "lr": 1.8353068940982006e-06, "epoch": 0.26688730482009504, "percentage": 26.69, "elapsed_time": "0:07:47", "remaining_time": "0:21:24", "throughput": 6444.68, "total_tokens": 3013504} +{"current_steps": 3150, "total_steps": 11784, "loss": 0.0164, "lr": 1.8344916465511664e-06, "epoch": 0.26731160896130346, "percentage": 26.73, "elapsed_time": "0:07:47", "remaining_time": "0:21:22", "throughput": 6450.01, "total_tokens": 3018112} +{"current_steps": 3155, "total_steps": 11784, "loss": 0.1132, "lr": 1.833674568206582e-06, "epoch": 0.2677359131025119, "percentage": 26.77, "elapsed_time": "0:07:48", "remaining_time": "0:21:20", "throughput": 6456.12, "total_tokens": 3023168} +{"current_steps": 3160, "total_steps": 11784, "loss": 0.0723, "lr": 1.832855660857042e-06, "epoch": 0.2681602172437203, "percentage": 26.82, "elapsed_time": "0:07:48", "remaining_time": "0:21:18", "throughput": 6461.54, "total_tokens": 3027840} +{"current_steps": 3165, "total_steps": 11784, "loss": 0.1293, "lr": 1.8320349262991532e-06, "epoch": 0.2685845213849287, "percentage": 26.86, "elapsed_time": "0:07:48", "remaining_time": "0:21:17", "throughput": 6469.93, "total_tokens": 3034176} +{"current_steps": 3170, "total_steps": 11784, "loss": 0.063, "lr": 1.8312123663335316e-06, "epoch": 0.26900882552613714, "percentage": 26.9, "elapsed_time": "0:07:49", "remaining_time": "0:21:15", "throughput": 6474.59, "total_tokens": 3038464} +{"current_steps": 3175, "total_steps": 11784, "loss": 0.0748, "lr": 1.8303879827647974e-06, "epoch": 0.26943312966734556, "percentage": 26.94, "elapsed_time": "0:07:49", "remaining_time": "0:21:13", "throughput": 6479.59, "total_tokens": 3042944} +{"current_steps": 3180, "total_steps": 11784, "loss": 0.0582, "lr": 1.8295617774015724e-06, "epoch": 0.269857433808554, "percentage": 26.99, "elapsed_time": "0:07:49", "remaining_time": "0:21:11", "throughput": 6485.67, "total_tokens": 3048000} +{"current_steps": 3185, "total_steps": 11784, "loss": 0.0863, "lr": 1.8287337520564744e-06, "epoch": 0.2702817379497624, "percentage": 27.03, "elapsed_time": "0:07:50", "remaining_time": "0:21:09", "throughput": 6491.74, "total_tokens": 3053056} +{"current_steps": 3190, "total_steps": 11784, "loss": 0.1082, "lr": 1.8279039085461148e-06, "epoch": 0.2707060420909708, "percentage": 27.07, "elapsed_time": "0:07:50", "remaining_time": "0:21:07", "throughput": 6497.2, "total_tokens": 3057792} +{"current_steps": 3195, "total_steps": 11784, "loss": 0.1442, "lr": 1.8270722486910933e-06, "epoch": 0.27113034623217924, "percentage": 27.11, "elapsed_time": "0:07:50", "remaining_time": "0:21:06", "throughput": 6503.17, "total_tokens": 3062784} +{"current_steps": 3200, "total_steps": 11784, "loss": 0.1048, "lr": 1.8262387743159948e-06, "epoch": 0.27155465037338766, "percentage": 27.16, "elapsed_time": "0:07:51", "remaining_time": "0:21:04", "throughput": 6509.02, "total_tokens": 3067712} +{"current_steps": 3205, "total_steps": 11784, "loss": 0.0471, "lr": 1.8254034872493853e-06, "epoch": 0.2719789545145961, "percentage": 27.2, "elapsed_time": "0:07:51", "remaining_time": "0:21:02", "throughput": 6513.63, "total_tokens": 3072000} +{"current_steps": 3210, "total_steps": 11784, "loss": 0.0572, "lr": 1.8245663893238072e-06, "epoch": 0.2724032586558045, "percentage": 27.24, "elapsed_time": "0:07:51", "remaining_time": "0:21:00", "throughput": 6518.49, "total_tokens": 3076416} +{"current_steps": 3215, "total_steps": 11784, "loss": 0.0936, "lr": 1.823727482375776e-06, "epoch": 0.2728275627970129, "percentage": 27.28, "elapsed_time": "0:07:52", "remaining_time": "0:20:58", "throughput": 6525.01, "total_tokens": 3081792} +{"current_steps": 3220, "total_steps": 11784, "loss": 0.1179, "lr": 1.8228867682457762e-06, "epoch": 0.27325186693822134, "percentage": 27.33, "elapsed_time": "0:07:52", "remaining_time": "0:20:57", "throughput": 6530.65, "total_tokens": 3086656} +{"current_steps": 3225, "total_steps": 11784, "loss": 0.0912, "lr": 1.8220442487782565e-06, "epoch": 0.2736761710794297, "percentage": 27.37, "elapsed_time": "0:07:52", "remaining_time": "0:20:55", "throughput": 6536.0, "total_tokens": 3091328} +{"current_steps": 3230, "total_steps": 11784, "loss": 0.0176, "lr": 1.8211999258216273e-06, "epoch": 0.27410047522063813, "percentage": 27.41, "elapsed_time": "0:07:53", "remaining_time": "0:20:53", "throughput": 6542.17, "total_tokens": 3096448} +{"current_steps": 3235, "total_steps": 11784, "loss": 0.0774, "lr": 1.8203538012282548e-06, "epoch": 0.27452477936184655, "percentage": 27.45, "elapsed_time": "0:07:53", "remaining_time": "0:20:51", "throughput": 6549.77, "total_tokens": 3102400} +{"current_steps": 3240, "total_steps": 11784, "loss": 0.0535, "lr": 1.8195058768544583e-06, "epoch": 0.27494908350305497, "percentage": 27.49, "elapsed_time": "0:07:53", "remaining_time": "0:20:49", "throughput": 6554.96, "total_tokens": 3107008} +{"current_steps": 3245, "total_steps": 11784, "loss": 0.0291, "lr": 1.8186561545605052e-06, "epoch": 0.2753733876442634, "percentage": 27.54, "elapsed_time": "0:07:54", "remaining_time": "0:20:48", "throughput": 6560.59, "total_tokens": 3111872} +{"current_steps": 3250, "total_steps": 11784, "loss": 0.0274, "lr": 1.8178046362106083e-06, "epoch": 0.2757976917854718, "percentage": 27.58, "elapsed_time": "0:07:54", "remaining_time": "0:20:46", "throughput": 6565.83, "total_tokens": 3116544} +{"current_steps": 3255, "total_steps": 11784, "loss": 0.1035, "lr": 1.8169513236729195e-06, "epoch": 0.27622199592668023, "percentage": 27.62, "elapsed_time": "0:07:54", "remaining_time": "0:20:44", "throughput": 6570.69, "total_tokens": 3121024} +{"current_steps": 3260, "total_steps": 11784, "loss": 0.0561, "lr": 1.8160962188195278e-06, "epoch": 0.27664630006788865, "percentage": 27.66, "elapsed_time": "0:07:55", "remaining_time": "0:20:42", "throughput": 6575.92, "total_tokens": 3125696} +{"current_steps": 3265, "total_steps": 11784, "loss": 0.0989, "lr": 1.8152393235264545e-06, "epoch": 0.27707060420909707, "percentage": 27.71, "elapsed_time": "0:07:55", "remaining_time": "0:20:41", "throughput": 6581.86, "total_tokens": 3130752} +{"current_steps": 3270, "total_steps": 11784, "loss": 0.0991, "lr": 1.8143806396736486e-06, "epoch": 0.2774949083503055, "percentage": 27.75, "elapsed_time": "0:07:55", "remaining_time": "0:20:39", "throughput": 6586.94, "total_tokens": 3135360} +{"current_steps": 3275, "total_steps": 11784, "loss": 0.1135, "lr": 1.813520169144983e-06, "epoch": 0.2779192124915139, "percentage": 27.79, "elapsed_time": "0:07:56", "remaining_time": "0:20:37", "throughput": 6592.13, "total_tokens": 3140032} +{"current_steps": 3280, "total_steps": 11784, "loss": 0.0532, "lr": 1.8126579138282501e-06, "epoch": 0.27834351663272233, "percentage": 27.83, "elapsed_time": "0:07:56", "remaining_time": "0:20:35", "throughput": 6597.77, "total_tokens": 3144960} +{"current_steps": 3285, "total_steps": 11784, "loss": 0.065, "lr": 1.8117938756151592e-06, "epoch": 0.27876782077393075, "percentage": 27.88, "elapsed_time": "0:07:57", "remaining_time": "0:20:34", "throughput": 6603.64, "total_tokens": 3150016} +{"current_steps": 3290, "total_steps": 11784, "loss": 0.1384, "lr": 1.8109280564013297e-06, "epoch": 0.2791921249151392, "percentage": 27.92, "elapsed_time": "0:07:57", "remaining_time": "0:20:32", "throughput": 6609.72, "total_tokens": 3155200} +{"current_steps": 3295, "total_steps": 11784, "loss": 0.0494, "lr": 1.8100604580862898e-06, "epoch": 0.2796164290563476, "percentage": 27.96, "elapsed_time": "0:07:57", "remaining_time": "0:20:30", "throughput": 6615.16, "total_tokens": 3160000} +{"current_steps": 3300, "total_steps": 11784, "loss": 0.0524, "lr": 1.8091910825734686e-06, "epoch": 0.280040733197556, "percentage": 28.0, "elapsed_time": "0:07:58", "remaining_time": "0:20:28", "throughput": 6620.33, "total_tokens": 3164672} +{"current_steps": 3305, "total_steps": 11784, "loss": 0.052, "lr": 1.808319931770197e-06, "epoch": 0.28046503733876443, "percentage": 28.05, "elapsed_time": "0:07:58", "remaining_time": "0:20:27", "throughput": 6625.16, "total_tokens": 3169152} +{"current_steps": 3310, "total_steps": 11784, "loss": 0.0446, "lr": 1.8074470075876983e-06, "epoch": 0.28088934147997285, "percentage": 28.09, "elapsed_time": "0:07:58", "remaining_time": "0:20:25", "throughput": 6630.5, "total_tokens": 3173888} +{"current_steps": 3315, "total_steps": 11784, "loss": 0.0326, "lr": 1.8065723119410884e-06, "epoch": 0.2813136456211813, "percentage": 28.13, "elapsed_time": "0:07:59", "remaining_time": "0:20:23", "throughput": 6636.55, "total_tokens": 3179072} +{"current_steps": 3320, "total_steps": 11784, "loss": 0.0651, "lr": 1.8056958467493678e-06, "epoch": 0.2817379497623897, "percentage": 28.17, "elapsed_time": "0:07:59", "remaining_time": "0:20:22", "throughput": 6641.39, "total_tokens": 3183552} +{"current_steps": 3325, "total_steps": 11784, "loss": 0.0082, "lr": 1.8048176139354207e-06, "epoch": 0.2821622539035981, "percentage": 28.22, "elapsed_time": "0:07:59", "remaining_time": "0:20:21", "throughput": 6641.81, "total_tokens": 3187968} +{"current_steps": 3330, "total_steps": 11784, "loss": 0.1037, "lr": 1.8039376154260086e-06, "epoch": 0.28258655804480654, "percentage": 28.26, "elapsed_time": "0:08:00", "remaining_time": "0:20:19", "throughput": 6647.05, "total_tokens": 3192704} +{"current_steps": 3335, "total_steps": 11784, "loss": 0.0991, "lr": 1.803055853151767e-06, "epoch": 0.28301086218601496, "percentage": 28.3, "elapsed_time": "0:08:00", "remaining_time": "0:20:17", "throughput": 6652.9, "total_tokens": 3197760} +{"current_steps": 3340, "total_steps": 11784, "loss": 0.0358, "lr": 1.8021723290472007e-06, "epoch": 0.2834351663272234, "percentage": 28.34, "elapsed_time": "0:08:00", "remaining_time": "0:20:16", "throughput": 6657.84, "total_tokens": 3202368} +{"current_steps": 3345, "total_steps": 11784, "loss": 0.1846, "lr": 1.8012870450506798e-06, "epoch": 0.2838594704684318, "percentage": 28.39, "elapsed_time": "0:08:01", "remaining_time": "0:20:14", "throughput": 6663.6, "total_tokens": 3207360} +{"current_steps": 3350, "total_steps": 11784, "loss": 0.0893, "lr": 1.800400003104436e-06, "epoch": 0.2842837746096402, "percentage": 28.43, "elapsed_time": "0:08:01", "remaining_time": "0:20:12", "throughput": 6671.39, "total_tokens": 3213632} +{"current_steps": 3355, "total_steps": 11784, "loss": 0.0597, "lr": 1.799511205154557e-06, "epoch": 0.2847080787508486, "percentage": 28.47, "elapsed_time": "0:08:02", "remaining_time": "0:20:11", "throughput": 6677.96, "total_tokens": 3219136} +{"current_steps": 3360, "total_steps": 11784, "loss": 0.0728, "lr": 1.7986206531509835e-06, "epoch": 0.285132382892057, "percentage": 28.51, "elapsed_time": "0:08:02", "remaining_time": "0:20:09", "throughput": 6685.31, "total_tokens": 3225088} +{"current_steps": 3365, "total_steps": 11784, "loss": 0.0789, "lr": 1.7977283490475043e-06, "epoch": 0.2855566870332654, "percentage": 28.56, "elapsed_time": "0:08:02", "remaining_time": "0:20:07", "throughput": 6689.9, "total_tokens": 3229504} +{"current_steps": 3370, "total_steps": 11784, "loss": 0.0618, "lr": 1.796834294801752e-06, "epoch": 0.28598099117447384, "percentage": 28.6, "elapsed_time": "0:08:03", "remaining_time": "0:20:06", "throughput": 6695.34, "total_tokens": 3234368} +{"current_steps": 3375, "total_steps": 11784, "loss": 0.082, "lr": 1.7959384923751993e-06, "epoch": 0.28640529531568226, "percentage": 28.64, "elapsed_time": "0:08:03", "remaining_time": "0:20:04", "throughput": 6700.78, "total_tokens": 3239232} +{"current_steps": 3380, "total_steps": 11784, "loss": 0.064, "lr": 1.7950409437331535e-06, "epoch": 0.2868295994568907, "percentage": 28.68, "elapsed_time": "0:08:03", "remaining_time": "0:20:02", "throughput": 6706.52, "total_tokens": 3244288} +{"current_steps": 3385, "total_steps": 11784, "loss": 0.1214, "lr": 1.7941416508447534e-06, "epoch": 0.2872539035980991, "percentage": 28.73, "elapsed_time": "0:08:04", "remaining_time": "0:20:01", "throughput": 6710.99, "total_tokens": 3248640} +{"current_steps": 3390, "total_steps": 11784, "loss": 0.0186, "lr": 1.7932406156829649e-06, "epoch": 0.2876782077393075, "percentage": 28.77, "elapsed_time": "0:08:04", "remaining_time": "0:19:59", "throughput": 6716.28, "total_tokens": 3253440} +{"current_steps": 3395, "total_steps": 11784, "loss": 0.0676, "lr": 1.7923378402245756e-06, "epoch": 0.28810251188051594, "percentage": 28.81, "elapsed_time": "0:08:04", "remaining_time": "0:19:57", "throughput": 6721.2, "total_tokens": 3258048} +{"current_steps": 3400, "total_steps": 11784, "loss": 0.0332, "lr": 1.7914333264501913e-06, "epoch": 0.28852681602172436, "percentage": 28.85, "elapsed_time": "0:08:05", "remaining_time": "0:19:56", "throughput": 6726.56, "total_tokens": 3262912} +{"current_steps": 3405, "total_steps": 11784, "loss": 0.0887, "lr": 1.790527076344232e-06, "epoch": 0.2889511201629328, "percentage": 28.9, "elapsed_time": "0:08:05", "remaining_time": "0:19:54", "throughput": 6731.96, "total_tokens": 3267776} +{"current_steps": 3410, "total_steps": 11784, "loss": 0.0464, "lr": 1.7896190918949266e-06, "epoch": 0.2893754243041412, "percentage": 28.94, "elapsed_time": "0:08:05", "remaining_time": "0:19:52", "throughput": 6736.94, "total_tokens": 3272448} +{"current_steps": 3415, "total_steps": 11784, "loss": 0.07, "lr": 1.7887093750943088e-06, "epoch": 0.2897997284453496, "percentage": 28.98, "elapsed_time": "0:08:06", "remaining_time": "0:19:51", "throughput": 6741.83, "total_tokens": 3277056} +{"current_steps": 3420, "total_steps": 11784, "loss": 0.0767, "lr": 1.7877979279382131e-06, "epoch": 0.29022403258655805, "percentage": 29.02, "elapsed_time": "0:08:06", "remaining_time": "0:19:49", "throughput": 6747.37, "total_tokens": 3282048} +{"current_steps": 3425, "total_steps": 11784, "loss": 0.0967, "lr": 1.7868847524262708e-06, "epoch": 0.29064833672776647, "percentage": 29.06, "elapsed_time": "0:08:06", "remaining_time": "0:19:47", "throughput": 6751.63, "total_tokens": 3286336} +{"current_steps": 3430, "total_steps": 11784, "loss": 0.0277, "lr": 1.7859698505619043e-06, "epoch": 0.2910726408689749, "percentage": 29.11, "elapsed_time": "0:08:07", "remaining_time": "0:19:46", "throughput": 6756.41, "total_tokens": 3290880} +{"current_steps": 3435, "total_steps": 11784, "loss": 0.0378, "lr": 1.7850532243523238e-06, "epoch": 0.2914969450101833, "percentage": 29.15, "elapsed_time": "0:08:07", "remaining_time": "0:19:44", "throughput": 6761.02, "total_tokens": 3295360} +{"current_steps": 3440, "total_steps": 11784, "loss": 0.0274, "lr": 1.7841348758085224e-06, "epoch": 0.2919212491513917, "percentage": 29.19, "elapsed_time": "0:08:07", "remaining_time": "0:19:43", "throughput": 6765.65, "total_tokens": 3299840} +{"current_steps": 3445, "total_steps": 11784, "loss": 0.0298, "lr": 1.7832148069452719e-06, "epoch": 0.29234555329260015, "percentage": 29.23, "elapsed_time": "0:08:08", "remaining_time": "0:19:41", "throughput": 6770.46, "total_tokens": 3304448} +{"current_steps": 3450, "total_steps": 11784, "loss": 0.1044, "lr": 1.7822930197811186e-06, "epoch": 0.29276985743380857, "percentage": 29.28, "elapsed_time": "0:08:08", "remaining_time": "0:19:39", "throughput": 6775.06, "total_tokens": 3308928} +{"current_steps": 3455, "total_steps": 11784, "loss": 0.1, "lr": 1.781369516338378e-06, "epoch": 0.293194161575017, "percentage": 29.32, "elapsed_time": "0:08:08", "remaining_time": "0:19:38", "throughput": 6779.64, "total_tokens": 3313408} +{"current_steps": 3460, "total_steps": 11784, "loss": 0.0865, "lr": 1.7804442986431317e-06, "epoch": 0.2936184657162254, "percentage": 29.36, "elapsed_time": "0:08:09", "remaining_time": "0:19:36", "throughput": 6784.62, "total_tokens": 3318080} +{"current_steps": 3465, "total_steps": 11784, "loss": 0.0947, "lr": 1.7795173687252213e-06, "epoch": 0.29404276985743383, "percentage": 29.4, "elapsed_time": "0:08:09", "remaining_time": "0:19:34", "throughput": 6790.18, "total_tokens": 3323136} +{"current_steps": 3470, "total_steps": 11784, "loss": 0.0454, "lr": 1.778588728618246e-06, "epoch": 0.29446707399864225, "percentage": 29.45, "elapsed_time": "0:08:09", "remaining_time": "0:19:33", "throughput": 6795.29, "total_tokens": 3327936} +{"current_steps": 3475, "total_steps": 11784, "loss": 0.0718, "lr": 1.777658380359556e-06, "epoch": 0.29489137813985067, "percentage": 29.49, "elapsed_time": "0:08:10", "remaining_time": "0:19:31", "throughput": 6800.59, "total_tokens": 3332864} +{"current_steps": 3480, "total_steps": 11784, "loss": 0.0759, "lr": 1.7767263259902494e-06, "epoch": 0.2953156822810591, "percentage": 29.53, "elapsed_time": "0:08:10", "remaining_time": "0:19:30", "throughput": 6806.35, "total_tokens": 3338048} +{"current_steps": 3485, "total_steps": 11784, "loss": 0.1356, "lr": 1.7757925675551672e-06, "epoch": 0.2957399864222675, "percentage": 29.57, "elapsed_time": "0:08:10", "remaining_time": "0:19:28", "throughput": 6811.86, "total_tokens": 3343104} +{"current_steps": 3490, "total_steps": 11784, "loss": 0.0701, "lr": 1.7748571071028898e-06, "epoch": 0.2961642905634759, "percentage": 29.62, "elapsed_time": "0:08:11", "remaining_time": "0:19:27", "throughput": 6816.58, "total_tokens": 3347712} +{"current_steps": 3495, "total_steps": 11784, "loss": 0.0489, "lr": 1.7739199466857301e-06, "epoch": 0.2965885947046843, "percentage": 29.66, "elapsed_time": "0:08:11", "remaining_time": "0:19:25", "throughput": 6820.75, "total_tokens": 3352000} +{"current_steps": 3500, "total_steps": 11784, "loss": 0.0291, "lr": 1.772981088359732e-06, "epoch": 0.2970128988458927, "percentage": 29.7, "elapsed_time": "0:08:11", "remaining_time": "0:19:23", "throughput": 6825.25, "total_tokens": 3356480} +{"current_steps": 3505, "total_steps": 11784, "loss": 0.0997, "lr": 1.7720405341846636e-06, "epoch": 0.29743720298710113, "percentage": 29.74, "elapsed_time": "0:08:12", "remaining_time": "0:19:22", "throughput": 6830.75, "total_tokens": 3361536} +{"current_steps": 3510, "total_steps": 11784, "loss": 0.0796, "lr": 1.771098286224014e-06, "epoch": 0.29786150712830956, "percentage": 29.79, "elapsed_time": "0:08:12", "remaining_time": "0:19:20", "throughput": 6837.4, "total_tokens": 3367296} +{"current_steps": 3515, "total_steps": 11784, "loss": 0.0678, "lr": 1.7701543465449884e-06, "epoch": 0.298285811269518, "percentage": 29.83, "elapsed_time": "0:08:12", "remaining_time": "0:19:19", "throughput": 6842.43, "total_tokens": 3372096} +{"current_steps": 3520, "total_steps": 11784, "loss": 0.0632, "lr": 1.7692087172185026e-06, "epoch": 0.2987101154107264, "percentage": 29.87, "elapsed_time": "0:08:13", "remaining_time": "0:19:17", "throughput": 6846.53, "total_tokens": 3376384} +{"current_steps": 3525, "total_steps": 11784, "loss": 0.0409, "lr": 1.7682614003191805e-06, "epoch": 0.2991344195519348, "percentage": 29.91, "elapsed_time": "0:08:13", "remaining_time": "0:19:16", "throughput": 6851.99, "total_tokens": 3381504} +{"current_steps": 3530, "total_steps": 11784, "loss": 0.0469, "lr": 1.7673123979253475e-06, "epoch": 0.29955872369314324, "percentage": 29.96, "elapsed_time": "0:08:13", "remaining_time": "0:19:14", "throughput": 6856.62, "total_tokens": 3386112} +{"current_steps": 3535, "total_steps": 11784, "loss": 0.05, "lr": 1.7663617121190271e-06, "epoch": 0.29998302783435166, "percentage": 30.0, "elapsed_time": "0:08:14", "remaining_time": "0:19:13", "throughput": 6868.48, "total_tokens": 3395072} +{"current_steps": 3540, "total_steps": 11784, "loss": 0.0141, "lr": 1.7654093449859367e-06, "epoch": 0.3004073319755601, "percentage": 30.04, "elapsed_time": "0:08:14", "remaining_time": "0:19:11", "throughput": 6873.36, "total_tokens": 3399808} +{"current_steps": 3540, "total_steps": 11784, "eval_loss": 0.08620841801166534, "epoch": 0.3004073319755601, "percentage": 30.04, "elapsed_time": "0:08:30", "remaining_time": "0:19:49", "throughput": 6658.39, "total_tokens": 3399808} +{"current_steps": 3545, "total_steps": 11784, "loss": 0.0416, "lr": 1.764455298615481e-06, "epoch": 0.3008316361167685, "percentage": 30.08, "elapsed_time": "0:09:11", "remaining_time": "0:21:20", "throughput": 6177.14, "total_tokens": 3404544} +{"current_steps": 3550, "total_steps": 11784, "loss": 0.1003, "lr": 1.7634995751007499e-06, "epoch": 0.3012559402579769, "percentage": 30.13, "elapsed_time": "0:09:11", "remaining_time": "0:21:19", "throughput": 6181.39, "total_tokens": 3408896} +{"current_steps": 3555, "total_steps": 11784, "loss": 0.0709, "lr": 1.7625421765385124e-06, "epoch": 0.30168024439918534, "percentage": 30.17, "elapsed_time": "0:09:11", "remaining_time": "0:21:17", "throughput": 6186.54, "total_tokens": 3413824} +{"current_steps": 3560, "total_steps": 11784, "loss": 0.097, "lr": 1.7615831050292127e-06, "epoch": 0.30210454854039376, "percentage": 30.21, "elapsed_time": "0:09:12", "remaining_time": "0:21:15", "throughput": 6190.87, "total_tokens": 3418240} +{"current_steps": 3565, "total_steps": 11784, "loss": 0.1312, "lr": 1.760622362676965e-06, "epoch": 0.3025288526816022, "percentage": 30.25, "elapsed_time": "0:09:12", "remaining_time": "0:21:13", "throughput": 6196.04, "total_tokens": 3423168} +{"current_steps": 3570, "total_steps": 11784, "loss": 0.0642, "lr": 1.7596599515895486e-06, "epoch": 0.3029531568228106, "percentage": 30.3, "elapsed_time": "0:09:12", "remaining_time": "0:21:11", "throughput": 6201.38, "total_tokens": 3428224} +{"current_steps": 3575, "total_steps": 11784, "loss": 0.0485, "lr": 1.7586958738784055e-06, "epoch": 0.303377460964019, "percentage": 30.34, "elapsed_time": "0:09:13", "remaining_time": "0:21:10", "throughput": 6206.11, "total_tokens": 3432896} +{"current_steps": 3580, "total_steps": 11784, "loss": 0.1002, "lr": 1.7577301316586323e-06, "epoch": 0.30380176510522744, "percentage": 30.38, "elapsed_time": "0:09:13", "remaining_time": "0:21:08", "throughput": 6210.93, "total_tokens": 3437632} +{"current_steps": 3585, "total_steps": 11784, "loss": 0.061, "lr": 1.7567627270489787e-06, "epoch": 0.30422606924643586, "percentage": 30.42, "elapsed_time": "0:09:13", "remaining_time": "0:21:06", "throughput": 6215.34, "total_tokens": 3442112} +{"current_steps": 3590, "total_steps": 11784, "loss": 0.0435, "lr": 1.7557936621718406e-06, "epoch": 0.3046503733876443, "percentage": 30.47, "elapsed_time": "0:09:14", "remaining_time": "0:21:04", "throughput": 6222.05, "total_tokens": 3448064} +{"current_steps": 3595, "total_steps": 11784, "loss": 0.1354, "lr": 1.754822939153257e-06, "epoch": 0.3050746775288527, "percentage": 30.51, "elapsed_time": "0:09:14", "remaining_time": "0:21:03", "throughput": 6226.87, "total_tokens": 3452800} +{"current_steps": 3600, "total_steps": 11784, "loss": 0.1403, "lr": 1.7538505601229043e-06, "epoch": 0.3054989816700611, "percentage": 30.55, "elapsed_time": "0:09:14", "remaining_time": "0:21:01", "throughput": 6232.17, "total_tokens": 3457856} +{"current_steps": 3605, "total_steps": 11784, "loss": 0.036, "lr": 1.7528765272140927e-06, "epoch": 0.30592328581126954, "percentage": 30.59, "elapsed_time": "0:09:15", "remaining_time": "0:20:59", "throughput": 6237.16, "total_tokens": 3462720} +{"current_steps": 3610, "total_steps": 11784, "loss": 0.1049, "lr": 1.7519008425637597e-06, "epoch": 0.30634758995247796, "percentage": 30.63, "elapsed_time": "0:09:15", "remaining_time": "0:20:57", "throughput": 6241.64, "total_tokens": 3467264} +{"current_steps": 3615, "total_steps": 11784, "loss": 0.0713, "lr": 1.7509235083124679e-06, "epoch": 0.3067718940936864, "percentage": 30.68, "elapsed_time": "0:09:15", "remaining_time": "0:20:56", "throughput": 6247.72, "total_tokens": 3472832} +{"current_steps": 3620, "total_steps": 11784, "loss": 0.061, "lr": 1.749944526604398e-06, "epoch": 0.30719619823489475, "percentage": 30.72, "elapsed_time": "0:09:16", "remaining_time": "0:20:54", "throughput": 6253.23, "total_tokens": 3478016} +{"current_steps": 3625, "total_steps": 11784, "loss": 0.0438, "lr": 1.7489638995873453e-06, "epoch": 0.30762050237610317, "percentage": 30.76, "elapsed_time": "0:09:16", "remaining_time": "0:20:52", "throughput": 6257.92, "total_tokens": 3482688} +{"current_steps": 3630, "total_steps": 11784, "loss": 0.0193, "lr": 1.7479816294127149e-06, "epoch": 0.3080448065173116, "percentage": 30.8, "elapsed_time": "0:09:16", "remaining_time": "0:20:50", "throughput": 6262.48, "total_tokens": 3487296} +{"current_steps": 3635, "total_steps": 11784, "loss": 0.0555, "lr": 1.746997718235517e-06, "epoch": 0.30846911065852, "percentage": 30.85, "elapsed_time": "0:09:17", "remaining_time": "0:20:49", "throughput": 6266.76, "total_tokens": 3491712} +{"current_steps": 3640, "total_steps": 11784, "loss": 0.0876, "lr": 1.7460121682143616e-06, "epoch": 0.3088934147997284, "percentage": 30.89, "elapsed_time": "0:09:17", "remaining_time": "0:20:47", "throughput": 6271.21, "total_tokens": 3496256} +{"current_steps": 3645, "total_steps": 11784, "loss": 0.1017, "lr": 1.7450249815114545e-06, "epoch": 0.30931771894093685, "percentage": 30.93, "elapsed_time": "0:09:17", "remaining_time": "0:20:45", "throughput": 6275.49, "total_tokens": 3500672} +{"current_steps": 3650, "total_steps": 11784, "loss": 0.0117, "lr": 1.744036160292592e-06, "epoch": 0.30974202308214527, "percentage": 30.97, "elapsed_time": "0:09:18", "remaining_time": "0:20:43", "throughput": 6280.49, "total_tokens": 3505536} +{"current_steps": 3655, "total_steps": 11784, "loss": 0.0618, "lr": 1.7430457067271563e-06, "epoch": 0.3101663272233537, "percentage": 31.02, "elapsed_time": "0:09:18", "remaining_time": "0:20:42", "throughput": 6285.4, "total_tokens": 3510400} +{"current_steps": 3660, "total_steps": 11784, "loss": 0.03, "lr": 1.742053622988111e-06, "epoch": 0.3105906313645621, "percentage": 31.06, "elapsed_time": "0:09:18", "remaining_time": "0:20:40", "throughput": 6289.76, "total_tokens": 3514880} +{"current_steps": 3665, "total_steps": 11784, "loss": 0.1128, "lr": 1.7410599112519969e-06, "epoch": 0.31101493550577053, "percentage": 31.1, "elapsed_time": "0:09:19", "remaining_time": "0:20:38", "throughput": 6295.2, "total_tokens": 3520192} +{"current_steps": 3670, "total_steps": 11784, "loss": 0.0743, "lr": 1.7400645736989246e-06, "epoch": 0.31143923964697895, "percentage": 31.14, "elapsed_time": "0:09:19", "remaining_time": "0:20:37", "throughput": 6299.32, "total_tokens": 3524544} +{"current_steps": 3675, "total_steps": 11784, "loss": 0.0105, "lr": 1.7390676125125733e-06, "epoch": 0.31186354378818737, "percentage": 31.19, "elapsed_time": "0:09:19", "remaining_time": "0:20:35", "throughput": 6303.45, "total_tokens": 3528896} +{"current_steps": 3680, "total_steps": 11784, "loss": 0.0761, "lr": 1.7380690298801836e-06, "epoch": 0.3122878479293958, "percentage": 31.23, "elapsed_time": "0:09:20", "remaining_time": "0:20:33", "throughput": 6308.07, "total_tokens": 3533568} +{"current_steps": 3685, "total_steps": 11784, "loss": 0.0437, "lr": 1.7370688279925538e-06, "epoch": 0.3127121520706042, "percentage": 31.27, "elapsed_time": "0:09:20", "remaining_time": "0:20:31", "throughput": 6313.88, "total_tokens": 3539008} +{"current_steps": 3690, "total_steps": 11784, "loss": 0.0234, "lr": 1.736067009044034e-06, "epoch": 0.31313645621181263, "percentage": 31.31, "elapsed_time": "0:09:20", "remaining_time": "0:20:30", "throughput": 6319.69, "total_tokens": 3544448} +{"current_steps": 3695, "total_steps": 11784, "loss": 0.1307, "lr": 1.7350635752325222e-06, "epoch": 0.31356076035302105, "percentage": 31.36, "elapsed_time": "0:09:21", "remaining_time": "0:20:28", "throughput": 6324.42, "total_tokens": 3549184} +{"current_steps": 3700, "total_steps": 11784, "loss": 0.0346, "lr": 1.7340585287594603e-06, "epoch": 0.31398506449422947, "percentage": 31.4, "elapsed_time": "0:09:21", "remaining_time": "0:20:26", "throughput": 6329.51, "total_tokens": 3554176} +{"current_steps": 3705, "total_steps": 11784, "loss": 0.0626, "lr": 1.733051871829826e-06, "epoch": 0.3144093686354379, "percentage": 31.44, "elapsed_time": "0:09:21", "remaining_time": "0:20:25", "throughput": 6333.92, "total_tokens": 3558720} +{"current_steps": 3710, "total_steps": 11784, "loss": 0.0735, "lr": 1.7320436066521333e-06, "epoch": 0.3148336727766463, "percentage": 31.48, "elapsed_time": "0:09:22", "remaining_time": "0:20:23", "throughput": 6338.92, "total_tokens": 3563648} +{"current_steps": 3715, "total_steps": 11784, "loss": 0.0464, "lr": 1.7310337354384214e-06, "epoch": 0.31525797691785473, "percentage": 31.53, "elapsed_time": "0:09:22", "remaining_time": "0:20:21", "throughput": 6344.05, "total_tokens": 3568704} +{"current_steps": 3720, "total_steps": 11784, "loss": 0.0382, "lr": 1.7300222604042552e-06, "epoch": 0.31568228105906315, "percentage": 31.57, "elapsed_time": "0:09:22", "remaining_time": "0:20:20", "throughput": 6348.34, "total_tokens": 3573184} +{"current_steps": 3725, "total_steps": 11784, "loss": 0.0764, "lr": 1.7290091837687172e-06, "epoch": 0.3161065852002716, "percentage": 31.61, "elapsed_time": "0:09:23", "remaining_time": "0:20:18", "throughput": 6353.77, "total_tokens": 3578432} +{"current_steps": 3730, "total_steps": 11784, "loss": 0.0116, "lr": 1.7279945077544036e-06, "epoch": 0.31653088934148, "percentage": 31.65, "elapsed_time": "0:09:23", "remaining_time": "0:20:16", "throughput": 6357.97, "total_tokens": 3582848} +{"current_steps": 3735, "total_steps": 11784, "loss": 0.0319, "lr": 1.7269782345874203e-06, "epoch": 0.3169551934826884, "percentage": 31.7, "elapsed_time": "0:09:23", "remaining_time": "0:20:15", "throughput": 6363.0, "total_tokens": 3587840} +{"current_steps": 3740, "total_steps": 11784, "loss": 0.1165, "lr": 1.7259603664973766e-06, "epoch": 0.31737949762389683, "percentage": 31.74, "elapsed_time": "0:09:24", "remaining_time": "0:20:13", "throughput": 6367.58, "total_tokens": 3592576} +{"current_steps": 3745, "total_steps": 11784, "loss": 0.1014, "lr": 1.7249409057173806e-06, "epoch": 0.31780380176510525, "percentage": 31.78, "elapsed_time": "0:09:24", "remaining_time": "0:20:11", "throughput": 6372.3, "total_tokens": 3597376} +{"current_steps": 3750, "total_steps": 11784, "loss": 0.0935, "lr": 1.7239198544840354e-06, "epoch": 0.3182281059063136, "percentage": 31.82, "elapsed_time": "0:09:24", "remaining_time": "0:20:10", "throughput": 6376.34, "total_tokens": 3601728} +{"current_steps": 3755, "total_steps": 11784, "loss": 0.1359, "lr": 1.7228972150374332e-06, "epoch": 0.31865241004752204, "percentage": 31.87, "elapsed_time": "0:09:25", "remaining_time": "0:20:08", "throughput": 6381.13, "total_tokens": 3606592} +{"current_steps": 3760, "total_steps": 11784, "loss": 0.0588, "lr": 1.7218729896211504e-06, "epoch": 0.31907671418873046, "percentage": 31.91, "elapsed_time": "0:09:25", "remaining_time": "0:20:06", "throughput": 6385.75, "total_tokens": 3611328} +{"current_steps": 3765, "total_steps": 11784, "loss": 0.0536, "lr": 1.7208471804822425e-06, "epoch": 0.3195010183299389, "percentage": 31.95, "elapsed_time": "0:09:25", "remaining_time": "0:20:05", "throughput": 6390.3, "total_tokens": 3616000} +{"current_steps": 3770, "total_steps": 11784, "loss": 0.0302, "lr": 1.71981978987124e-06, "epoch": 0.3199253224711473, "percentage": 31.99, "elapsed_time": "0:09:26", "remaining_time": "0:20:03", "throughput": 6394.22, "total_tokens": 3620288} +{"current_steps": 3775, "total_steps": 11784, "loss": 0.0396, "lr": 1.7187908200421432e-06, "epoch": 0.3203496266123557, "percentage": 32.03, "elapsed_time": "0:09:26", "remaining_time": "0:20:01", "throughput": 6398.32, "total_tokens": 3624704} +{"current_steps": 3780, "total_steps": 11784, "loss": 0.0919, "lr": 1.717760273252417e-06, "epoch": 0.32077393075356414, "percentage": 32.08, "elapsed_time": "0:09:26", "remaining_time": "0:20:00", "throughput": 6401.84, "total_tokens": 3628736} +{"current_steps": 3785, "total_steps": 11784, "loss": 0.0607, "lr": 1.7167281517629854e-06, "epoch": 0.32119823489477256, "percentage": 32.12, "elapsed_time": "0:09:27", "remaining_time": "0:19:58", "throughput": 6406.71, "total_tokens": 3633664} +{"current_steps": 3790, "total_steps": 11784, "loss": 0.0758, "lr": 1.7156944578382277e-06, "epoch": 0.321622539035981, "percentage": 32.16, "elapsed_time": "0:09:27", "remaining_time": "0:19:56", "throughput": 6411.28, "total_tokens": 3638400} +{"current_steps": 3795, "total_steps": 11784, "loss": 0.0976, "lr": 1.7146591937459732e-06, "epoch": 0.3220468431771894, "percentage": 32.2, "elapsed_time": "0:09:27", "remaining_time": "0:19:55", "throughput": 6415.99, "total_tokens": 3643200} +{"current_steps": 3800, "total_steps": 11784, "loss": 0.1564, "lr": 1.713622361757495e-06, "epoch": 0.3224711473183978, "percentage": 32.25, "elapsed_time": "0:09:28", "remaining_time": "0:19:53", "throughput": 6420.2, "total_tokens": 3647680} +{"current_steps": 3805, "total_steps": 11784, "loss": 0.0291, "lr": 1.712583964147507e-06, "epoch": 0.32289545145960624, "percentage": 32.29, "elapsed_time": "0:09:28", "remaining_time": "0:19:52", "throughput": 6425.79, "total_tokens": 3653120} +{"current_steps": 3810, "total_steps": 11784, "loss": 0.1444, "lr": 1.7115440031941572e-06, "epoch": 0.32331975560081466, "percentage": 32.33, "elapsed_time": "0:09:28", "remaining_time": "0:19:50", "throughput": 6430.29, "total_tokens": 3657856} +{"current_steps": 3815, "total_steps": 11784, "loss": 0.0922, "lr": 1.7105024811790248e-06, "epoch": 0.3237440597420231, "percentage": 32.37, "elapsed_time": "0:09:29", "remaining_time": "0:19:48", "throughput": 6434.99, "total_tokens": 3662656} +{"current_steps": 3820, "total_steps": 11784, "loss": 0.0862, "lr": 1.7094594003871116e-06, "epoch": 0.3241683638832315, "percentage": 32.42, "elapsed_time": "0:09:29", "remaining_time": "0:19:47", "throughput": 6440.6, "total_tokens": 3668096} +{"current_steps": 3825, "total_steps": 11784, "loss": 0.0375, "lr": 1.7084147631068415e-06, "epoch": 0.3245926680244399, "percentage": 32.46, "elapsed_time": "0:09:29", "remaining_time": "0:19:45", "throughput": 6445.78, "total_tokens": 3673280} +{"current_steps": 3830, "total_steps": 11784, "loss": 0.084, "lr": 1.7073685716300517e-06, "epoch": 0.32501697216564834, "percentage": 32.5, "elapsed_time": "0:09:30", "remaining_time": "0:19:44", "throughput": 6449.99, "total_tokens": 3677824} +{"current_steps": 3835, "total_steps": 11784, "loss": 0.0866, "lr": 1.7063208282519894e-06, "epoch": 0.32544127630685676, "percentage": 32.54, "elapsed_time": "0:09:30", "remaining_time": "0:19:42", "throughput": 6454.64, "total_tokens": 3682624} +{"current_steps": 3840, "total_steps": 11784, "loss": 0.0973, "lr": 1.7052715352713074e-06, "epoch": 0.3258655804480652, "percentage": 32.59, "elapsed_time": "0:09:30", "remaining_time": "0:19:40", "throughput": 6459.09, "total_tokens": 3687296} +{"current_steps": 3845, "total_steps": 11784, "loss": 0.0854, "lr": 1.7042206949900568e-06, "epoch": 0.3262898845892736, "percentage": 32.63, "elapsed_time": "0:09:31", "remaining_time": "0:19:39", "throughput": 6464.14, "total_tokens": 3692352} +{"current_steps": 3850, "total_steps": 11784, "loss": 0.0305, "lr": 1.703168309713684e-06, "epoch": 0.326714188730482, "percentage": 32.67, "elapsed_time": "0:09:31", "remaining_time": "0:19:37", "throughput": 6469.24, "total_tokens": 3697472} +{"current_steps": 3855, "total_steps": 11784, "loss": 0.0659, "lr": 1.7021143817510262e-06, "epoch": 0.32713849287169044, "percentage": 32.71, "elapsed_time": "0:09:31", "remaining_time": "0:19:36", "throughput": 6474.23, "total_tokens": 3702528} +{"current_steps": 3860, "total_steps": 11784, "loss": 0.1094, "lr": 1.7010589134143025e-06, "epoch": 0.32756279701289887, "percentage": 32.76, "elapsed_time": "0:09:32", "remaining_time": "0:19:34", "throughput": 6477.65, "total_tokens": 3706560} +{"current_steps": 3865, "total_steps": 11784, "loss": 0.067, "lr": 1.7000019070191138e-06, "epoch": 0.3279871011541073, "percentage": 32.8, "elapsed_time": "0:09:32", "remaining_time": "0:19:33", "throughput": 6481.9, "total_tokens": 3711104} +{"current_steps": 3870, "total_steps": 11784, "loss": 0.0876, "lr": 1.698943364884434e-06, "epoch": 0.3284114052953157, "percentage": 32.84, "elapsed_time": "0:09:32", "remaining_time": "0:19:31", "throughput": 6486.2, "total_tokens": 3715712} +{"current_steps": 3875, "total_steps": 11784, "loss": 0.0276, "lr": 1.697883289332607e-06, "epoch": 0.3288357094365241, "percentage": 32.88, "elapsed_time": "0:09:33", "remaining_time": "0:19:29", "throughput": 6491.43, "total_tokens": 3720960} +{"current_steps": 3880, "total_steps": 11784, "loss": 0.0555, "lr": 1.6968216826893405e-06, "epoch": 0.3292600135777325, "percentage": 32.93, "elapsed_time": "0:09:33", "remaining_time": "0:19:28", "throughput": 6495.63, "total_tokens": 3725504} +{"current_steps": 3885, "total_steps": 11784, "loss": 0.0693, "lr": 1.6957585472837014e-06, "epoch": 0.3296843177189409, "percentage": 32.97, "elapsed_time": "0:09:33", "remaining_time": "0:19:26", "throughput": 6499.53, "total_tokens": 3729856} +{"current_steps": 3890, "total_steps": 11784, "loss": 0.1188, "lr": 1.6946938854481103e-06, "epoch": 0.33010862186014933, "percentage": 33.01, "elapsed_time": "0:09:34", "remaining_time": "0:19:25", "throughput": 6504.63, "total_tokens": 3735040} +{"current_steps": 3895, "total_steps": 11784, "loss": 0.0129, "lr": 1.6936276995183371e-06, "epoch": 0.33053292600135775, "percentage": 33.05, "elapsed_time": "0:09:34", "remaining_time": "0:19:23", "throughput": 6509.86, "total_tokens": 3740288} +{"current_steps": 3900, "total_steps": 11784, "loss": 0.0761, "lr": 1.6925599918334954e-06, "epoch": 0.33095723014256617, "percentage": 33.1, "elapsed_time": "0:09:34", "remaining_time": "0:19:22", "throughput": 6514.18, "total_tokens": 3744960} +{"current_steps": 3905, "total_steps": 11784, "loss": 0.0783, "lr": 1.6914907647360367e-06, "epoch": 0.3313815342837746, "percentage": 33.14, "elapsed_time": "0:09:35", "remaining_time": "0:19:20", "throughput": 6517.71, "total_tokens": 3749120} +{"current_steps": 3910, "total_steps": 11784, "loss": 0.0857, "lr": 1.6904200205717467e-06, "epoch": 0.331805838424983, "percentage": 33.18, "elapsed_time": "0:09:35", "remaining_time": "0:19:19", "throughput": 6522.31, "total_tokens": 3753984} +{"current_steps": 3915, "total_steps": 11784, "loss": 0.0364, "lr": 1.689347761689739e-06, "epoch": 0.33223014256619143, "percentage": 33.22, "elapsed_time": "0:09:35", "remaining_time": "0:19:17", "throughput": 6526.27, "total_tokens": 3758464} +{"current_steps": 3920, "total_steps": 11784, "loss": 0.0956, "lr": 1.6882739904424507e-06, "epoch": 0.33265444670739985, "percentage": 33.27, "elapsed_time": "0:09:36", "remaining_time": "0:19:15", "throughput": 6529.81, "total_tokens": 3762624} +{"current_steps": 3925, "total_steps": 11784, "loss": 0.1428, "lr": 1.6871987091856366e-06, "epoch": 0.3330787508486083, "percentage": 33.31, "elapsed_time": "0:09:36", "remaining_time": "0:19:14", "throughput": 6534.62, "total_tokens": 3767616} +{"current_steps": 3930, "total_steps": 11784, "loss": 0.0047, "lr": 1.6861219202783644e-06, "epoch": 0.3335030549898167, "percentage": 33.35, "elapsed_time": "0:09:36", "remaining_time": "0:19:12", "throughput": 6539.83, "total_tokens": 3772864} +{"current_steps": 3935, "total_steps": 11784, "loss": 0.0992, "lr": 1.6850436260830093e-06, "epoch": 0.3339273591310251, "percentage": 33.39, "elapsed_time": "0:09:37", "remaining_time": "0:19:11", "throughput": 6544.43, "total_tokens": 3777728} +{"current_steps": 3940, "total_steps": 11784, "loss": 0.0653, "lr": 1.683963828965249e-06, "epoch": 0.33435166327223353, "percentage": 33.44, "elapsed_time": "0:09:37", "remaining_time": "0:19:09", "throughput": 6549.45, "total_tokens": 3782912} +{"current_steps": 3945, "total_steps": 11784, "loss": 0.0656, "lr": 1.6828825312940592e-06, "epoch": 0.33477596741344195, "percentage": 33.48, "elapsed_time": "0:09:37", "remaining_time": "0:19:08", "throughput": 6554.55, "total_tokens": 3788160} +{"current_steps": 3950, "total_steps": 11784, "loss": 0.0815, "lr": 1.6817997354417066e-06, "epoch": 0.3352002715546504, "percentage": 33.52, "elapsed_time": "0:09:38", "remaining_time": "0:19:06", "throughput": 6558.28, "total_tokens": 3792448} +{"current_steps": 3955, "total_steps": 11784, "loss": 0.0654, "lr": 1.6807154437837453e-06, "epoch": 0.3356245756958588, "percentage": 33.56, "elapsed_time": "0:09:38", "remaining_time": "0:19:05", "throughput": 6562.92, "total_tokens": 3797376} +{"current_steps": 3960, "total_steps": 11784, "loss": 0.0956, "lr": 1.6796296586990108e-06, "epoch": 0.3360488798370672, "percentage": 33.6, "elapsed_time": "0:09:38", "remaining_time": "0:19:03", "throughput": 6567.89, "total_tokens": 3802496} +{"current_steps": 3965, "total_steps": 11784, "loss": 0.0604, "lr": 1.6785423825696156e-06, "epoch": 0.33647318397827564, "percentage": 33.65, "elapsed_time": "0:09:39", "remaining_time": "0:19:02", "throughput": 6571.77, "total_tokens": 3806912} +{"current_steps": 3970, "total_steps": 11784, "loss": 0.042, "lr": 1.6774536177809426e-06, "epoch": 0.33689748811948406, "percentage": 33.69, "elapsed_time": "0:09:39", "remaining_time": "0:19:00", "throughput": 6576.14, "total_tokens": 3811648} +{"current_steps": 3975, "total_steps": 11784, "loss": 0.0873, "lr": 1.6763633667216416e-06, "epoch": 0.3373217922606925, "percentage": 33.73, "elapsed_time": "0:09:39", "remaining_time": "0:18:59", "throughput": 6581.42, "total_tokens": 3817024} +{"current_steps": 3980, "total_steps": 11784, "loss": 0.0224, "lr": 1.6752716317836226e-06, "epoch": 0.3377460964019009, "percentage": 33.77, "elapsed_time": "0:09:40", "remaining_time": "0:18:57", "throughput": 6585.28, "total_tokens": 3821440} +{"current_steps": 3985, "total_steps": 11784, "loss": 0.0261, "lr": 1.6741784153620508e-06, "epoch": 0.3381704005431093, "percentage": 33.82, "elapsed_time": "0:09:40", "remaining_time": "0:18:56", "throughput": 6589.37, "total_tokens": 3825984} +{"current_steps": 3990, "total_steps": 11784, "loss": 0.057, "lr": 1.6730837198553422e-06, "epoch": 0.33859470468431774, "percentage": 33.86, "elapsed_time": "0:09:40", "remaining_time": "0:18:54", "throughput": 6594.28, "total_tokens": 3831104} +{"current_steps": 3995, "total_steps": 11784, "loss": 0.083, "lr": 1.6719875476651577e-06, "epoch": 0.33901900882552616, "percentage": 33.9, "elapsed_time": "0:09:41", "remaining_time": "0:18:53", "throughput": 6599.05, "total_tokens": 3836160} +{"current_steps": 4000, "total_steps": 11784, "loss": 0.099, "lr": 1.6708899011963978e-06, "epoch": 0.3394433129667346, "percentage": 33.94, "elapsed_time": "0:09:41", "remaining_time": "0:18:51", "throughput": 6602.97, "total_tokens": 3840640} +{"current_steps": 4005, "total_steps": 11784, "loss": 0.0456, "lr": 1.6697907828571966e-06, "epoch": 0.339867617107943, "percentage": 33.99, "elapsed_time": "0:09:41", "remaining_time": "0:18:50", "throughput": 6607.37, "total_tokens": 3845440} +{"current_steps": 4010, "total_steps": 11784, "loss": 0.1105, "lr": 1.6686901950589193e-06, "epoch": 0.3402919212491514, "percentage": 34.03, "elapsed_time": "0:09:42", "remaining_time": "0:18:48", "throughput": 6611.94, "total_tokens": 3850368} +{"current_steps": 4015, "total_steps": 11784, "loss": 0.0449, "lr": 1.6675881402161536e-06, "epoch": 0.3407162253903598, "percentage": 34.07, "elapsed_time": "0:09:42", "remaining_time": "0:18:47", "throughput": 6616.55, "total_tokens": 3855296} +{"current_steps": 4020, "total_steps": 11784, "loss": 0.073, "lr": 1.6664846207467054e-06, "epoch": 0.3411405295315682, "percentage": 34.11, "elapsed_time": "0:09:43", "remaining_time": "0:18:45", "throughput": 6620.28, "total_tokens": 3859648} +{"current_steps": 4025, "total_steps": 11784, "loss": 0.0364, "lr": 1.665379639071595e-06, "epoch": 0.3415648336727766, "percentage": 34.16, "elapsed_time": "0:09:43", "remaining_time": "0:18:44", "throughput": 6624.79, "total_tokens": 3864512} +{"current_steps": 4030, "total_steps": 11784, "loss": 0.0528, "lr": 1.6642731976150492e-06, "epoch": 0.34198913781398504, "percentage": 34.2, "elapsed_time": "0:09:43", "remaining_time": "0:18:43", "throughput": 6628.38, "total_tokens": 3868800} +{"current_steps": 4035, "total_steps": 11784, "loss": 0.059, "lr": 1.6631652988044995e-06, "epoch": 0.34241344195519346, "percentage": 34.24, "elapsed_time": "0:09:44", "remaining_time": "0:18:41", "throughput": 6632.83, "total_tokens": 3873664} +{"current_steps": 4040, "total_steps": 11784, "loss": 0.0991, "lr": 1.6620559450705728e-06, "epoch": 0.3428377460964019, "percentage": 34.28, "elapsed_time": "0:09:44", "remaining_time": "0:18:40", "throughput": 6637.25, "total_tokens": 3878528} +{"current_steps": 4045, "total_steps": 11784, "loss": 0.0806, "lr": 1.6609451388470885e-06, "epoch": 0.3432620502376103, "percentage": 34.33, "elapsed_time": "0:09:44", "remaining_time": "0:18:38", "throughput": 6641.32, "total_tokens": 3883136} +{"current_steps": 4050, "total_steps": 11784, "loss": 0.095, "lr": 1.6598328825710533e-06, "epoch": 0.3436863543788187, "percentage": 34.37, "elapsed_time": "0:09:45", "remaining_time": "0:18:37", "throughput": 6646.32, "total_tokens": 3888384} +{"current_steps": 4055, "total_steps": 11784, "loss": 0.0117, "lr": 1.6587191786826543e-06, "epoch": 0.34411065852002715, "percentage": 34.41, "elapsed_time": "0:09:45", "remaining_time": "0:18:35", "throughput": 6650.49, "total_tokens": 3893056} +{"current_steps": 4060, "total_steps": 11784, "loss": 0.0724, "lr": 1.6576040296252553e-06, "epoch": 0.34453496266123557, "percentage": 34.45, "elapsed_time": "0:09:45", "remaining_time": "0:18:34", "throughput": 6654.49, "total_tokens": 3897600} +{"current_steps": 4065, "total_steps": 11784, "loss": 0.1141, "lr": 1.65648743784539e-06, "epoch": 0.344959266802444, "percentage": 34.5, "elapsed_time": "0:09:46", "remaining_time": "0:18:32", "throughput": 6658.39, "total_tokens": 3902080} +{"current_steps": 4070, "total_steps": 11784, "loss": 0.061, "lr": 1.6553694057927573e-06, "epoch": 0.3453835709436524, "percentage": 34.54, "elapsed_time": "0:09:46", "remaining_time": "0:18:31", "throughput": 6662.75, "total_tokens": 3906880} +{"current_steps": 4075, "total_steps": 11784, "loss": 0.1306, "lr": 1.654249935920217e-06, "epoch": 0.3458078750848608, "percentage": 34.58, "elapsed_time": "0:09:46", "remaining_time": "0:18:29", "throughput": 6666.14, "total_tokens": 3911040} +{"current_steps": 4080, "total_steps": 11784, "loss": 0.0689, "lr": 1.6531290306837817e-06, "epoch": 0.34623217922606925, "percentage": 34.62, "elapsed_time": "0:09:47", "remaining_time": "0:18:28", "throughput": 6670.33, "total_tokens": 3915712} +{"current_steps": 4085, "total_steps": 11784, "loss": 0.0916, "lr": 1.6520066925426143e-06, "epoch": 0.34665648336727767, "percentage": 34.67, "elapsed_time": "0:09:47", "remaining_time": "0:18:27", "throughput": 6673.8, "total_tokens": 3919936} +{"current_steps": 4090, "total_steps": 11784, "loss": 0.1051, "lr": 1.650882923959021e-06, "epoch": 0.3470807875084861, "percentage": 34.71, "elapsed_time": "0:09:47", "remaining_time": "0:18:25", "throughput": 6677.81, "total_tokens": 3924480} +{"current_steps": 4095, "total_steps": 11784, "loss": 0.1395, "lr": 1.649757727398446e-06, "epoch": 0.3475050916496945, "percentage": 34.75, "elapsed_time": "0:09:48", "remaining_time": "0:18:24", "throughput": 6682.83, "total_tokens": 3929728} +{"current_steps": 4100, "total_steps": 11784, "loss": 0.0495, "lr": 1.6486311053294669e-06, "epoch": 0.34792939579090293, "percentage": 34.79, "elapsed_time": "0:09:48", "remaining_time": "0:18:22", "throughput": 6686.49, "total_tokens": 3934080} +{"current_steps": 4105, "total_steps": 11784, "loss": 0.0759, "lr": 1.6475030602237876e-06, "epoch": 0.34835369993211135, "percentage": 34.84, "elapsed_time": "0:09:48", "remaining_time": "0:18:21", "throughput": 6690.46, "total_tokens": 3938624} +{"current_steps": 4110, "total_steps": 11784, "loss": 0.0924, "lr": 1.646373594556236e-06, "epoch": 0.34877800407331977, "percentage": 34.88, "elapsed_time": "0:09:49", "remaining_time": "0:18:19", "throughput": 6696.16, "total_tokens": 3944448} +{"current_steps": 4115, "total_steps": 11784, "loss": 0.0257, "lr": 1.6452427108047542e-06, "epoch": 0.3492023082145282, "percentage": 34.92, "elapsed_time": "0:09:49", "remaining_time": "0:18:18", "throughput": 6700.36, "total_tokens": 3949184} +{"current_steps": 4120, "total_steps": 11784, "loss": 0.0649, "lr": 1.6441104114503977e-06, "epoch": 0.3496266123557366, "percentage": 34.96, "elapsed_time": "0:09:49", "remaining_time": "0:18:17", "throughput": 6704.21, "total_tokens": 3953664} +{"current_steps": 4125, "total_steps": 11784, "loss": 0.0585, "lr": 1.642976698977326e-06, "epoch": 0.35005091649694503, "percentage": 35.01, "elapsed_time": "0:09:50", "remaining_time": "0:18:15", "throughput": 6708.35, "total_tokens": 3958336} +{"current_steps": 4130, "total_steps": 11784, "loss": 0.0051, "lr": 1.6418415758727995e-06, "epoch": 0.35047522063815345, "percentage": 35.05, "elapsed_time": "0:09:50", "remaining_time": "0:18:14", "throughput": 6713.29, "total_tokens": 3963584} +{"current_steps": 4130, "total_steps": 11784, "eval_loss": 0.07100464403629303, "epoch": 0.35047522063815345, "percentage": 35.05, "elapsed_time": "0:10:06", "remaining_time": "0:18:43", "throughput": 6536.85, "total_tokens": 3963584} +{"current_steps": 4135, "total_steps": 11784, "loss": 0.0893, "lr": 1.6407050446271738e-06, "epoch": 0.35089952477936187, "percentage": 35.09, "elapsed_time": "0:10:37", "remaining_time": "0:19:38", "throughput": 6228.46, "total_tokens": 3968896} +{"current_steps": 4140, "total_steps": 11784, "loss": 0.1264, "lr": 1.6395671077338928e-06, "epoch": 0.3513238289205703, "percentage": 35.13, "elapsed_time": "0:10:37", "remaining_time": "0:19:37", "throughput": 6232.35, "total_tokens": 3973440} +{"current_steps": 4145, "total_steps": 11784, "loss": 0.0501, "lr": 1.6384277676894855e-06, "epoch": 0.35174813306177866, "percentage": 35.17, "elapsed_time": "0:10:37", "remaining_time": "0:19:35", "throughput": 6236.54, "total_tokens": 3978176} +{"current_steps": 4150, "total_steps": 11784, "loss": 0.0844, "lr": 1.6372870269935583e-06, "epoch": 0.3521724372029871, "percentage": 35.22, "elapsed_time": "0:10:38", "remaining_time": "0:19:34", "throughput": 6241.32, "total_tokens": 3983360} +{"current_steps": 4155, "total_steps": 11784, "loss": 0.0303, "lr": 1.6361448881487912e-06, "epoch": 0.3525967413441955, "percentage": 35.26, "elapsed_time": "0:10:38", "remaining_time": "0:19:32", "throughput": 6244.78, "total_tokens": 3987584} +{"current_steps": 4160, "total_steps": 11784, "loss": 0.0368, "lr": 1.6350013536609307e-06, "epoch": 0.3530210454854039, "percentage": 35.3, "elapsed_time": "0:10:38", "remaining_time": "0:19:30", "throughput": 6249.26, "total_tokens": 3992576} +{"current_steps": 4165, "total_steps": 11784, "loss": 0.0488, "lr": 1.6338564260387861e-06, "epoch": 0.35344534962661234, "percentage": 35.34, "elapsed_time": "0:10:39", "remaining_time": "0:19:29", "throughput": 6254.07, "total_tokens": 3997824} +{"current_steps": 4170, "total_steps": 11784, "loss": 0.1147, "lr": 1.6327101077942228e-06, "epoch": 0.35386965376782076, "percentage": 35.39, "elapsed_time": "0:10:39", "remaining_time": "0:19:27", "throughput": 6257.52, "total_tokens": 4002048} +{"current_steps": 4175, "total_steps": 11784, "loss": 0.0454, "lr": 1.631562401442157e-06, "epoch": 0.3542939579090292, "percentage": 35.43, "elapsed_time": "0:10:39", "remaining_time": "0:19:26", "throughput": 6261.5, "total_tokens": 4006656} +{"current_steps": 4180, "total_steps": 11784, "loss": 0.0978, "lr": 1.6304133095005505e-06, "epoch": 0.3547182620502376, "percentage": 35.47, "elapsed_time": "0:10:40", "remaining_time": "0:19:24", "throughput": 6265.25, "total_tokens": 4011136} +{"current_steps": 4185, "total_steps": 11784, "loss": 0.0658, "lr": 1.6292628344904048e-06, "epoch": 0.355142566191446, "percentage": 35.51, "elapsed_time": "0:10:40", "remaining_time": "0:19:23", "throughput": 6269.3, "total_tokens": 4015808} +{"current_steps": 4190, "total_steps": 11784, "loss": 0.0771, "lr": 1.628110978935756e-06, "epoch": 0.35556687033265444, "percentage": 35.56, "elapsed_time": "0:10:40", "remaining_time": "0:19:21", "throughput": 6273.36, "total_tokens": 4020480} +{"current_steps": 4195, "total_steps": 11784, "loss": 0.0722, "lr": 1.626957745363668e-06, "epoch": 0.35599117447386286, "percentage": 35.6, "elapsed_time": "0:10:41", "remaining_time": "0:19:19", "throughput": 6277.32, "total_tokens": 4025088} +{"current_steps": 4200, "total_steps": 11784, "loss": 0.0513, "lr": 1.6258031363042291e-06, "epoch": 0.3564154786150713, "percentage": 35.64, "elapsed_time": "0:10:41", "remaining_time": "0:19:18", "throughput": 6282.05, "total_tokens": 4030272} +{"current_steps": 4205, "total_steps": 11784, "loss": 0.0864, "lr": 1.624647154290545e-06, "epoch": 0.3568397827562797, "percentage": 35.68, "elapsed_time": "0:10:41", "remaining_time": "0:19:16", "throughput": 6286.25, "total_tokens": 4035072} +{"current_steps": 4210, "total_steps": 11784, "loss": 0.0615, "lr": 1.6234898018587336e-06, "epoch": 0.3572640868974881, "percentage": 35.73, "elapsed_time": "0:10:42", "remaining_time": "0:19:15", "throughput": 6289.91, "total_tokens": 4039488} +{"current_steps": 4215, "total_steps": 11784, "loss": 0.1352, "lr": 1.6223310815479186e-06, "epoch": 0.35768839103869654, "percentage": 35.77, "elapsed_time": "0:10:42", "remaining_time": "0:19:13", "throughput": 6294.39, "total_tokens": 4044480} +{"current_steps": 4220, "total_steps": 11784, "loss": 0.0714, "lr": 1.6211709959002255e-06, "epoch": 0.35811269517990496, "percentage": 35.81, "elapsed_time": "0:10:42", "remaining_time": "0:19:12", "throughput": 6297.92, "total_tokens": 4048768} +{"current_steps": 4225, "total_steps": 11784, "loss": 0.0638, "lr": 1.620009547460775e-06, "epoch": 0.3585369993211134, "percentage": 35.85, "elapsed_time": "0:10:43", "remaining_time": "0:19:10", "throughput": 6302.01, "total_tokens": 4053504} +{"current_steps": 4230, "total_steps": 11784, "loss": 0.0221, "lr": 1.6188467387776779e-06, "epoch": 0.3589613034623218, "percentage": 35.9, "elapsed_time": "0:10:43", "remaining_time": "0:19:09", "throughput": 6305.6, "total_tokens": 4057856} +{"current_steps": 4235, "total_steps": 11784, "loss": 0.0434, "lr": 1.6176825724020286e-06, "epoch": 0.3593856076035302, "percentage": 35.94, "elapsed_time": "0:10:43", "remaining_time": "0:19:07", "throughput": 6309.97, "total_tokens": 4062784} +{"current_steps": 4240, "total_steps": 11784, "loss": 0.0282, "lr": 1.6165170508879007e-06, "epoch": 0.35980991174473864, "percentage": 35.98, "elapsed_time": "0:10:44", "remaining_time": "0:19:06", "throughput": 6313.81, "total_tokens": 4067328} +{"current_steps": 4245, "total_steps": 11784, "loss": 0.0755, "lr": 1.6153501767923408e-06, "epoch": 0.36023421588594706, "percentage": 36.02, "elapsed_time": "0:10:44", "remaining_time": "0:19:04", "throughput": 6318.78, "total_tokens": 4072704} +{"current_steps": 4250, "total_steps": 11784, "loss": 0.1098, "lr": 1.6141819526753626e-06, "epoch": 0.3606585200271555, "percentage": 36.07, "elapsed_time": "0:10:44", "remaining_time": "0:19:03", "throughput": 6322.95, "total_tokens": 4077504} +{"current_steps": 4255, "total_steps": 11784, "loss": 0.1086, "lr": 1.613012381099942e-06, "epoch": 0.3610828241683639, "percentage": 36.11, "elapsed_time": "0:10:45", "remaining_time": "0:19:01", "throughput": 6327.04, "total_tokens": 4082240} +{"current_steps": 4260, "total_steps": 11784, "loss": 0.0658, "lr": 1.6118414646320111e-06, "epoch": 0.3615071283095723, "percentage": 36.15, "elapsed_time": "0:10:45", "remaining_time": "0:19:00", "throughput": 6330.14, "total_tokens": 4086272} +{"current_steps": 4265, "total_steps": 11784, "loss": 0.0744, "lr": 1.6106692058404518e-06, "epoch": 0.36193143245078074, "percentage": 36.19, "elapsed_time": "0:10:45", "remaining_time": "0:18:58", "throughput": 6334.05, "total_tokens": 4090880} +{"current_steps": 4270, "total_steps": 11784, "loss": 0.0715, "lr": 1.6094956072970924e-06, "epoch": 0.36235573659198916, "percentage": 36.24, "elapsed_time": "0:10:46", "remaining_time": "0:18:57", "throughput": 6338.01, "total_tokens": 4095552} +{"current_steps": 4275, "total_steps": 11784, "loss": 0.0807, "lr": 1.608320671576699e-06, "epoch": 0.36278004073319753, "percentage": 36.28, "elapsed_time": "0:10:46", "remaining_time": "0:18:55", "throughput": 6342.18, "total_tokens": 4100352} +{"current_steps": 4280, "total_steps": 11784, "loss": 0.052, "lr": 1.6071444012569723e-06, "epoch": 0.36320434487440595, "percentage": 36.32, "elapsed_time": "0:10:46", "remaining_time": "0:18:54", "throughput": 6345.74, "total_tokens": 4104704} +{"current_steps": 4285, "total_steps": 11784, "loss": 0.1166, "lr": 1.6059667989185405e-06, "epoch": 0.36362864901561437, "percentage": 36.36, "elapsed_time": "0:10:47", "remaining_time": "0:18:52", "throughput": 6349.69, "total_tokens": 4109376} +{"current_steps": 4290, "total_steps": 11784, "loss": 0.0883, "lr": 1.6047878671449544e-06, "epoch": 0.3640529531568228, "percentage": 36.41, "elapsed_time": "0:10:47", "remaining_time": "0:18:51", "throughput": 6354.27, "total_tokens": 4114496} +{"current_steps": 4295, "total_steps": 11784, "loss": 0.0369, "lr": 1.6036076085226812e-06, "epoch": 0.3644772572980312, "percentage": 36.45, "elapsed_time": "0:10:47", "remaining_time": "0:18:49", "throughput": 6358.72, "total_tokens": 4119552} +{"current_steps": 4300, "total_steps": 11784, "loss": 0.0301, "lr": 1.6024260256410995e-06, "epoch": 0.36490156143923963, "percentage": 36.49, "elapsed_time": "0:10:48", "remaining_time": "0:18:48", "throughput": 6362.87, "total_tokens": 4124352} +{"current_steps": 4305, "total_steps": 11784, "loss": 0.0234, "lr": 1.601243121092493e-06, "epoch": 0.36532586558044805, "percentage": 36.53, "elapsed_time": "0:10:48", "remaining_time": "0:18:46", "throughput": 6367.01, "total_tokens": 4129152} +{"current_steps": 4310, "total_steps": 11784, "loss": 0.0369, "lr": 1.6000588974720443e-06, "epoch": 0.36575016972165647, "percentage": 36.58, "elapsed_time": "0:10:48", "remaining_time": "0:18:45", "throughput": 6371.36, "total_tokens": 4134144} +{"current_steps": 4315, "total_steps": 11784, "loss": 0.1336, "lr": 1.5988733573778314e-06, "epoch": 0.3661744738628649, "percentage": 36.62, "elapsed_time": "0:10:49", "remaining_time": "0:18:43", "throughput": 6375.28, "total_tokens": 4138816} +{"current_steps": 4320, "total_steps": 11784, "loss": 0.1023, "lr": 1.597686503410819e-06, "epoch": 0.3665987780040733, "percentage": 36.66, "elapsed_time": "0:10:49", "remaining_time": "0:18:42", "throughput": 6379.48, "total_tokens": 4143680} +{"current_steps": 4325, "total_steps": 11784, "loss": 0.0953, "lr": 1.596498338174856e-06, "epoch": 0.36702308214528173, "percentage": 36.7, "elapsed_time": "0:10:49", "remaining_time": "0:18:40", "throughput": 6384.43, "total_tokens": 4149120} +{"current_steps": 4330, "total_steps": 11784, "loss": 0.0331, "lr": 1.595308864276666e-06, "epoch": 0.36744738628649015, "percentage": 36.74, "elapsed_time": "0:10:50", "remaining_time": "0:18:39", "throughput": 6389.16, "total_tokens": 4154432} +{"current_steps": 4335, "total_steps": 11784, "loss": 0.0485, "lr": 1.5941180843258452e-06, "epoch": 0.36787169042769857, "percentage": 36.79, "elapsed_time": "0:10:50", "remaining_time": "0:18:37", "throughput": 6393.43, "total_tokens": 4159360} +{"current_steps": 4340, "total_steps": 11784, "loss": 0.0196, "lr": 1.5929260009348551e-06, "epoch": 0.368295994568907, "percentage": 36.83, "elapsed_time": "0:10:50", "remaining_time": "0:18:36", "throughput": 6396.61, "total_tokens": 4163520} +{"current_steps": 4345, "total_steps": 11784, "loss": 0.1079, "lr": 1.5917326167190163e-06, "epoch": 0.3687202987101154, "percentage": 36.87, "elapsed_time": "0:10:51", "remaining_time": "0:18:34", "throughput": 6401.13, "total_tokens": 4168640} +{"current_steps": 4350, "total_steps": 11784, "loss": 0.1585, "lr": 1.5905379342965033e-06, "epoch": 0.36914460285132383, "percentage": 36.91, "elapsed_time": "0:10:51", "remaining_time": "0:18:33", "throughput": 6405.0, "total_tokens": 4173312} +{"current_steps": 4355, "total_steps": 11784, "loss": 0.1323, "lr": 1.589341956288339e-06, "epoch": 0.36956890699253225, "percentage": 36.96, "elapsed_time": "0:10:51", "remaining_time": "0:18:32", "throughput": 6408.47, "total_tokens": 4177664} +{"current_steps": 4360, "total_steps": 11784, "loss": 0.0261, "lr": 1.5881446853183888e-06, "epoch": 0.3699932111337407, "percentage": 37.0, "elapsed_time": "0:10:52", "remaining_time": "0:18:30", "throughput": 6411.9, "total_tokens": 4182016} +{"current_steps": 4365, "total_steps": 11784, "loss": 0.0039, "lr": 1.586946124013354e-06, "epoch": 0.3704175152749491, "percentage": 37.04, "elapsed_time": "0:10:52", "remaining_time": "0:18:29", "throughput": 6415.7, "total_tokens": 4186624} +{"current_steps": 4370, "total_steps": 11784, "loss": 0.0708, "lr": 1.585746275002768e-06, "epoch": 0.3708418194161575, "percentage": 37.08, "elapsed_time": "0:10:52", "remaining_time": "0:18:27", "throughput": 6420.4, "total_tokens": 4191936} +{"current_steps": 4375, "total_steps": 11784, "loss": 0.087, "lr": 1.5845451409189887e-06, "epoch": 0.37126612355736593, "percentage": 37.13, "elapsed_time": "0:10:53", "remaining_time": "0:18:26", "throughput": 6423.95, "total_tokens": 4196352} +{"current_steps": 4380, "total_steps": 11784, "loss": 0.0648, "lr": 1.5833427243971927e-06, "epoch": 0.37169042769857435, "percentage": 37.17, "elapsed_time": "0:10:53", "remaining_time": "0:18:24", "throughput": 6427.73, "total_tokens": 4200960} +{"current_steps": 4385, "total_steps": 11784, "loss": 0.0723, "lr": 1.582139028075371e-06, "epoch": 0.3721147318397828, "percentage": 37.21, "elapsed_time": "0:10:53", "remaining_time": "0:18:23", "throughput": 6432.33, "total_tokens": 4206208} +{"current_steps": 4390, "total_steps": 11784, "loss": 0.0497, "lr": 1.580934054594322e-06, "epoch": 0.3725390359809912, "percentage": 37.25, "elapsed_time": "0:10:54", "remaining_time": "0:18:21", "throughput": 6435.95, "total_tokens": 4210688} +{"current_steps": 4395, "total_steps": 11784, "loss": 0.0667, "lr": 1.5797278065976463e-06, "epoch": 0.3729633401221996, "percentage": 37.3, "elapsed_time": "0:10:54", "remaining_time": "0:18:20", "throughput": 6439.9, "total_tokens": 4215424} +{"current_steps": 4400, "total_steps": 11784, "loss": 0.0108, "lr": 1.5785202867317407e-06, "epoch": 0.37338764426340804, "percentage": 37.34, "elapsed_time": "0:10:54", "remaining_time": "0:18:19", "throughput": 6443.9, "total_tokens": 4220160} +{"current_steps": 4405, "total_steps": 11784, "loss": 0.1083, "lr": 1.5773114976457915e-06, "epoch": 0.37381194840461646, "percentage": 37.38, "elapsed_time": "0:10:55", "remaining_time": "0:18:17", "throughput": 6447.78, "total_tokens": 4224832} +{"current_steps": 4410, "total_steps": 11784, "loss": 0.0833, "lr": 1.576101441991771e-06, "epoch": 0.3742362525458248, "percentage": 37.42, "elapsed_time": "0:10:55", "remaining_time": "0:18:16", "throughput": 6452.89, "total_tokens": 4230464} +{"current_steps": 4415, "total_steps": 11784, "loss": 0.0739, "lr": 1.574890122424429e-06, "epoch": 0.37466055668703324, "percentage": 37.47, "elapsed_time": "0:10:55", "remaining_time": "0:18:14", "throughput": 6456.34, "total_tokens": 4234816} +{"current_steps": 4420, "total_steps": 11784, "loss": 0.1099, "lr": 1.573677541601289e-06, "epoch": 0.37508486082824166, "percentage": 37.51, "elapsed_time": "0:10:56", "remaining_time": "0:18:13", "throughput": 6460.02, "total_tokens": 4239424} +{"current_steps": 4425, "total_steps": 11784, "loss": 0.0576, "lr": 1.5724637021826409e-06, "epoch": 0.3755091649694501, "percentage": 37.55, "elapsed_time": "0:10:56", "remaining_time": "0:18:11", "throughput": 6463.92, "total_tokens": 4244160} +{"current_steps": 4430, "total_steps": 11784, "loss": 0.0613, "lr": 1.5712486068315367e-06, "epoch": 0.3759334691106585, "percentage": 37.59, "elapsed_time": "0:10:56", "remaining_time": "0:18:10", "throughput": 6467.07, "total_tokens": 4248320} +{"current_steps": 4435, "total_steps": 11784, "loss": 0.0184, "lr": 1.5700322582137826e-06, "epoch": 0.3763577732518669, "percentage": 37.64, "elapsed_time": "0:10:57", "remaining_time": "0:18:09", "throughput": 6470.49, "total_tokens": 4252672} +{"current_steps": 4440, "total_steps": 11784, "loss": 0.0768, "lr": 1.5688146589979358e-06, "epoch": 0.37678207739307534, "percentage": 37.68, "elapsed_time": "0:10:57", "remaining_time": "0:18:07", "throughput": 6474.21, "total_tokens": 4257280} +{"current_steps": 4445, "total_steps": 11784, "loss": 0.0366, "lr": 1.5675958118552962e-06, "epoch": 0.37720638153428376, "percentage": 37.72, "elapsed_time": "0:10:57", "remaining_time": "0:18:06", "throughput": 6478.84, "total_tokens": 4262592} +{"current_steps": 4450, "total_steps": 11784, "loss": 0.0796, "lr": 1.5663757194599013e-06, "epoch": 0.3776306856754922, "percentage": 37.76, "elapsed_time": "0:10:58", "remaining_time": "0:18:04", "throughput": 6483.36, "total_tokens": 4267840} +{"current_steps": 4455, "total_steps": 11784, "loss": 0.0535, "lr": 1.5651543844885216e-06, "epoch": 0.3780549898167006, "percentage": 37.81, "elapsed_time": "0:10:58", "remaining_time": "0:18:03", "throughput": 6487.22, "total_tokens": 4272576} +{"current_steps": 4460, "total_steps": 11784, "loss": 0.0441, "lr": 1.5639318096206533e-06, "epoch": 0.378479293957909, "percentage": 37.85, "elapsed_time": "0:10:58", "remaining_time": "0:18:02", "throughput": 6491.45, "total_tokens": 4277568} +{"current_steps": 4465, "total_steps": 11784, "loss": 0.0889, "lr": 1.562707997538512e-06, "epoch": 0.37890359809911744, "percentage": 37.89, "elapsed_time": "0:10:59", "remaining_time": "0:18:00", "throughput": 6496.08, "total_tokens": 4282880} +{"current_steps": 4470, "total_steps": 11784, "loss": 0.087, "lr": 1.5614829509270288e-06, "epoch": 0.37932790224032586, "percentage": 37.93, "elapsed_time": "0:10:59", "remaining_time": "0:17:59", "throughput": 6499.49, "total_tokens": 4287296} +{"current_steps": 4475, "total_steps": 11784, "loss": 0.0615, "lr": 1.5602566724738426e-06, "epoch": 0.3797522063815343, "percentage": 37.98, "elapsed_time": "0:10:59", "remaining_time": "0:17:57", "throughput": 6502.94, "total_tokens": 4291712} +{"current_steps": 4480, "total_steps": 11784, "loss": 0.0883, "lr": 1.5590291648692952e-06, "epoch": 0.3801765105227427, "percentage": 38.02, "elapsed_time": "0:11:00", "remaining_time": "0:17:56", "throughput": 6507.61, "total_tokens": 4297088} +{"current_steps": 4485, "total_steps": 11784, "loss": 0.0393, "lr": 1.5578004308064245e-06, "epoch": 0.3806008146639511, "percentage": 38.06, "elapsed_time": "0:11:00", "remaining_time": "0:17:55", "throughput": 6512.72, "total_tokens": 4302784} +{"current_steps": 4490, "total_steps": 11784, "loss": 0.0433, "lr": 1.55657047298096e-06, "epoch": 0.38102511880515955, "percentage": 38.1, "elapsed_time": "0:11:01", "remaining_time": "0:17:53", "throughput": 6516.69, "total_tokens": 4307584} +{"current_steps": 4495, "total_steps": 11784, "loss": 0.0148, "lr": 1.5553392940913148e-06, "epoch": 0.38144942294636797, "percentage": 38.14, "elapsed_time": "0:11:01", "remaining_time": "0:17:52", "throughput": 6520.24, "total_tokens": 4312064} +{"current_steps": 4500, "total_steps": 11784, "loss": 0.0059, "lr": 1.554106896838582e-06, "epoch": 0.3818737270875764, "percentage": 38.19, "elapsed_time": "0:11:01", "remaining_time": "0:17:51", "throughput": 6523.94, "total_tokens": 4316672} +{"current_steps": 4505, "total_steps": 11784, "loss": 0.0332, "lr": 1.5528732839265272e-06, "epoch": 0.3822980312287848, "percentage": 38.23, "elapsed_time": "0:11:01", "remaining_time": "0:17:49", "throughput": 6527.39, "total_tokens": 4321088} +{"current_steps": 4510, "total_steps": 11784, "loss": 0.0341, "lr": 1.5516384580615832e-06, "epoch": 0.3827223353699932, "percentage": 38.27, "elapsed_time": "0:11:02", "remaining_time": "0:17:48", "throughput": 6531.75, "total_tokens": 4326208} +{"current_steps": 4515, "total_steps": 11784, "loss": 0.0553, "lr": 1.5504024219528437e-06, "epoch": 0.38314663951120165, "percentage": 38.31, "elapsed_time": "0:11:02", "remaining_time": "0:17:46", "throughput": 6535.68, "total_tokens": 4331008} +{"current_steps": 4520, "total_steps": 11784, "loss": 0.019, "lr": 1.5491651783120578e-06, "epoch": 0.38357094365241007, "percentage": 38.36, "elapsed_time": "0:11:03", "remaining_time": "0:17:45", "throughput": 6539.79, "total_tokens": 4335936} +{"current_steps": 4525, "total_steps": 11784, "loss": 0.0442, "lr": 1.5479267298536238e-06, "epoch": 0.3839952477936185, "percentage": 38.4, "elapsed_time": "0:11:03", "remaining_time": "0:17:44", "throughput": 6543.53, "total_tokens": 4340608} +{"current_steps": 4530, "total_steps": 11784, "loss": 0.0841, "lr": 1.5466870792945828e-06, "epoch": 0.3844195519348269, "percentage": 38.44, "elapsed_time": "0:11:03", "remaining_time": "0:17:42", "throughput": 6547.38, "total_tokens": 4345344} +{"current_steps": 4535, "total_steps": 11784, "loss": 0.1063, "lr": 1.545446229354614e-06, "epoch": 0.38484385607603533, "percentage": 38.48, "elapsed_time": "0:11:04", "remaining_time": "0:17:41", "throughput": 6553.27, "total_tokens": 4351680} +{"current_steps": 4540, "total_steps": 11784, "loss": 0.12, "lr": 1.5442041827560272e-06, "epoch": 0.3852681602172437, "percentage": 38.53, "elapsed_time": "0:11:04", "remaining_time": "0:17:40", "throughput": 6556.57, "total_tokens": 4356032} +{"current_steps": 4545, "total_steps": 11784, "loss": 0.0567, "lr": 1.542960942223758e-06, "epoch": 0.3856924643584521, "percentage": 38.57, "elapsed_time": "0:11:04", "remaining_time": "0:17:38", "throughput": 6560.15, "total_tokens": 4360576} +{"current_steps": 4550, "total_steps": 11784, "loss": 0.1224, "lr": 1.541716510485361e-06, "epoch": 0.38611676849966053, "percentage": 38.61, "elapsed_time": "0:11:05", "remaining_time": "0:17:37", "throughput": 6564.09, "total_tokens": 4365376} +{"current_steps": 4555, "total_steps": 11784, "loss": 0.015, "lr": 1.5404708902710048e-06, "epoch": 0.38654107264086895, "percentage": 38.65, "elapsed_time": "0:11:05", "remaining_time": "0:17:35", "throughput": 6568.25, "total_tokens": 4370368} +{"current_steps": 4560, "total_steps": 11784, "loss": 0.0918, "lr": 1.5392240843134648e-06, "epoch": 0.3869653767820774, "percentage": 38.7, "elapsed_time": "0:11:05", "remaining_time": "0:17:34", "throughput": 6572.56, "total_tokens": 4375488} +{"current_steps": 4565, "total_steps": 11784, "loss": 0.1303, "lr": 1.5379760953481178e-06, "epoch": 0.3873896809232858, "percentage": 38.74, "elapsed_time": "0:11:06", "remaining_time": "0:17:33", "throughput": 6576.58, "total_tokens": 4380352} +{"current_steps": 4570, "total_steps": 11784, "loss": 0.0735, "lr": 1.5367269261129367e-06, "epoch": 0.3878139850644942, "percentage": 38.78, "elapsed_time": "0:11:06", "remaining_time": "0:17:31", "throughput": 6581.12, "total_tokens": 4385664} +{"current_steps": 4575, "total_steps": 11784, "loss": 0.1257, "lr": 1.5354765793484831e-06, "epoch": 0.38823828920570264, "percentage": 38.82, "elapsed_time": "0:11:06", "remaining_time": "0:17:30", "throughput": 6585.08, "total_tokens": 4390528} +{"current_steps": 4580, "total_steps": 11784, "loss": 0.0082, "lr": 1.5342250577979023e-06, "epoch": 0.38866259334691106, "percentage": 38.87, "elapsed_time": "0:11:07", "remaining_time": "0:17:29", "throughput": 6588.71, "total_tokens": 4395136} +{"current_steps": 4585, "total_steps": 11784, "loss": 0.0835, "lr": 1.532972364206917e-06, "epoch": 0.3890868974881195, "percentage": 38.91, "elapsed_time": "0:11:07", "remaining_time": "0:17:27", "throughput": 6593.03, "total_tokens": 4400320} +{"current_steps": 4590, "total_steps": 11784, "loss": 0.0612, "lr": 1.5317185013238209e-06, "epoch": 0.3895112016293279, "percentage": 38.95, "elapsed_time": "0:11:07", "remaining_time": "0:17:26", "throughput": 6596.81, "total_tokens": 4405056} +{"current_steps": 4595, "total_steps": 11784, "loss": 0.0862, "lr": 1.5304634718994738e-06, "epoch": 0.3899355057705363, "percentage": 38.99, "elapsed_time": "0:11:08", "remaining_time": "0:17:25", "throughput": 6601.62, "total_tokens": 4410624} +{"current_steps": 4600, "total_steps": 11784, "loss": 0.0614, "lr": 1.5292072786872938e-06, "epoch": 0.39035980991174474, "percentage": 39.04, "elapsed_time": "0:11:08", "remaining_time": "0:17:23", "throughput": 6605.5, "total_tokens": 4415424} +{"current_steps": 4605, "total_steps": 11784, "loss": 0.0369, "lr": 1.527949924443253e-06, "epoch": 0.39078411405295316, "percentage": 39.08, "elapsed_time": "0:11:08", "remaining_time": "0:17:22", "throughput": 6609.35, "total_tokens": 4420224} +{"current_steps": 4610, "total_steps": 11784, "loss": 0.0958, "lr": 1.52669141192587e-06, "epoch": 0.3912084181941616, "percentage": 39.12, "elapsed_time": "0:11:09", "remaining_time": "0:17:21", "throughput": 6613.05, "total_tokens": 4424896} +{"current_steps": 4615, "total_steps": 11784, "loss": 0.0946, "lr": 1.5254317438962052e-06, "epoch": 0.39163272233537, "percentage": 39.16, "elapsed_time": "0:11:09", "remaining_time": "0:17:19", "throughput": 6616.37, "total_tokens": 4429312} +{"current_steps": 4620, "total_steps": 11784, "loss": 0.1145, "lr": 1.5241709231178539e-06, "epoch": 0.3920570264765784, "percentage": 39.21, "elapsed_time": "0:11:09", "remaining_time": "0:17:18", "throughput": 6619.97, "total_tokens": 4433920} +{"current_steps": 4625, "total_steps": 11784, "loss": 0.0557, "lr": 1.5229089523569405e-06, "epoch": 0.39248133061778684, "percentage": 39.25, "elapsed_time": "0:11:10", "remaining_time": "0:17:17", "throughput": 6623.45, "total_tokens": 4438464} +{"current_steps": 4630, "total_steps": 11784, "loss": 0.0481, "lr": 1.5216458343821122e-06, "epoch": 0.39290563475899526, "percentage": 39.29, "elapsed_time": "0:11:10", "remaining_time": "0:17:15", "throughput": 6627.68, "total_tokens": 4443584} +{"current_steps": 4635, "total_steps": 11784, "loss": 0.0451, "lr": 1.5203815719645328e-06, "epoch": 0.3933299389002037, "percentage": 39.33, "elapsed_time": "0:11:10", "remaining_time": "0:17:14", "throughput": 6632.27, "total_tokens": 4448960} +{"current_steps": 4640, "total_steps": 11784, "loss": 0.0307, "lr": 1.5191161678778773e-06, "epoch": 0.3937542430414121, "percentage": 39.38, "elapsed_time": "0:11:11", "remaining_time": "0:17:13", "throughput": 6635.7, "total_tokens": 4453504} +{"current_steps": 4645, "total_steps": 11784, "loss": 0.108, "lr": 1.5178496248983251e-06, "epoch": 0.3941785471826205, "percentage": 39.42, "elapsed_time": "0:11:11", "remaining_time": "0:17:12", "throughput": 6639.18, "total_tokens": 4458048} +{"current_steps": 4650, "total_steps": 11784, "loss": 0.0384, "lr": 1.5165819458045554e-06, "epoch": 0.39460285132382894, "percentage": 39.46, "elapsed_time": "0:11:11", "remaining_time": "0:17:10", "throughput": 6643.35, "total_tokens": 4463168} +{"current_steps": 4655, "total_steps": 11784, "loss": 0.1386, "lr": 1.5153131333777377e-06, "epoch": 0.39502715546503736, "percentage": 39.5, "elapsed_time": "0:11:12", "remaining_time": "0:17:09", "throughput": 6646.58, "total_tokens": 4467520} +{"current_steps": 4660, "total_steps": 11784, "loss": 0.0864, "lr": 1.51404319040153e-06, "epoch": 0.3954514596062458, "percentage": 39.55, "elapsed_time": "0:11:12", "remaining_time": "0:17:08", "throughput": 6651.18, "total_tokens": 4472960} +{"current_steps": 4665, "total_steps": 11784, "loss": 0.058, "lr": 1.5127721196620697e-06, "epoch": 0.3958757637474542, "percentage": 39.59, "elapsed_time": "0:11:12", "remaining_time": "0:17:06", "throughput": 6654.43, "total_tokens": 4477312} +{"current_steps": 4670, "total_steps": 11784, "loss": 0.0255, "lr": 1.5114999239479685e-06, "epoch": 0.39630006788866257, "percentage": 39.63, "elapsed_time": "0:11:13", "remaining_time": "0:17:05", "throughput": 6658.66, "total_tokens": 4482432} +{"current_steps": 4675, "total_steps": 11784, "loss": 0.0754, "lr": 1.5102266060503063e-06, "epoch": 0.396724372029871, "percentage": 39.67, "elapsed_time": "0:11:13", "remaining_time": "0:17:04", "throughput": 6661.48, "total_tokens": 4486464} +{"current_steps": 4680, "total_steps": 11784, "loss": 0.104, "lr": 1.508952168762624e-06, "epoch": 0.3971486761710794, "percentage": 39.71, "elapsed_time": "0:11:13", "remaining_time": "0:17:02", "throughput": 6665.57, "total_tokens": 4491520} +{"current_steps": 4685, "total_steps": 11784, "loss": 0.0473, "lr": 1.5076766148809209e-06, "epoch": 0.3975729803122878, "percentage": 39.76, "elapsed_time": "0:11:14", "remaining_time": "0:17:01", "throughput": 6669.27, "total_tokens": 4496256} +{"current_steps": 4690, "total_steps": 11784, "loss": 0.0532, "lr": 1.506399947203643e-06, "epoch": 0.39799728445349625, "percentage": 39.8, "elapsed_time": "0:11:14", "remaining_time": "0:17:00", "throughput": 6672.23, "total_tokens": 4500416} +{"current_steps": 4695, "total_steps": 11784, "loss": 0.052, "lr": 1.5051221685316815e-06, "epoch": 0.39842158859470467, "percentage": 39.84, "elapsed_time": "0:11:14", "remaining_time": "0:16:58", "throughput": 6676.48, "total_tokens": 4505536} +{"current_steps": 4700, "total_steps": 11784, "loss": 0.1351, "lr": 1.5038432816683652e-06, "epoch": 0.3988458927359131, "percentage": 39.88, "elapsed_time": "0:11:15", "remaining_time": "0:16:57", "throughput": 6680.36, "total_tokens": 4510400} +{"current_steps": 4705, "total_steps": 11784, "loss": 0.0894, "lr": 1.5025632894194532e-06, "epoch": 0.3992701968771215, "percentage": 39.93, "elapsed_time": "0:11:15", "remaining_time": "0:16:56", "throughput": 6685.05, "total_tokens": 4515904} +{"current_steps": 4710, "total_steps": 11784, "loss": 0.0939, "lr": 1.5012821945931303e-06, "epoch": 0.39969450101832993, "percentage": 39.97, "elapsed_time": "0:11:15", "remaining_time": "0:16:55", "throughput": 6688.51, "total_tokens": 4520448} +{"current_steps": 4715, "total_steps": 11784, "loss": 0.0654, "lr": 1.5e-06, "epoch": 0.40011880515953835, "percentage": 40.01, "elapsed_time": "0:11:16", "remaining_time": "0:16:53", "throughput": 6693.01, "total_tokens": 4525824} +{"current_steps": 4720, "total_steps": 11784, "loss": 0.0782, "lr": 1.498716708453079e-06, "epoch": 0.40054310930074677, "percentage": 40.05, "elapsed_time": "0:11:16", "remaining_time": "0:16:52", "throughput": 6696.39, "total_tokens": 4530304} +{"current_steps": 4720, "total_steps": 11784, "eval_loss": 0.055105432868003845, "epoch": 0.40054310930074677, "percentage": 40.05, "elapsed_time": "0:11:32", "remaining_time": "0:17:16", "throughput": 6543.6, "total_tokens": 4530304} +{"current_steps": 4725, "total_steps": 11784, "loss": 0.1067, "lr": 1.4974323227677903e-06, "epoch": 0.4009674134419552, "percentage": 40.1, "elapsed_time": "0:12:09", "remaining_time": "0:18:10", "throughput": 6213.75, "total_tokens": 4534720} +{"current_steps": 4730, "total_steps": 11784, "loss": 0.1018, "lr": 1.4961468457619575e-06, "epoch": 0.4013917175831636, "percentage": 40.14, "elapsed_time": "0:12:10", "remaining_time": "0:18:08", "throughput": 6217.49, "total_tokens": 4539520} +{"current_steps": 4735, "total_steps": 11784, "loss": 0.0083, "lr": 1.4948602802557982e-06, "epoch": 0.40181602172437203, "percentage": 40.18, "elapsed_time": "0:12:10", "remaining_time": "0:18:07", "throughput": 6221.36, "total_tokens": 4544448} +{"current_steps": 4740, "total_steps": 11784, "loss": 0.0448, "lr": 1.4935726290719177e-06, "epoch": 0.40224032586558045, "percentage": 40.22, "elapsed_time": "0:12:10", "remaining_time": "0:18:06", "throughput": 6225.5, "total_tokens": 4549632} +{"current_steps": 4745, "total_steps": 11784, "loss": 0.0697, "lr": 1.492283895035305e-06, "epoch": 0.40266463000678887, "percentage": 40.27, "elapsed_time": "0:12:11", "remaining_time": "0:18:04", "throughput": 6229.38, "total_tokens": 4554560} +{"current_steps": 4750, "total_steps": 11784, "loss": 0.0605, "lr": 1.490994080973322e-06, "epoch": 0.4030889341479973, "percentage": 40.31, "elapsed_time": "0:12:11", "remaining_time": "0:18:03", "throughput": 6232.88, "total_tokens": 4559168} +{"current_steps": 4755, "total_steps": 11784, "loss": 0.0556, "lr": 1.4897031897157025e-06, "epoch": 0.4035132382892057, "percentage": 40.35, "elapsed_time": "0:12:11", "remaining_time": "0:18:01", "throughput": 6236.56, "total_tokens": 4563968} +{"current_steps": 4760, "total_steps": 11784, "loss": 0.0479, "lr": 1.4884112240945425e-06, "epoch": 0.40393754243041413, "percentage": 40.39, "elapsed_time": "0:12:12", "remaining_time": "0:18:00", "throughput": 6241.0, "total_tokens": 4569408} +{"current_steps": 4765, "total_steps": 11784, "loss": 0.0856, "lr": 1.4871181869442952e-06, "epoch": 0.40436184657162255, "percentage": 40.44, "elapsed_time": "0:12:12", "remaining_time": "0:17:58", "throughput": 6244.24, "total_tokens": 4573824} +{"current_steps": 4770, "total_steps": 11784, "loss": 0.126, "lr": 1.485824081101764e-06, "epoch": 0.40478615071283097, "percentage": 40.48, "elapsed_time": "0:12:12", "remaining_time": "0:17:57", "throughput": 6247.65, "total_tokens": 4578368} +{"current_steps": 4775, "total_steps": 11784, "loss": 0.0938, "lr": 1.4845289094060984e-06, "epoch": 0.4052104548540394, "percentage": 40.52, "elapsed_time": "0:12:13", "remaining_time": "0:17:56", "throughput": 6251.21, "total_tokens": 4583040} +{"current_steps": 4780, "total_steps": 11784, "loss": 0.1106, "lr": 1.4832326746987846e-06, "epoch": 0.4056347589952478, "percentage": 40.56, "elapsed_time": "0:12:13", "remaining_time": "0:17:54", "throughput": 6255.06, "total_tokens": 4587968} +{"current_steps": 4785, "total_steps": 11784, "loss": 0.1431, "lr": 1.4819353798236424e-06, "epoch": 0.40605906313645623, "percentage": 40.61, "elapsed_time": "0:12:13", "remaining_time": "0:17:53", "throughput": 6259.28, "total_tokens": 4593216} +{"current_steps": 4790, "total_steps": 11784, "loss": 0.0959, "lr": 1.4806370276268163e-06, "epoch": 0.40648336727766465, "percentage": 40.65, "elapsed_time": "0:12:14", "remaining_time": "0:17:51", "throughput": 6262.74, "total_tokens": 4597824} +{"current_steps": 4795, "total_steps": 11784, "loss": 0.0695, "lr": 1.4793376209567714e-06, "epoch": 0.4069076714188731, "percentage": 40.69, "elapsed_time": "0:12:14", "remaining_time": "0:17:50", "throughput": 6266.73, "total_tokens": 4602880} +{"current_steps": 4800, "total_steps": 11784, "loss": 0.0287, "lr": 1.4780371626642858e-06, "epoch": 0.4073319755600815, "percentage": 40.73, "elapsed_time": "0:12:14", "remaining_time": "0:17:49", "throughput": 6270.53, "total_tokens": 4607744} +{"current_steps": 4805, "total_steps": 11784, "loss": 0.1188, "lr": 1.4767356556024448e-06, "epoch": 0.40775627970128986, "percentage": 40.78, "elapsed_time": "0:12:15", "remaining_time": "0:17:47", "throughput": 6273.86, "total_tokens": 4612224} +{"current_steps": 4810, "total_steps": 11784, "loss": 0.0525, "lr": 1.4754331026266344e-06, "epoch": 0.4081805838424983, "percentage": 40.82, "elapsed_time": "0:12:15", "remaining_time": "0:17:46", "throughput": 6277.17, "total_tokens": 4616704} +{"current_steps": 4815, "total_steps": 11784, "loss": 0.0684, "lr": 1.474129506594536e-06, "epoch": 0.4086048879837067, "percentage": 40.86, "elapsed_time": "0:12:15", "remaining_time": "0:17:44", "throughput": 6280.93, "total_tokens": 4621568} +{"current_steps": 4820, "total_steps": 11784, "loss": 0.0427, "lr": 1.472824870366118e-06, "epoch": 0.4090291921249151, "percentage": 40.9, "elapsed_time": "0:12:16", "remaining_time": "0:17:43", "throughput": 6284.41, "total_tokens": 4626176} +{"current_steps": 4825, "total_steps": 11784, "loss": 0.0541, "lr": 1.4715191968036324e-06, "epoch": 0.40945349626612354, "percentage": 40.95, "elapsed_time": "0:12:16", "remaining_time": "0:17:42", "throughput": 6287.39, "total_tokens": 4630400} +{"current_steps": 4830, "total_steps": 11784, "loss": 0.0376, "lr": 1.4702124887716058e-06, "epoch": 0.40987780040733196, "percentage": 40.99, "elapsed_time": "0:12:16", "remaining_time": "0:17:40", "throughput": 6290.45, "total_tokens": 4634688} +{"current_steps": 4835, "total_steps": 11784, "loss": 0.0444, "lr": 1.4689047491368354e-06, "epoch": 0.4103021045485404, "percentage": 41.03, "elapsed_time": "0:12:17", "remaining_time": "0:17:39", "throughput": 6293.68, "total_tokens": 4639104} +{"current_steps": 4840, "total_steps": 11784, "loss": 0.0833, "lr": 1.4675959807683808e-06, "epoch": 0.4107264086897488, "percentage": 41.07, "elapsed_time": "0:12:17", "remaining_time": "0:17:37", "throughput": 6296.65, "total_tokens": 4643328} +{"current_steps": 4845, "total_steps": 11784, "loss": 0.0481, "lr": 1.4662861865375588e-06, "epoch": 0.4111507128309572, "percentage": 41.12, "elapsed_time": "0:12:17", "remaining_time": "0:17:36", "throughput": 6300.53, "total_tokens": 4648320} +{"current_steps": 4850, "total_steps": 11784, "loss": 0.0984, "lr": 1.4649753693179373e-06, "epoch": 0.41157501697216564, "percentage": 41.16, "elapsed_time": "0:12:18", "remaining_time": "0:17:35", "throughput": 6304.19, "total_tokens": 4653120} +{"current_steps": 4855, "total_steps": 11784, "loss": 0.073, "lr": 1.4636635319853272e-06, "epoch": 0.41199932111337406, "percentage": 41.2, "elapsed_time": "0:12:18", "remaining_time": "0:17:33", "throughput": 6307.98, "total_tokens": 4658048} +{"current_steps": 4860, "total_steps": 11784, "loss": 0.0463, "lr": 1.4623506774177796e-06, "epoch": 0.4124236252545825, "percentage": 41.24, "elapsed_time": "0:12:18", "remaining_time": "0:17:32", "throughput": 6311.74, "total_tokens": 4662976} +{"current_steps": 4865, "total_steps": 11784, "loss": 0.0902, "lr": 1.4610368084955748e-06, "epoch": 0.4128479293957909, "percentage": 41.28, "elapsed_time": "0:12:19", "remaining_time": "0:17:31", "throughput": 6315.39, "total_tokens": 4667840} +{"current_steps": 4870, "total_steps": 11784, "loss": 0.096, "lr": 1.4597219281012208e-06, "epoch": 0.4132722335369993, "percentage": 41.33, "elapsed_time": "0:12:19", "remaining_time": "0:17:29", "throughput": 6319.92, "total_tokens": 4673408} +{"current_steps": 4875, "total_steps": 11784, "loss": 0.1332, "lr": 1.4584060391194436e-06, "epoch": 0.41369653767820774, "percentage": 41.37, "elapsed_time": "0:12:19", "remaining_time": "0:17:28", "throughput": 6325.11, "total_tokens": 4679552} +{"current_steps": 4880, "total_steps": 11784, "loss": 0.0965, "lr": 1.4570891444371814e-06, "epoch": 0.41412084181941616, "percentage": 41.41, "elapsed_time": "0:12:20", "remaining_time": "0:17:27", "throughput": 6328.77, "total_tokens": 4684352} +{"current_steps": 4885, "total_steps": 11784, "loss": 0.0692, "lr": 1.4557712469435797e-06, "epoch": 0.4145451459606246, "percentage": 41.45, "elapsed_time": "0:12:20", "remaining_time": "0:17:25", "throughput": 6331.87, "total_tokens": 4688704} +{"current_steps": 4890, "total_steps": 11784, "loss": 0.0413, "lr": 1.4544523495299841e-06, "epoch": 0.414969450101833, "percentage": 41.5, "elapsed_time": "0:12:20", "remaining_time": "0:17:24", "throughput": 6335.4, "total_tokens": 4693440} +{"current_steps": 4895, "total_steps": 11784, "loss": 0.0806, "lr": 1.4531324550899333e-06, "epoch": 0.4153937542430414, "percentage": 41.54, "elapsed_time": "0:12:21", "remaining_time": "0:17:23", "throughput": 6339.32, "total_tokens": 4698496} +{"current_steps": 4900, "total_steps": 11784, "loss": 0.0962, "lr": 1.451811566519154e-06, "epoch": 0.41581805838424984, "percentage": 41.58, "elapsed_time": "0:12:21", "remaining_time": "0:17:21", "throughput": 6343.3, "total_tokens": 4703616} +{"current_steps": 4905, "total_steps": 11784, "loss": 0.0827, "lr": 1.450489686715553e-06, "epoch": 0.41624236252545826, "percentage": 41.62, "elapsed_time": "0:12:21", "remaining_time": "0:17:20", "throughput": 6345.92, "total_tokens": 4707584} +{"current_steps": 4910, "total_steps": 11784, "loss": 0.1271, "lr": 1.4491668185792131e-06, "epoch": 0.4166666666666667, "percentage": 41.67, "elapsed_time": "0:12:22", "remaining_time": "0:17:19", "throughput": 6349.01, "total_tokens": 4711936} +{"current_steps": 4915, "total_steps": 11784, "loss": 0.0545, "lr": 1.4478429650123851e-06, "epoch": 0.4170909708078751, "percentage": 41.71, "elapsed_time": "0:12:22", "remaining_time": "0:17:17", "throughput": 6352.9, "total_tokens": 4716992} +{"current_steps": 4920, "total_steps": 11784, "loss": 0.0595, "lr": 1.44651812891948e-06, "epoch": 0.4175152749490835, "percentage": 41.75, "elapsed_time": "0:12:22", "remaining_time": "0:17:16", "throughput": 6356.54, "total_tokens": 4721792} +{"current_steps": 4925, "total_steps": 11784, "loss": 0.0644, "lr": 1.4451923132070669e-06, "epoch": 0.41793957909029195, "percentage": 41.79, "elapsed_time": "0:12:23", "remaining_time": "0:17:14", "throughput": 6359.71, "total_tokens": 4726208} +{"current_steps": 4930, "total_steps": 11784, "loss": 0.0712, "lr": 1.4438655207838628e-06, "epoch": 0.41836388323150037, "percentage": 41.84, "elapsed_time": "0:12:23", "remaining_time": "0:17:13", "throughput": 6363.15, "total_tokens": 4730880} +{"current_steps": 4935, "total_steps": 11784, "loss": 0.0898, "lr": 1.4425377545607275e-06, "epoch": 0.41878818737270873, "percentage": 41.88, "elapsed_time": "0:12:23", "remaining_time": "0:17:12", "throughput": 6367.25, "total_tokens": 4736128} +{"current_steps": 4940, "total_steps": 11784, "loss": 0.0292, "lr": 1.4412090174506567e-06, "epoch": 0.41921249151391715, "percentage": 41.92, "elapsed_time": "0:12:24", "remaining_time": "0:17:10", "throughput": 6370.47, "total_tokens": 4740608} +{"current_steps": 4945, "total_steps": 11784, "loss": 0.0739, "lr": 1.4398793123687777e-06, "epoch": 0.41963679565512557, "percentage": 41.96, "elapsed_time": "0:12:24", "remaining_time": "0:17:09", "throughput": 6374.05, "total_tokens": 4745408} +{"current_steps": 4950, "total_steps": 11784, "loss": 0.0038, "lr": 1.4385486422323404e-06, "epoch": 0.420061099796334, "percentage": 42.01, "elapsed_time": "0:12:24", "remaining_time": "0:17:08", "throughput": 6377.85, "total_tokens": 4750400} +{"current_steps": 4955, "total_steps": 11784, "loss": 0.0556, "lr": 1.4372170099607123e-06, "epoch": 0.4204854039375424, "percentage": 42.05, "elapsed_time": "0:12:25", "remaining_time": "0:17:06", "throughput": 6381.22, "total_tokens": 4755008} +{"current_steps": 4960, "total_steps": 11784, "loss": 0.1251, "lr": 1.435884418475371e-06, "epoch": 0.42090970807875083, "percentage": 42.09, "elapsed_time": "0:12:25", "remaining_time": "0:17:05", "throughput": 6384.36, "total_tokens": 4759424} +{"current_steps": 4965, "total_steps": 11784, "loss": 0.0476, "lr": 1.4345508706998994e-06, "epoch": 0.42133401221995925, "percentage": 42.13, "elapsed_time": "0:12:25", "remaining_time": "0:17:04", "throughput": 6387.64, "total_tokens": 4763968} +{"current_steps": 4970, "total_steps": 11784, "loss": 0.065, "lr": 1.433216369559978e-06, "epoch": 0.4217583163611677, "percentage": 42.18, "elapsed_time": "0:12:26", "remaining_time": "0:17:02", "throughput": 6391.44, "total_tokens": 4768960} +{"current_steps": 4975, "total_steps": 11784, "loss": 0.0698, "lr": 1.4318809179833791e-06, "epoch": 0.4221826205023761, "percentage": 42.22, "elapsed_time": "0:12:26", "remaining_time": "0:17:01", "throughput": 6395.94, "total_tokens": 4774592} +{"current_steps": 4980, "total_steps": 11784, "loss": 0.0981, "lr": 1.4305445188999596e-06, "epoch": 0.4226069246435845, "percentage": 42.26, "elapsed_time": "0:12:26", "remaining_time": "0:17:00", "throughput": 6399.0, "total_tokens": 4778944} +{"current_steps": 4985, "total_steps": 11784, "loss": 0.1095, "lr": 1.4292071752416558e-06, "epoch": 0.42303122878479293, "percentage": 42.3, "elapsed_time": "0:12:27", "remaining_time": "0:16:59", "throughput": 6402.29, "total_tokens": 4783488} +{"current_steps": 4990, "total_steps": 11784, "loss": 0.0264, "lr": 1.4278688899424764e-06, "epoch": 0.42345553292600135, "percentage": 42.35, "elapsed_time": "0:12:27", "remaining_time": "0:16:57", "throughput": 6405.8, "total_tokens": 4788288} +{"current_steps": 4995, "total_steps": 11784, "loss": 0.0264, "lr": 1.4265296659384953e-06, "epoch": 0.4238798370672098, "percentage": 42.39, "elapsed_time": "0:12:27", "remaining_time": "0:16:56", "throughput": 6409.06, "total_tokens": 4792896} +{"current_steps": 5000, "total_steps": 11784, "loss": 0.1121, "lr": 1.4251895061678463e-06, "epoch": 0.4243041412084182, "percentage": 42.43, "elapsed_time": "0:12:28", "remaining_time": "0:16:55", "throughput": 6411.86, "total_tokens": 4797056} +{"current_steps": 5005, "total_steps": 11784, "loss": 0.0697, "lr": 1.4238484135707162e-06, "epoch": 0.4247284453496266, "percentage": 42.47, "elapsed_time": "0:12:28", "remaining_time": "0:16:53", "throughput": 6415.93, "total_tokens": 4802304} +{"current_steps": 5010, "total_steps": 11784, "loss": 0.0716, "lr": 1.4225063910893384e-06, "epoch": 0.42515274949083504, "percentage": 42.52, "elapsed_time": "0:12:28", "remaining_time": "0:16:52", "throughput": 6419.84, "total_tokens": 4807424} +{"current_steps": 5015, "total_steps": 11784, "loss": 0.0602, "lr": 1.4211634416679855e-06, "epoch": 0.42557705363204346, "percentage": 42.56, "elapsed_time": "0:12:29", "remaining_time": "0:16:51", "throughput": 6423.36, "total_tokens": 4812224} +{"current_steps": 5020, "total_steps": 11784, "loss": 0.1474, "lr": 1.419819568252965e-06, "epoch": 0.4260013577732519, "percentage": 42.6, "elapsed_time": "0:12:29", "remaining_time": "0:16:49", "throughput": 6427.13, "total_tokens": 4817216} +{"current_steps": 5025, "total_steps": 11784, "loss": 0.0352, "lr": 1.418474773792611e-06, "epoch": 0.4264256619144603, "percentage": 42.64, "elapsed_time": "0:12:29", "remaining_time": "0:16:48", "throughput": 6431.02, "total_tokens": 4822336} +{"current_steps": 5030, "total_steps": 11784, "loss": 0.0397, "lr": 1.4171290612372779e-06, "epoch": 0.4268499660556687, "percentage": 42.68, "elapsed_time": "0:12:30", "remaining_time": "0:16:47", "throughput": 6434.79, "total_tokens": 4827328} +{"current_steps": 5035, "total_steps": 11784, "loss": 0.0677, "lr": 1.4157824335393349e-06, "epoch": 0.42727427019687714, "percentage": 42.73, "elapsed_time": "0:12:30", "remaining_time": "0:16:46", "throughput": 6437.94, "total_tokens": 4831808} +{"current_steps": 5040, "total_steps": 11784, "loss": 0.0246, "lr": 1.4144348936531588e-06, "epoch": 0.42769857433808556, "percentage": 42.77, "elapsed_time": "0:12:30", "remaining_time": "0:16:44", "throughput": 6440.87, "total_tokens": 4836096} +{"current_steps": 5045, "total_steps": 11784, "loss": 0.0595, "lr": 1.413086444535127e-06, "epoch": 0.428122878479294, "percentage": 42.81, "elapsed_time": "0:12:31", "remaining_time": "0:16:43", "throughput": 6443.88, "total_tokens": 4840448} +{"current_steps": 5050, "total_steps": 11784, "loss": 0.0571, "lr": 1.4117370891436133e-06, "epoch": 0.4285471826205024, "percentage": 42.85, "elapsed_time": "0:12:31", "remaining_time": "0:16:42", "throughput": 6447.31, "total_tokens": 4845184} +{"current_steps": 5055, "total_steps": 11784, "loss": 0.1268, "lr": 1.410386830438978e-06, "epoch": 0.4289714867617108, "percentage": 42.9, "elapsed_time": "0:12:31", "remaining_time": "0:16:40", "throughput": 6450.75, "total_tokens": 4849920} +{"current_steps": 5060, "total_steps": 11784, "loss": 0.0976, "lr": 1.4090356713835635e-06, "epoch": 0.42939579090291924, "percentage": 42.94, "elapsed_time": "0:12:32", "remaining_time": "0:16:39", "throughput": 6453.87, "total_tokens": 4854400} +{"current_steps": 5065, "total_steps": 11784, "loss": 0.085, "lr": 1.4076836149416886e-06, "epoch": 0.4298200950441276, "percentage": 42.98, "elapsed_time": "0:12:32", "remaining_time": "0:16:38", "throughput": 6457.62, "total_tokens": 4859392} +{"current_steps": 5070, "total_steps": 11784, "loss": 0.1246, "lr": 1.4063306640796404e-06, "epoch": 0.430244399185336, "percentage": 43.02, "elapsed_time": "0:12:32", "remaining_time": "0:16:36", "throughput": 6461.45, "total_tokens": 4864512} +{"current_steps": 5075, "total_steps": 11784, "loss": 0.0893, "lr": 1.4049768217656674e-06, "epoch": 0.43066870332654444, "percentage": 43.07, "elapsed_time": "0:12:33", "remaining_time": "0:16:35", "throughput": 6465.59, "total_tokens": 4869888} +{"current_steps": 5080, "total_steps": 11784, "loss": 0.0676, "lr": 1.4036220909699748e-06, "epoch": 0.43109300746775286, "percentage": 43.11, "elapsed_time": "0:12:33", "remaining_time": "0:16:34", "throughput": 6468.59, "total_tokens": 4874304} +{"current_steps": 5085, "total_steps": 11784, "loss": 0.0927, "lr": 1.4022664746647168e-06, "epoch": 0.4315173116089613, "percentage": 43.15, "elapsed_time": "0:12:33", "remaining_time": "0:16:33", "throughput": 6472.36, "total_tokens": 4879360} +{"current_steps": 5090, "total_steps": 11784, "loss": 0.0828, "lr": 1.40090997582399e-06, "epoch": 0.4319416157501697, "percentage": 43.19, "elapsed_time": "0:12:34", "remaining_time": "0:16:31", "throughput": 6475.11, "total_tokens": 4883520} +{"current_steps": 5095, "total_steps": 11784, "loss": 0.039, "lr": 1.3995525974238278e-06, "epoch": 0.4323659198913781, "percentage": 43.24, "elapsed_time": "0:12:34", "remaining_time": "0:16:30", "throughput": 6478.58, "total_tokens": 4888320} +{"current_steps": 5100, "total_steps": 11784, "loss": 0.0103, "lr": 1.398194342442193e-06, "epoch": 0.43279022403258655, "percentage": 43.28, "elapsed_time": "0:12:34", "remaining_time": "0:16:29", "throughput": 6482.83, "total_tokens": 4893824} +{"current_steps": 5105, "total_steps": 11784, "loss": 0.0693, "lr": 1.396835213858971e-06, "epoch": 0.43321452817379497, "percentage": 43.32, "elapsed_time": "0:12:35", "remaining_time": "0:16:28", "throughput": 6486.08, "total_tokens": 4898432} +{"current_steps": 5110, "total_steps": 11784, "loss": 0.106, "lr": 1.395475214655965e-06, "epoch": 0.4336388323150034, "percentage": 43.36, "elapsed_time": "0:12:35", "remaining_time": "0:16:26", "throughput": 6489.35, "total_tokens": 4903040} +{"current_steps": 5115, "total_steps": 11784, "loss": 0.0744, "lr": 1.394114347816887e-06, "epoch": 0.4340631364562118, "percentage": 43.41, "elapsed_time": "0:12:35", "remaining_time": "0:16:25", "throughput": 6493.08, "total_tokens": 4908096} +{"current_steps": 5120, "total_steps": 11784, "loss": 0.1002, "lr": 1.3927526163273538e-06, "epoch": 0.4344874405974202, "percentage": 43.45, "elapsed_time": "0:12:36", "remaining_time": "0:16:24", "throughput": 6496.27, "total_tokens": 4912640} +{"current_steps": 5125, "total_steps": 11784, "loss": 0.0305, "lr": 1.3913900231748776e-06, "epoch": 0.43491174473862865, "percentage": 43.49, "elapsed_time": "0:12:36", "remaining_time": "0:16:23", "throughput": 6499.8, "total_tokens": 4917504} +{"current_steps": 5130, "total_steps": 11784, "loss": 0.0281, "lr": 1.3900265713488623e-06, "epoch": 0.43533604887983707, "percentage": 43.53, "elapsed_time": "0:12:36", "remaining_time": "0:16:21", "throughput": 6503.66, "total_tokens": 4922688} +{"current_steps": 5135, "total_steps": 11784, "loss": 0.0589, "lr": 1.3886622638405952e-06, "epoch": 0.4357603530210455, "percentage": 43.58, "elapsed_time": "0:12:37", "remaining_time": "0:16:20", "throughput": 6506.47, "total_tokens": 4926976} +{"current_steps": 5140, "total_steps": 11784, "loss": 0.052, "lr": 1.3872971036432406e-06, "epoch": 0.4361846571622539, "percentage": 43.62, "elapsed_time": "0:12:37", "remaining_time": "0:16:19", "throughput": 6509.52, "total_tokens": 4931456} +{"current_steps": 5145, "total_steps": 11784, "loss": 0.0948, "lr": 1.385931093751834e-06, "epoch": 0.43660896130346233, "percentage": 43.66, "elapsed_time": "0:12:37", "remaining_time": "0:16:17", "throughput": 6512.67, "total_tokens": 4936000} +{"current_steps": 5150, "total_steps": 11784, "loss": 0.0645, "lr": 1.384564237163275e-06, "epoch": 0.43703326544467075, "percentage": 43.7, "elapsed_time": "0:12:38", "remaining_time": "0:16:16", "throughput": 6515.5, "total_tokens": 4940288} +{"current_steps": 5155, "total_steps": 11784, "loss": 0.0545, "lr": 1.3831965368763203e-06, "epoch": 0.43745756958587917, "percentage": 43.75, "elapsed_time": "0:12:38", "remaining_time": "0:16:15", "throughput": 6518.37, "total_tokens": 4944576} +{"current_steps": 5160, "total_steps": 11784, "loss": 0.0682, "lr": 1.3818279958915785e-06, "epoch": 0.4378818737270876, "percentage": 43.79, "elapsed_time": "0:12:38", "remaining_time": "0:16:14", "throughput": 6521.35, "total_tokens": 4948992} +{"current_steps": 5165, "total_steps": 11784, "loss": 0.0657, "lr": 1.3804586172115015e-06, "epoch": 0.438306177868296, "percentage": 43.83, "elapsed_time": "0:12:39", "remaining_time": "0:16:12", "throughput": 6524.7, "total_tokens": 4953728} +{"current_steps": 5170, "total_steps": 11784, "loss": 0.055, "lr": 1.3790884038403793e-06, "epoch": 0.43873048200950443, "percentage": 43.87, "elapsed_time": "0:12:39", "remaining_time": "0:16:11", "throughput": 6528.37, "total_tokens": 4958720} +{"current_steps": 5175, "total_steps": 11784, "loss": 0.0761, "lr": 1.3777173587843341e-06, "epoch": 0.43915478615071285, "percentage": 43.92, "elapsed_time": "0:12:39", "remaining_time": "0:16:10", "throughput": 6532.14, "total_tokens": 4963840} +{"current_steps": 5180, "total_steps": 11784, "loss": 0.0099, "lr": 1.3763454850513122e-06, "epoch": 0.43957909029192127, "percentage": 43.96, "elapsed_time": "0:12:40", "remaining_time": "0:16:09", "throughput": 6535.46, "total_tokens": 4968512} +{"current_steps": 5185, "total_steps": 11784, "loss": 0.079, "lr": 1.3749727856510766e-06, "epoch": 0.4400033944331297, "percentage": 44.0, "elapsed_time": "0:12:40", "remaining_time": "0:16:07", "throughput": 6538.45, "total_tokens": 4972928} +{"current_steps": 5190, "total_steps": 11784, "loss": 0.1034, "lr": 1.373599263595204e-06, "epoch": 0.4404276985743381, "percentage": 44.04, "elapsed_time": "0:12:40", "remaining_time": "0:16:06", "throughput": 6541.77, "total_tokens": 4977664} +{"current_steps": 5195, "total_steps": 11784, "loss": 0.1265, "lr": 1.3722249218970744e-06, "epoch": 0.4408520027155465, "percentage": 44.09, "elapsed_time": "0:12:41", "remaining_time": "0:16:05", "throughput": 6545.66, "total_tokens": 4982912} +{"current_steps": 5200, "total_steps": 11784, "loss": 0.0489, "lr": 1.3708497635718672e-06, "epoch": 0.4412763068567549, "percentage": 44.13, "elapsed_time": "0:12:41", "remaining_time": "0:16:04", "throughput": 6549.87, "total_tokens": 4988416} +{"current_steps": 5205, "total_steps": 11784, "loss": 0.0982, "lr": 1.3694737916365515e-06, "epoch": 0.4417006109979633, "percentage": 44.17, "elapsed_time": "0:12:41", "remaining_time": "0:16:03", "throughput": 6553.57, "total_tokens": 4993472} +{"current_steps": 5210, "total_steps": 11784, "loss": 0.037, "lr": 1.3680970091098832e-06, "epoch": 0.44212491513917174, "percentage": 44.21, "elapsed_time": "0:12:42", "remaining_time": "0:16:01", "throughput": 6556.91, "total_tokens": 4998208} +{"current_steps": 5215, "total_steps": 11784, "loss": 0.0563, "lr": 1.366719419012396e-06, "epoch": 0.44254921928038016, "percentage": 44.25, "elapsed_time": "0:12:42", "remaining_time": "0:16:00", "throughput": 6560.31, "total_tokens": 5003008} +{"current_steps": 5220, "total_steps": 11784, "loss": 0.0438, "lr": 1.3653410243663951e-06, "epoch": 0.4429735234215886, "percentage": 44.3, "elapsed_time": "0:12:42", "remaining_time": "0:15:59", "throughput": 6564.66, "total_tokens": 5008704} +{"current_steps": 5225, "total_steps": 11784, "loss": 0.1245, "lr": 1.363961828195951e-06, "epoch": 0.443397827562797, "percentage": 44.34, "elapsed_time": "0:12:43", "remaining_time": "0:15:58", "throughput": 6567.58, "total_tokens": 5013120} +{"current_steps": 5230, "total_steps": 11784, "loss": 0.0402, "lr": 1.3625818335268923e-06, "epoch": 0.4438221317040054, "percentage": 44.38, "elapsed_time": "0:12:43", "remaining_time": "0:15:56", "throughput": 6570.67, "total_tokens": 5017664} +{"current_steps": 5235, "total_steps": 11784, "loss": 0.058, "lr": 1.3612010433868004e-06, "epoch": 0.44424643584521384, "percentage": 44.42, "elapsed_time": "0:12:43", "remaining_time": "0:15:55", "throughput": 6574.13, "total_tokens": 5022528} +{"current_steps": 5240, "total_steps": 11784, "loss": 0.0781, "lr": 1.3598194608050008e-06, "epoch": 0.44467073998642226, "percentage": 44.47, "elapsed_time": "0:12:44", "remaining_time": "0:15:54", "throughput": 6577.21, "total_tokens": 5027072} +{"current_steps": 5245, "total_steps": 11784, "loss": 0.0093, "lr": 1.3584370888125583e-06, "epoch": 0.4450950441276307, "percentage": 44.51, "elapsed_time": "0:12:44", "remaining_time": "0:15:53", "throughput": 6580.06, "total_tokens": 5031424} +{"current_steps": 5250, "total_steps": 11784, "loss": 0.056, "lr": 1.357053930442269e-06, "epoch": 0.4455193482688391, "percentage": 44.55, "elapsed_time": "0:12:44", "remaining_time": "0:15:52", "throughput": 6583.69, "total_tokens": 5036480} +{"current_steps": 5255, "total_steps": 11784, "loss": 0.0619, "lr": 1.355669988728655e-06, "epoch": 0.4459436524100475, "percentage": 44.59, "elapsed_time": "0:12:45", "remaining_time": "0:15:50", "throughput": 6587.61, "total_tokens": 5041792} +{"current_steps": 5260, "total_steps": 11784, "loss": 0.0033, "lr": 1.3542852667079557e-06, "epoch": 0.44636795655125594, "percentage": 44.64, "elapsed_time": "0:12:45", "remaining_time": "0:15:49", "throughput": 6590.95, "total_tokens": 5046592} +{"current_steps": 5265, "total_steps": 11784, "loss": 0.1114, "lr": 1.352899767418124e-06, "epoch": 0.44679226069246436, "percentage": 44.68, "elapsed_time": "0:12:46", "remaining_time": "0:15:48", "throughput": 6594.4, "total_tokens": 5051456} +{"current_steps": 5270, "total_steps": 11784, "loss": 0.0761, "lr": 1.3515134938988168e-06, "epoch": 0.4472165648336728, "percentage": 44.72, "elapsed_time": "0:12:46", "remaining_time": "0:15:47", "throughput": 6597.76, "total_tokens": 5056320} +{"current_steps": 5275, "total_steps": 11784, "loss": 0.0149, "lr": 1.3501264491913906e-06, "epoch": 0.4476408689748812, "percentage": 44.76, "elapsed_time": "0:12:46", "remaining_time": "0:15:46", "throughput": 6601.22, "total_tokens": 5061248} +{"current_steps": 5280, "total_steps": 11784, "loss": 0.0755, "lr": 1.348738636338893e-06, "epoch": 0.4480651731160896, "percentage": 44.81, "elapsed_time": "0:12:47", "remaining_time": "0:15:44", "throughput": 6604.12, "total_tokens": 5065664} +{"current_steps": 5285, "total_steps": 11784, "loss": 0.0789, "lr": 1.3473500583860568e-06, "epoch": 0.44848947725729804, "percentage": 44.85, "elapsed_time": "0:12:47", "remaining_time": "0:15:43", "throughput": 6608.69, "total_tokens": 5071552} +{"current_steps": 5290, "total_steps": 11784, "loss": 0.0564, "lr": 1.3459607183792945e-06, "epoch": 0.44891378139850646, "percentage": 44.89, "elapsed_time": "0:12:47", "remaining_time": "0:15:42", "throughput": 6611.67, "total_tokens": 5076032} +{"current_steps": 5295, "total_steps": 11784, "loss": 0.0543, "lr": 1.344570619366689e-06, "epoch": 0.4493380855397149, "percentage": 44.93, "elapsed_time": "0:12:48", "remaining_time": "0:15:41", "throughput": 6614.54, "total_tokens": 5080384} +{"current_steps": 5300, "total_steps": 11784, "loss": 0.0623, "lr": 1.3431797643979894e-06, "epoch": 0.4497623896809233, "percentage": 44.98, "elapsed_time": "0:12:48", "remaining_time": "0:15:40", "throughput": 6618.07, "total_tokens": 5085376} +{"current_steps": 5305, "total_steps": 11784, "loss": 0.0338, "lr": 1.3417881565246027e-06, "epoch": 0.4501866938221317, "percentage": 45.02, "elapsed_time": "0:12:48", "remaining_time": "0:15:38", "throughput": 6621.35, "total_tokens": 5090112} +{"current_steps": 5310, "total_steps": 11784, "loss": 0.05, "lr": 1.3403957987995882e-06, "epoch": 0.45061099796334014, "percentage": 45.06, "elapsed_time": "0:12:49", "remaining_time": "0:15:37", "throughput": 6625.25, "total_tokens": 5095424} +{"current_steps": 5310, "total_steps": 11784, "eval_loss": 0.06341014802455902, "epoch": 0.45061099796334014, "percentage": 45.06, "elapsed_time": "0:13:04", "remaining_time": "0:15:56", "throughput": 6491.69, "total_tokens": 5095424} +{"current_steps": 5315, "total_steps": 11784, "loss": 0.0584, "lr": 1.33900269427765e-06, "epoch": 0.45103530210454856, "percentage": 45.1, "elapsed_time": "0:13:31", "remaining_time": "0:16:27", "throughput": 6283.83, "total_tokens": 5100864} +{"current_steps": 5320, "total_steps": 11784, "loss": 0.0825, "lr": 1.3376088460151306e-06, "epoch": 0.451459606245757, "percentage": 45.15, "elapsed_time": "0:13:32", "remaining_time": "0:16:26", "throughput": 6286.47, "total_tokens": 5105088} +{"current_steps": 5325, "total_steps": 11784, "loss": 0.044, "lr": 1.336214257070004e-06, "epoch": 0.4518839103869654, "percentage": 45.19, "elapsed_time": "0:13:32", "remaining_time": "0:16:25", "throughput": 6289.68, "total_tokens": 5109760} +{"current_steps": 5330, "total_steps": 11784, "loss": 0.0885, "lr": 1.3348189305018702e-06, "epoch": 0.45230821452817377, "percentage": 45.23, "elapsed_time": "0:13:32", "remaining_time": "0:16:24", "throughput": 6292.6, "total_tokens": 5114176} +{"current_steps": 5335, "total_steps": 11784, "loss": 0.0254, "lr": 1.3334228693719464e-06, "epoch": 0.4527325186693822, "percentage": 45.27, "elapsed_time": "0:13:33", "remaining_time": "0:16:22", "throughput": 6295.51, "total_tokens": 5118592} +{"current_steps": 5340, "total_steps": 11784, "loss": 0.1096, "lr": 1.3320260767430614e-06, "epoch": 0.4531568228105906, "percentage": 45.32, "elapsed_time": "0:13:33", "remaining_time": "0:16:21", "throughput": 6299.03, "total_tokens": 5123584} +{"current_steps": 5345, "total_steps": 11784, "loss": 0.0228, "lr": 1.3306285556796492e-06, "epoch": 0.45358112695179903, "percentage": 45.36, "elapsed_time": "0:13:33", "remaining_time": "0:16:20", "throughput": 6302.16, "total_tokens": 5128192} +{"current_steps": 5350, "total_steps": 11784, "loss": 0.0764, "lr": 1.3292303092477424e-06, "epoch": 0.45400543109300745, "percentage": 45.4, "elapsed_time": "0:13:34", "remaining_time": "0:16:18", "throughput": 6305.34, "total_tokens": 5132864} +{"current_steps": 5355, "total_steps": 11784, "loss": 0.0411, "lr": 1.3278313405149638e-06, "epoch": 0.45442973523421587, "percentage": 45.44, "elapsed_time": "0:13:34", "remaining_time": "0:16:17", "throughput": 6308.19, "total_tokens": 5137216} +{"current_steps": 5360, "total_steps": 11784, "loss": 0.0462, "lr": 1.3264316525505216e-06, "epoch": 0.4548540393754243, "percentage": 45.49, "elapsed_time": "0:13:34", "remaining_time": "0:16:16", "throughput": 6312.03, "total_tokens": 5142528} +{"current_steps": 5365, "total_steps": 11784, "loss": 0.0102, "lr": 1.3250312484252021e-06, "epoch": 0.4552783435166327, "percentage": 45.53, "elapsed_time": "0:13:35", "remaining_time": "0:16:15", "throughput": 6316.0, "total_tokens": 5147968} +{"current_steps": 5370, "total_steps": 11784, "loss": 0.0413, "lr": 1.3236301312113627e-06, "epoch": 0.45570264765784113, "percentage": 45.57, "elapsed_time": "0:13:35", "remaining_time": "0:16:13", "throughput": 6318.9, "total_tokens": 5152384} +{"current_steps": 5375, "total_steps": 11784, "loss": 0.1069, "lr": 1.3222283039829247e-06, "epoch": 0.45612695179904955, "percentage": 45.61, "elapsed_time": "0:13:35", "remaining_time": "0:16:12", "throughput": 6321.99, "total_tokens": 5156992} +{"current_steps": 5380, "total_steps": 11784, "loss": 0.0965, "lr": 1.3208257698153676e-06, "epoch": 0.45655125594025797, "percentage": 45.66, "elapsed_time": "0:13:36", "remaining_time": "0:16:11", "throughput": 6325.5, "total_tokens": 5161984} +{"current_steps": 5385, "total_steps": 11784, "loss": 0.0604, "lr": 1.3194225317857216e-06, "epoch": 0.4569755600814664, "percentage": 45.7, "elapsed_time": "0:13:36", "remaining_time": "0:16:10", "throughput": 6329.07, "total_tokens": 5167040} +{"current_steps": 5390, "total_steps": 11784, "loss": 0.0475, "lr": 1.3180185929725616e-06, "epoch": 0.4573998642226748, "percentage": 45.74, "elapsed_time": "0:13:36", "remaining_time": "0:16:08", "throughput": 6332.29, "total_tokens": 5171776} +{"current_steps": 5395, "total_steps": 11784, "loss": 0.117, "lr": 1.3166139564559992e-06, "epoch": 0.45782416836388323, "percentage": 45.78, "elapsed_time": "0:13:37", "remaining_time": "0:16:07", "throughput": 6335.92, "total_tokens": 5176896} +{"current_steps": 5400, "total_steps": 11784, "loss": 0.0416, "lr": 1.3152086253176773e-06, "epoch": 0.45824847250509165, "percentage": 45.82, "elapsed_time": "0:13:37", "remaining_time": "0:16:06", "throughput": 6338.8, "total_tokens": 5181312} +{"current_steps": 5405, "total_steps": 11784, "loss": 0.008, "lr": 1.313802602640763e-06, "epoch": 0.4586727766463001, "percentage": 45.87, "elapsed_time": "0:13:37", "remaining_time": "0:16:05", "throughput": 6342.07, "total_tokens": 5186112} +{"current_steps": 5410, "total_steps": 11784, "loss": 0.0387, "lr": 1.3123958915099392e-06, "epoch": 0.4590970807875085, "percentage": 45.91, "elapsed_time": "0:13:38", "remaining_time": "0:16:03", "throughput": 6345.47, "total_tokens": 5191040} +{"current_steps": 5415, "total_steps": 11784, "loss": 0.0441, "lr": 1.3109884950114005e-06, "epoch": 0.4595213849287169, "percentage": 45.95, "elapsed_time": "0:13:38", "remaining_time": "0:16:02", "throughput": 6348.47, "total_tokens": 5195584} +{"current_steps": 5420, "total_steps": 11784, "loss": 0.0799, "lr": 1.309580416232845e-06, "epoch": 0.45994568906992533, "percentage": 45.99, "elapsed_time": "0:13:38", "remaining_time": "0:16:01", "throughput": 6351.6, "total_tokens": 5200256} +{"current_steps": 5425, "total_steps": 11784, "loss": 0.0576, "lr": 1.3081716582634672e-06, "epoch": 0.46036999321113375, "percentage": 46.04, "elapsed_time": "0:13:39", "remaining_time": "0:16:00", "throughput": 6355.23, "total_tokens": 5205376} +{"current_steps": 5430, "total_steps": 11784, "loss": 0.0254, "lr": 1.3067622241939518e-06, "epoch": 0.4607942973523422, "percentage": 46.08, "elapsed_time": "0:13:39", "remaining_time": "0:15:58", "throughput": 6359.75, "total_tokens": 5211392} +{"current_steps": 5435, "total_steps": 11784, "loss": 0.0247, "lr": 1.305352117116467e-06, "epoch": 0.4612186014935506, "percentage": 46.12, "elapsed_time": "0:13:39", "remaining_time": "0:15:57", "throughput": 6362.41, "total_tokens": 5215616} +{"current_steps": 5440, "total_steps": 11784, "loss": 0.0862, "lr": 1.3039413401246576e-06, "epoch": 0.461642905634759, "percentage": 46.16, "elapsed_time": "0:13:40", "remaining_time": "0:15:56", "throughput": 6365.88, "total_tokens": 5220608} +{"current_steps": 5445, "total_steps": 11784, "loss": 0.1111, "lr": 1.3025298963136377e-06, "epoch": 0.46206720977596744, "percentage": 46.21, "elapsed_time": "0:13:40", "remaining_time": "0:15:55", "throughput": 6369.09, "total_tokens": 5225344} +{"current_steps": 5450, "total_steps": 11784, "loss": 0.0417, "lr": 1.3011177887799844e-06, "epoch": 0.46249151391717586, "percentage": 46.25, "elapsed_time": "0:13:40", "remaining_time": "0:15:53", "throughput": 6372.72, "total_tokens": 5230464} +{"current_steps": 5455, "total_steps": 11784, "loss": 0.0548, "lr": 1.2997050206217315e-06, "epoch": 0.4629158180583843, "percentage": 46.29, "elapsed_time": "0:13:41", "remaining_time": "0:15:52", "throughput": 6375.97, "total_tokens": 5235264} +{"current_steps": 5460, "total_steps": 11784, "loss": 0.0883, "lr": 1.2982915949383614e-06, "epoch": 0.46334012219959264, "percentage": 46.33, "elapsed_time": "0:13:41", "remaining_time": "0:15:51", "throughput": 6378.95, "total_tokens": 5239808} +{"current_steps": 5465, "total_steps": 11784, "loss": 0.0629, "lr": 1.2968775148308002e-06, "epoch": 0.46376442634080106, "percentage": 46.38, "elapsed_time": "0:13:41", "remaining_time": "0:15:50", "throughput": 6381.98, "total_tokens": 5244416} +{"current_steps": 5470, "total_steps": 11784, "loss": 0.0882, "lr": 1.295462783401408e-06, "epoch": 0.4641887304820095, "percentage": 46.42, "elapsed_time": "0:13:42", "remaining_time": "0:15:48", "throughput": 6385.29, "total_tokens": 5249280} +{"current_steps": 5475, "total_steps": 11784, "loss": 0.1072, "lr": 1.2940474037539755e-06, "epoch": 0.4646130346232179, "percentage": 46.46, "elapsed_time": "0:13:42", "remaining_time": "0:15:47", "throughput": 6388.57, "total_tokens": 5254080} +{"current_steps": 5480, "total_steps": 11784, "loss": 0.0185, "lr": 1.2926313789937143e-06, "epoch": 0.4650373387644263, "percentage": 46.5, "elapsed_time": "0:13:42", "remaining_time": "0:15:46", "throughput": 6392.11, "total_tokens": 5259136} +{"current_steps": 5485, "total_steps": 11784, "loss": 0.0443, "lr": 1.2912147122272522e-06, "epoch": 0.46546164290563474, "percentage": 46.55, "elapsed_time": "0:13:43", "remaining_time": "0:15:45", "throughput": 6395.13, "total_tokens": 5263744} +{"current_steps": 5490, "total_steps": 11784, "loss": 0.0569, "lr": 1.289797406562625e-06, "epoch": 0.46588594704684316, "percentage": 46.59, "elapsed_time": "0:13:43", "remaining_time": "0:15:44", "throughput": 6398.37, "total_tokens": 5268544} +{"current_steps": 5495, "total_steps": 11784, "loss": 0.0288, "lr": 1.2883794651092704e-06, "epoch": 0.4663102511880516, "percentage": 46.63, "elapsed_time": "0:13:43", "remaining_time": "0:15:42", "throughput": 6401.53, "total_tokens": 5273280} +{"current_steps": 5500, "total_steps": 11784, "loss": 0.0231, "lr": 1.2869608909780212e-06, "epoch": 0.46673455532926, "percentage": 46.67, "elapsed_time": "0:13:44", "remaining_time": "0:15:41", "throughput": 6404.56, "total_tokens": 5277888} +{"current_steps": 5505, "total_steps": 11784, "loss": 0.0518, "lr": 1.2855416872810973e-06, "epoch": 0.4671588594704684, "percentage": 46.72, "elapsed_time": "0:13:44", "remaining_time": "0:15:40", "throughput": 6407.48, "total_tokens": 5282432} +{"current_steps": 5510, "total_steps": 11784, "loss": 0.0088, "lr": 1.284121857132101e-06, "epoch": 0.46758316361167684, "percentage": 46.76, "elapsed_time": "0:13:44", "remaining_time": "0:15:39", "throughput": 6412.05, "total_tokens": 5288512} +{"current_steps": 5515, "total_steps": 11784, "loss": 0.0204, "lr": 1.2827014036460082e-06, "epoch": 0.46800746775288526, "percentage": 46.8, "elapsed_time": "0:13:45", "remaining_time": "0:15:37", "throughput": 6414.72, "total_tokens": 5292800} +{"current_steps": 5520, "total_steps": 11784, "loss": 0.0274, "lr": 1.2812803299391628e-06, "epoch": 0.4684317718940937, "percentage": 46.84, "elapsed_time": "0:13:45", "remaining_time": "0:15:36", "throughput": 6418.22, "total_tokens": 5297856} +{"current_steps": 5525, "total_steps": 11784, "loss": 0.0859, "lr": 1.2798586391292689e-06, "epoch": 0.4688560760353021, "percentage": 46.89, "elapsed_time": "0:13:45", "remaining_time": "0:15:35", "throughput": 6421.6, "total_tokens": 5302784} +{"current_steps": 5530, "total_steps": 11784, "loss": 0.0555, "lr": 1.2784363343353848e-06, "epoch": 0.4692803801765105, "percentage": 46.93, "elapsed_time": "0:13:46", "remaining_time": "0:15:34", "throughput": 6424.87, "total_tokens": 5307648} +{"current_steps": 5535, "total_steps": 11784, "loss": 0.0496, "lr": 1.2770134186779158e-06, "epoch": 0.46970468431771895, "percentage": 46.97, "elapsed_time": "0:13:46", "remaining_time": "0:15:33", "throughput": 6427.27, "total_tokens": 5311680} +{"current_steps": 5540, "total_steps": 11784, "loss": 0.053, "lr": 1.2755898952786076e-06, "epoch": 0.47012898845892737, "percentage": 47.01, "elapsed_time": "0:13:46", "remaining_time": "0:15:31", "throughput": 6430.28, "total_tokens": 5316288} +{"current_steps": 5545, "total_steps": 11784, "loss": 0.0291, "lr": 1.2741657672605385e-06, "epoch": 0.4705532926001358, "percentage": 47.06, "elapsed_time": "0:13:47", "remaining_time": "0:15:30", "throughput": 6432.82, "total_tokens": 5320448} +{"current_steps": 5550, "total_steps": 11784, "loss": 0.0483, "lr": 1.272741037748114e-06, "epoch": 0.4709775967413442, "percentage": 47.1, "elapsed_time": "0:13:47", "remaining_time": "0:15:29", "throughput": 6435.66, "total_tokens": 5324928} +{"current_steps": 5555, "total_steps": 11784, "loss": 0.0429, "lr": 1.2713157098670588e-06, "epoch": 0.4714019008825526, "percentage": 47.14, "elapsed_time": "0:13:47", "remaining_time": "0:15:28", "throughput": 6438.94, "total_tokens": 5329792} +{"current_steps": 5560, "total_steps": 11784, "loss": 0.0697, "lr": 1.2698897867444112e-06, "epoch": 0.47182620502376105, "percentage": 47.18, "elapsed_time": "0:13:48", "remaining_time": "0:15:26", "throughput": 6442.26, "total_tokens": 5334720} +{"current_steps": 5565, "total_steps": 11784, "loss": 0.0769, "lr": 1.268463271508514e-06, "epoch": 0.47225050916496947, "percentage": 47.23, "elapsed_time": "0:13:48", "remaining_time": "0:15:25", "throughput": 6445.93, "total_tokens": 5339968} +{"current_steps": 5570, "total_steps": 11784, "loss": 0.0083, "lr": 1.2670361672890099e-06, "epoch": 0.4726748133061779, "percentage": 47.27, "elapsed_time": "0:13:48", "remaining_time": "0:15:24", "throughput": 6449.57, "total_tokens": 5345216} +{"current_steps": 5575, "total_steps": 11784, "loss": 0.116, "lr": 1.265608477216834e-06, "epoch": 0.4730991174473863, "percentage": 47.31, "elapsed_time": "0:13:49", "remaining_time": "0:15:23", "throughput": 6453.17, "total_tokens": 5350400} +{"current_steps": 5580, "total_steps": 11784, "loss": 0.0868, "lr": 1.2641802044242065e-06, "epoch": 0.47352342158859473, "percentage": 47.35, "elapsed_time": "0:13:49", "remaining_time": "0:15:22", "throughput": 6456.1, "total_tokens": 5354944} +{"current_steps": 5585, "total_steps": 11784, "loss": 0.1187, "lr": 1.2627513520446252e-06, "epoch": 0.47394772572980315, "percentage": 47.39, "elapsed_time": "0:13:49", "remaining_time": "0:15:20", "throughput": 6458.55, "total_tokens": 5359040} +{"current_steps": 5590, "total_steps": 11784, "loss": 0.0866, "lr": 1.2613219232128608e-06, "epoch": 0.4743720298710115, "percentage": 47.44, "elapsed_time": "0:13:50", "remaining_time": "0:15:19", "throughput": 6461.48, "total_tokens": 5363584} +{"current_steps": 5595, "total_steps": 11784, "loss": 0.0184, "lr": 1.2598919210649475e-06, "epoch": 0.47479633401221993, "percentage": 47.48, "elapsed_time": "0:13:50", "remaining_time": "0:15:18", "throughput": 6464.54, "total_tokens": 5368256} +{"current_steps": 5600, "total_steps": 11784, "loss": 0.0563, "lr": 1.2584613487381787e-06, "epoch": 0.47522063815342835, "percentage": 47.52, "elapsed_time": "0:13:50", "remaining_time": "0:15:17", "throughput": 6467.47, "total_tokens": 5372800} +{"current_steps": 5605, "total_steps": 11784, "loss": 0.0413, "lr": 1.257030209371097e-06, "epoch": 0.4756449422946368, "percentage": 47.56, "elapsed_time": "0:13:51", "remaining_time": "0:15:16", "throughput": 6470.33, "total_tokens": 5377280} +{"current_steps": 5610, "total_steps": 11784, "loss": 0.1219, "lr": 1.2555985061034902e-06, "epoch": 0.4760692464358452, "percentage": 47.61, "elapsed_time": "0:13:51", "remaining_time": "0:15:14", "throughput": 6473.64, "total_tokens": 5382208} +{"current_steps": 5615, "total_steps": 11784, "loss": 0.0935, "lr": 1.2541662420763832e-06, "epoch": 0.4764935505770536, "percentage": 47.65, "elapsed_time": "0:13:51", "remaining_time": "0:15:13", "throughput": 6476.62, "total_tokens": 5386816} +{"current_steps": 5620, "total_steps": 11784, "loss": 0.0457, "lr": 1.2527334204320306e-06, "epoch": 0.47691785471826204, "percentage": 47.69, "elapsed_time": "0:13:52", "remaining_time": "0:15:12", "throughput": 6479.53, "total_tokens": 5391360} +{"current_steps": 5625, "total_steps": 11784, "loss": 0.0584, "lr": 1.251300044313911e-06, "epoch": 0.47734215885947046, "percentage": 47.73, "elapsed_time": "0:13:52", "remaining_time": "0:15:11", "throughput": 6482.44, "total_tokens": 5395904} +{"current_steps": 5630, "total_steps": 11784, "loss": 0.0555, "lr": 1.2498661168667188e-06, "epoch": 0.4777664630006789, "percentage": 47.78, "elapsed_time": "0:13:52", "remaining_time": "0:15:10", "throughput": 6485.33, "total_tokens": 5400448} +{"current_steps": 5635, "total_steps": 11784, "loss": 0.0431, "lr": 1.2484316412363585e-06, "epoch": 0.4781907671418873, "percentage": 47.82, "elapsed_time": "0:13:53", "remaining_time": "0:15:09", "throughput": 6489.08, "total_tokens": 5405824} +{"current_steps": 5640, "total_steps": 11784, "loss": 0.0407, "lr": 1.246996620569937e-06, "epoch": 0.4786150712830957, "percentage": 47.86, "elapsed_time": "0:13:53", "remaining_time": "0:15:07", "throughput": 6492.31, "total_tokens": 5410688} +{"current_steps": 5645, "total_steps": 11784, "loss": 0.0646, "lr": 1.245561058015757e-06, "epoch": 0.47903937542430414, "percentage": 47.9, "elapsed_time": "0:13:53", "remaining_time": "0:15:06", "throughput": 6495.27, "total_tokens": 5415296} +{"current_steps": 5650, "total_steps": 11784, "loss": 0.1001, "lr": 1.2441249567233098e-06, "epoch": 0.47946367956551256, "percentage": 47.95, "elapsed_time": "0:13:54", "remaining_time": "0:15:05", "throughput": 6497.93, "total_tokens": 5419648} +{"current_steps": 5655, "total_steps": 11784, "loss": 0.0398, "lr": 1.2426883198432696e-06, "epoch": 0.479887983706721, "percentage": 47.99, "elapsed_time": "0:13:54", "remaining_time": "0:15:04", "throughput": 6501.18, "total_tokens": 5424576} +{"current_steps": 5660, "total_steps": 11784, "loss": 0.033, "lr": 1.2412511505274844e-06, "epoch": 0.4803122878479294, "percentage": 48.03, "elapsed_time": "0:13:54", "remaining_time": "0:15:03", "throughput": 6504.11, "total_tokens": 5429184} +{"current_steps": 5665, "total_steps": 11784, "loss": 0.0589, "lr": 1.2398134519289708e-06, "epoch": 0.4807365919891378, "percentage": 48.07, "elapsed_time": "0:13:55", "remaining_time": "0:15:01", "throughput": 6506.78, "total_tokens": 5433536} +{"current_steps": 5670, "total_steps": 11784, "loss": 0.1094, "lr": 1.2383752272019071e-06, "epoch": 0.48116089613034624, "percentage": 48.12, "elapsed_time": "0:13:55", "remaining_time": "0:15:00", "throughput": 6510.07, "total_tokens": 5438464} +{"current_steps": 5675, "total_steps": 11784, "loss": 0.0679, "lr": 1.2369364795016252e-06, "epoch": 0.48158520027155466, "percentage": 48.16, "elapsed_time": "0:13:55", "remaining_time": "0:14:59", "throughput": 6513.09, "total_tokens": 5443136} +{"current_steps": 5680, "total_steps": 11784, "loss": 0.0332, "lr": 1.2354972119846045e-06, "epoch": 0.4820095044127631, "percentage": 48.2, "elapsed_time": "0:13:56", "remaining_time": "0:14:58", "throughput": 6516.05, "total_tokens": 5447744} +{"current_steps": 5685, "total_steps": 11784, "loss": 0.0926, "lr": 1.2340574278084648e-06, "epoch": 0.4824338085539715, "percentage": 48.24, "elapsed_time": "0:13:56", "remaining_time": "0:14:57", "throughput": 6519.48, "total_tokens": 5452800} +{"current_steps": 5690, "total_steps": 11784, "loss": 0.0565, "lr": 1.23261713013196e-06, "epoch": 0.4828581126951799, "percentage": 48.29, "elapsed_time": "0:13:56", "remaining_time": "0:14:56", "throughput": 6522.51, "total_tokens": 5457472} +{"current_steps": 5695, "total_steps": 11784, "loss": 0.0754, "lr": 1.2311763221149697e-06, "epoch": 0.48328241683638834, "percentage": 48.33, "elapsed_time": "0:13:57", "remaining_time": "0:14:54", "throughput": 6525.66, "total_tokens": 5462272} +{"current_steps": 5700, "total_steps": 11784, "loss": 0.0176, "lr": 1.2297350069184935e-06, "epoch": 0.48370672097759676, "percentage": 48.37, "elapsed_time": "0:13:57", "remaining_time": "0:14:53", "throughput": 6528.6, "total_tokens": 5466880} +{"current_steps": 5705, "total_steps": 11784, "loss": 0.0447, "lr": 1.228293187704644e-06, "epoch": 0.4841310251188052, "percentage": 48.41, "elapsed_time": "0:13:57", "remaining_time": "0:14:52", "throughput": 6531.67, "total_tokens": 5471616} +{"current_steps": 5710, "total_steps": 11784, "loss": 0.0748, "lr": 1.2268508676366393e-06, "epoch": 0.4845553292600136, "percentage": 48.46, "elapsed_time": "0:13:58", "remaining_time": "0:14:51", "throughput": 6534.52, "total_tokens": 5476160} +{"current_steps": 5715, "total_steps": 11784, "loss": 0.1343, "lr": 1.225408049878796e-06, "epoch": 0.484979633401222, "percentage": 48.5, "elapsed_time": "0:13:58", "remaining_time": "0:14:50", "throughput": 6537.65, "total_tokens": 5480960} +{"current_steps": 5720, "total_steps": 11784, "loss": 0.062, "lr": 1.223964737596523e-06, "epoch": 0.48540393754243044, "percentage": 48.54, "elapsed_time": "0:13:58", "remaining_time": "0:14:49", "throughput": 6541.55, "total_tokens": 5486528} +{"current_steps": 5725, "total_steps": 11784, "loss": 0.0411, "lr": 1.2225209339563143e-06, "epoch": 0.4858282416836388, "percentage": 48.58, "elapsed_time": "0:13:59", "remaining_time": "0:14:48", "throughput": 6544.78, "total_tokens": 5491456} +{"current_steps": 5730, "total_steps": 11784, "loss": 0.0602, "lr": 1.2210766421257419e-06, "epoch": 0.4862525458248472, "percentage": 48.63, "elapsed_time": "0:13:59", "remaining_time": "0:14:46", "throughput": 6548.29, "total_tokens": 5496640} +{"current_steps": 5735, "total_steps": 11784, "loss": 0.0293, "lr": 1.2196318652734477e-06, "epoch": 0.48667684996605565, "percentage": 48.67, "elapsed_time": "0:13:59", "remaining_time": "0:14:45", "throughput": 6551.33, "total_tokens": 5501376} +{"current_steps": 5740, "total_steps": 11784, "loss": 0.0521, "lr": 1.2181866065691392e-06, "epoch": 0.48710115410726407, "percentage": 48.71, "elapsed_time": "0:14:00", "remaining_time": "0:14:44", "throughput": 6554.11, "total_tokens": 5505856} +{"current_steps": 5745, "total_steps": 11784, "loss": 0.0564, "lr": 1.2167408691835807e-06, "epoch": 0.4875254582484725, "percentage": 48.75, "elapsed_time": "0:14:00", "remaining_time": "0:14:43", "throughput": 6557.31, "total_tokens": 5510720} +{"current_steps": 5750, "total_steps": 11784, "loss": 0.0466, "lr": 1.2152946562885857e-06, "epoch": 0.4879497623896809, "percentage": 48.79, "elapsed_time": "0:14:00", "remaining_time": "0:14:42", "throughput": 6559.76, "total_tokens": 5514880} +{"current_steps": 5755, "total_steps": 11784, "loss": 0.0491, "lr": 1.2138479710570123e-06, "epoch": 0.48837406653088933, "percentage": 48.84, "elapsed_time": "0:14:01", "remaining_time": "0:14:41", "throughput": 6562.8, "total_tokens": 5519616} +{"current_steps": 5760, "total_steps": 11784, "loss": 0.0656, "lr": 1.2124008166627535e-06, "epoch": 0.48879837067209775, "percentage": 48.88, "elapsed_time": "0:14:01", "remaining_time": "0:14:39", "throughput": 6565.45, "total_tokens": 5523968} +{"current_steps": 5765, "total_steps": 11784, "loss": 0.0673, "lr": 1.2109531962807332e-06, "epoch": 0.48922267481330617, "percentage": 48.92, "elapsed_time": "0:14:01", "remaining_time": "0:14:38", "throughput": 6568.78, "total_tokens": 5528960} +{"current_steps": 5770, "total_steps": 11784, "loss": 0.0817, "lr": 1.2095051130868959e-06, "epoch": 0.4896469789545146, "percentage": 48.96, "elapsed_time": "0:14:02", "remaining_time": "0:14:37", "throughput": 6573.6, "total_tokens": 5535488} +{"current_steps": 5775, "total_steps": 11784, "loss": 0.1018, "lr": 1.2080565702582027e-06, "epoch": 0.490071283095723, "percentage": 49.01, "elapsed_time": "0:14:02", "remaining_time": "0:14:36", "throughput": 6576.68, "total_tokens": 5540288} +{"current_steps": 5780, "total_steps": 11784, "loss": 0.0485, "lr": 1.2066075709726225e-06, "epoch": 0.49049558723693143, "percentage": 49.05, "elapsed_time": "0:14:02", "remaining_time": "0:14:35", "throughput": 6580.51, "total_tokens": 5545792} +{"current_steps": 5785, "total_steps": 11784, "loss": 0.0674, "lr": 1.2051581184091263e-06, "epoch": 0.49091989137813985, "percentage": 49.09, "elapsed_time": "0:14:03", "remaining_time": "0:14:34", "throughput": 6583.36, "total_tokens": 5550336} +{"current_steps": 5790, "total_steps": 11784, "loss": 0.0727, "lr": 1.2037082157476782e-06, "epoch": 0.49134419551934827, "percentage": 49.13, "elapsed_time": "0:14:03", "remaining_time": "0:14:33", "throughput": 6586.64, "total_tokens": 5555328} +{"current_steps": 5795, "total_steps": 11784, "loss": 0.0649, "lr": 1.2022578661692312e-06, "epoch": 0.4917684996605567, "percentage": 49.18, "elapsed_time": "0:14:03", "remaining_time": "0:14:32", "throughput": 6590.51, "total_tokens": 5560896} +{"current_steps": 5800, "total_steps": 11784, "loss": 0.0633, "lr": 1.2008070728557185e-06, "epoch": 0.4921928038017651, "percentage": 49.22, "elapsed_time": "0:14:04", "remaining_time": "0:14:30", "throughput": 6593.73, "total_tokens": 5565824} +{"current_steps": 5805, "total_steps": 11784, "loss": 0.0425, "lr": 1.1993558389900462e-06, "epoch": 0.49261710794297353, "percentage": 49.26, "elapsed_time": "0:14:04", "remaining_time": "0:14:29", "throughput": 6596.54, "total_tokens": 5570368} +{"current_steps": 5810, "total_steps": 11784, "loss": 0.0633, "lr": 1.197904167756087e-06, "epoch": 0.49304141208418195, "percentage": 49.3, "elapsed_time": "0:14:04", "remaining_time": "0:14:28", "throughput": 6599.31, "total_tokens": 5574848} +{"current_steps": 5815, "total_steps": 11784, "loss": 0.0689, "lr": 1.1964520623386741e-06, "epoch": 0.49346571622539037, "percentage": 49.35, "elapsed_time": "0:14:05", "remaining_time": "0:14:27", "throughput": 6602.17, "total_tokens": 5579456} +{"current_steps": 5820, "total_steps": 11784, "loss": 0.0423, "lr": 1.1949995259235919e-06, "epoch": 0.4938900203665988, "percentage": 49.39, "elapsed_time": "0:14:05", "remaining_time": "0:14:26", "throughput": 6605.4, "total_tokens": 5584384} +{"current_steps": 5825, "total_steps": 11784, "loss": 0.123, "lr": 1.1935465616975716e-06, "epoch": 0.4943143245078072, "percentage": 49.43, "elapsed_time": "0:14:05", "remaining_time": "0:14:25", "throughput": 6608.94, "total_tokens": 5589632} +{"current_steps": 5830, "total_steps": 11784, "loss": 0.0281, "lr": 1.192093172848282e-06, "epoch": 0.49473862864901563, "percentage": 49.47, "elapsed_time": "0:14:06", "remaining_time": "0:14:24", "throughput": 6611.63, "total_tokens": 5594048} +{"current_steps": 5835, "total_steps": 11784, "loss": 0.0515, "lr": 1.1906393625643242e-06, "epoch": 0.49516293279022405, "percentage": 49.52, "elapsed_time": "0:14:06", "remaining_time": "0:14:22", "throughput": 6614.57, "total_tokens": 5598720} +{"current_steps": 5840, "total_steps": 11784, "loss": 0.1113, "lr": 1.1891851340352235e-06, "epoch": 0.4955872369314325, "percentage": 49.56, "elapsed_time": "0:14:06", "remaining_time": "0:14:21", "throughput": 6617.23, "total_tokens": 5603136} +{"current_steps": 5845, "total_steps": 11784, "loss": 0.0388, "lr": 1.1877304904514232e-06, "epoch": 0.4960115410726409, "percentage": 49.6, "elapsed_time": "0:14:07", "remaining_time": "0:14:20", "throughput": 6620.25, "total_tokens": 5607872} +{"current_steps": 5850, "total_steps": 11784, "loss": 0.039, "lr": 1.1862754350042764e-06, "epoch": 0.4964358452138493, "percentage": 49.64, "elapsed_time": "0:14:07", "remaining_time": "0:14:19", "throughput": 6622.99, "total_tokens": 5612352} +{"current_steps": 5855, "total_steps": 11784, "loss": 0.077, "lr": 1.1848199708860404e-06, "epoch": 0.4968601493550577, "percentage": 49.69, "elapsed_time": "0:14:07", "remaining_time": "0:14:18", "throughput": 6626.38, "total_tokens": 5617472} +{"current_steps": 5860, "total_steps": 11784, "loss": 0.0934, "lr": 1.183364101289869e-06, "epoch": 0.4972844534962661, "percentage": 49.73, "elapsed_time": "0:14:08", "remaining_time": "0:14:17", "throughput": 6628.98, "total_tokens": 5621824} +{"current_steps": 5865, "total_steps": 11784, "loss": 0.0631, "lr": 1.1819078294098057e-06, "epoch": 0.4977087576374745, "percentage": 49.77, "elapsed_time": "0:14:08", "remaining_time": "0:14:16", "throughput": 6631.69, "total_tokens": 5626304} +{"current_steps": 5870, "total_steps": 11784, "loss": 0.0413, "lr": 1.180451158440776e-06, "epoch": 0.49813306177868294, "percentage": 49.81, "elapsed_time": "0:14:08", "remaining_time": "0:14:15", "throughput": 6635.33, "total_tokens": 5631680} +{"current_steps": 5875, "total_steps": 11784, "loss": 0.115, "lr": 1.1789940915785823e-06, "epoch": 0.49855736591989136, "percentage": 49.86, "elapsed_time": "0:14:09", "remaining_time": "0:14:13", "throughput": 6637.8, "total_tokens": 5635904} +{"current_steps": 5880, "total_steps": 11784, "loss": 0.0422, "lr": 1.177536632019894e-06, "epoch": 0.4989816700610998, "percentage": 49.9, "elapsed_time": "0:14:09", "remaining_time": "0:14:12", "throughput": 6640.66, "total_tokens": 5640512} +{"current_steps": 5885, "total_steps": 11784, "loss": 0.0691, "lr": 1.1760787829622423e-06, "epoch": 0.4994059742023082, "percentage": 49.94, "elapsed_time": "0:14:09", "remaining_time": "0:14:11", "throughput": 6644.83, "total_tokens": 5646464} +{"current_steps": 5890, "total_steps": 11784, "loss": 0.0249, "lr": 1.1746205476040137e-06, "epoch": 0.4998302783435166, "percentage": 49.98, "elapsed_time": "0:14:10", "remaining_time": "0:14:10", "throughput": 6647.63, "total_tokens": 5651008} +{"current_steps": 5895, "total_steps": 11784, "loss": 0.0536, "lr": 1.173161929144442e-06, "epoch": 0.5002545824847251, "percentage": 50.03, "elapsed_time": "0:14:10", "remaining_time": "0:14:09", "throughput": 6650.46, "total_tokens": 5655616} +{"current_steps": 5900, "total_steps": 11784, "loss": 0.0293, "lr": 1.171702930783601e-06, "epoch": 0.5006788866259335, "percentage": 50.07, "elapsed_time": "0:14:10", "remaining_time": "0:14:08", "throughput": 6653.41, "total_tokens": 5660352} +{"current_steps": 5900, "total_steps": 11784, "eval_loss": 0.05498848110437393, "epoch": 0.5006788866259335, "percentage": 50.07, "elapsed_time": "0:14:26", "remaining_time": "0:14:24", "throughput": 6531.54, "total_tokens": 5660352} +{"current_steps": 5905, "total_steps": 11784, "loss": 0.0627, "lr": 1.1702435557223986e-06, "epoch": 0.5011031907671419, "percentage": 50.11, "elapsed_time": "0:15:05", "remaining_time": "0:15:01", "throughput": 6253.95, "total_tokens": 5664832} +{"current_steps": 5910, "total_steps": 11784, "loss": 0.0832, "lr": 1.1687838071625684e-06, "epoch": 0.5015274949083504, "percentage": 50.15, "elapsed_time": "0:15:06", "remaining_time": "0:15:00", "throughput": 6257.13, "total_tokens": 5669824} +{"current_steps": 5915, "total_steps": 11784, "loss": 0.0744, "lr": 1.167323688306664e-06, "epoch": 0.5019517990495588, "percentage": 50.2, "elapsed_time": "0:15:06", "remaining_time": "0:14:59", "throughput": 6259.75, "total_tokens": 5674240} +{"current_steps": 5920, "total_steps": 11784, "loss": 0.0557, "lr": 1.1658632023580515e-06, "epoch": 0.5023761031907671, "percentage": 50.24, "elapsed_time": "0:15:06", "remaining_time": "0:14:58", "throughput": 6262.97, "total_tokens": 5679296} +{"current_steps": 5925, "total_steps": 11784, "loss": 0.0411, "lr": 1.1644023525209014e-06, "epoch": 0.5028004073319755, "percentage": 50.28, "elapsed_time": "0:15:07", "remaining_time": "0:14:57", "throughput": 6265.73, "total_tokens": 5683840} +{"current_steps": 5930, "total_steps": 11784, "loss": 0.0754, "lr": 1.162941142000184e-06, "epoch": 0.5032247114731839, "percentage": 50.32, "elapsed_time": "0:15:07", "remaining_time": "0:14:55", "throughput": 6268.97, "total_tokens": 5688896} +{"current_steps": 5935, "total_steps": 11784, "loss": 0.0428, "lr": 1.1614795740016598e-06, "epoch": 0.5036490156143923, "percentage": 50.36, "elapsed_time": "0:15:07", "remaining_time": "0:14:54", "throughput": 6271.72, "total_tokens": 5693440} +{"current_steps": 5940, "total_steps": 11784, "loss": 0.073, "lr": 1.160017651731874e-06, "epoch": 0.5040733197556008, "percentage": 50.41, "elapsed_time": "0:15:08", "remaining_time": "0:14:53", "throughput": 6274.41, "total_tokens": 5697920} +{"current_steps": 5945, "total_steps": 11784, "loss": 0.0417, "lr": 1.1585553783981486e-06, "epoch": 0.5044976238968092, "percentage": 50.45, "elapsed_time": "0:15:08", "remaining_time": "0:14:52", "throughput": 6277.22, "total_tokens": 5702528} +{"current_steps": 5950, "total_steps": 11784, "loss": 0.0924, "lr": 1.1570927572085766e-06, "epoch": 0.5049219280380176, "percentage": 50.49, "elapsed_time": "0:15:08", "remaining_time": "0:14:51", "throughput": 6280.42, "total_tokens": 5707584} +{"current_steps": 5955, "total_steps": 11784, "loss": 0.046, "lr": 1.1556297913720137e-06, "epoch": 0.505346232179226, "percentage": 50.53, "elapsed_time": "0:15:09", "remaining_time": "0:14:49", "throughput": 6283.19, "total_tokens": 5712192} +{"current_steps": 5960, "total_steps": 11784, "loss": 0.1587, "lr": 1.1541664840980715e-06, "epoch": 0.5057705363204344, "percentage": 50.58, "elapsed_time": "0:15:09", "remaining_time": "0:14:48", "throughput": 6286.77, "total_tokens": 5717632} +{"current_steps": 5965, "total_steps": 11784, "loss": 0.0854, "lr": 1.1527028385971107e-06, "epoch": 0.5061948404616429, "percentage": 50.62, "elapsed_time": "0:15:09", "remaining_time": "0:14:47", "throughput": 6289.51, "total_tokens": 5722176} +{"current_steps": 5970, "total_steps": 11784, "loss": 0.0459, "lr": 1.1512388580802348e-06, "epoch": 0.5066191446028513, "percentage": 50.66, "elapsed_time": "0:15:10", "remaining_time": "0:14:46", "throughput": 6292.25, "total_tokens": 5726720} +{"current_steps": 5975, "total_steps": 11784, "loss": 0.0672, "lr": 1.1497745457592815e-06, "epoch": 0.5070434487440597, "percentage": 50.7, "elapsed_time": "0:15:10", "remaining_time": "0:14:45", "throughput": 6295.06, "total_tokens": 5731328} +{"current_steps": 5980, "total_steps": 11784, "loss": 0.0614, "lr": 1.1483099048468168e-06, "epoch": 0.5074677528852681, "percentage": 50.75, "elapsed_time": "0:15:10", "remaining_time": "0:14:43", "throughput": 6298.14, "total_tokens": 5736256} +{"current_steps": 5985, "total_steps": 11784, "loss": 0.0764, "lr": 1.1468449385561272e-06, "epoch": 0.5078920570264766, "percentage": 50.79, "elapsed_time": "0:15:11", "remaining_time": "0:14:42", "throughput": 6301.26, "total_tokens": 5741248} +{"current_steps": 5990, "total_steps": 11784, "loss": 0.0344, "lr": 1.145379650101214e-06, "epoch": 0.508316361167685, "percentage": 50.83, "elapsed_time": "0:15:11", "remaining_time": "0:14:41", "throughput": 6304.47, "total_tokens": 5746304} +{"current_steps": 5995, "total_steps": 11784, "loss": 0.097, "lr": 1.143914042696784e-06, "epoch": 0.5087406653088934, "percentage": 50.87, "elapsed_time": "0:15:11", "remaining_time": "0:14:40", "throughput": 6307.85, "total_tokens": 5751552} +{"current_steps": 6000, "total_steps": 11784, "loss": 0.0568, "lr": 1.1424481195582445e-06, "epoch": 0.5091649694501018, "percentage": 50.92, "elapsed_time": "0:15:12", "remaining_time": "0:14:39", "throughput": 6310.52, "total_tokens": 5756032} +{"current_steps": 6005, "total_steps": 11784, "loss": 0.0818, "lr": 1.1409818839016958e-06, "epoch": 0.5095892735913102, "percentage": 50.96, "elapsed_time": "0:15:12", "remaining_time": "0:14:38", "throughput": 6314.2, "total_tokens": 5761600} +{"current_steps": 6010, "total_steps": 11784, "loss": 0.0286, "lr": 1.1395153389439231e-06, "epoch": 0.5100135777325187, "percentage": 51.0, "elapsed_time": "0:15:12", "remaining_time": "0:14:36", "throughput": 6317.1, "total_tokens": 5766336} +{"current_steps": 6015, "total_steps": 11784, "loss": 0.0889, "lr": 1.1380484879023903e-06, "epoch": 0.5104378818737271, "percentage": 51.04, "elapsed_time": "0:15:13", "remaining_time": "0:14:35", "throughput": 6320.3, "total_tokens": 5771392} +{"current_steps": 6020, "total_steps": 11784, "loss": 0.0375, "lr": 1.1365813339952334e-06, "epoch": 0.5108621860149355, "percentage": 51.09, "elapsed_time": "0:15:13", "remaining_time": "0:14:34", "throughput": 6322.88, "total_tokens": 5775808} +{"current_steps": 6025, "total_steps": 11784, "loss": 0.0905, "lr": 1.1351138804412524e-06, "epoch": 0.5112864901561439, "percentage": 51.13, "elapsed_time": "0:15:13", "remaining_time": "0:14:33", "throughput": 6325.97, "total_tokens": 5780800} +{"current_steps": 6030, "total_steps": 11784, "loss": 0.031, "lr": 1.1336461304599047e-06, "epoch": 0.5117107942973523, "percentage": 51.17, "elapsed_time": "0:15:14", "remaining_time": "0:14:32", "throughput": 6329.53, "total_tokens": 5786304} +{"current_steps": 6035, "total_steps": 11784, "loss": 0.0295, "lr": 1.1321780872712983e-06, "epoch": 0.5121350984385608, "percentage": 51.21, "elapsed_time": "0:15:14", "remaining_time": "0:14:31", "throughput": 6332.72, "total_tokens": 5791360} +{"current_steps": 6040, "total_steps": 11784, "loss": 0.019, "lr": 1.1307097540961838e-06, "epoch": 0.5125594025797692, "percentage": 51.26, "elapsed_time": "0:15:14", "remaining_time": "0:14:30", "throughput": 6335.34, "total_tokens": 5795840} +{"current_steps": 6045, "total_steps": 11784, "loss": 0.0386, "lr": 1.129241134155949e-06, "epoch": 0.5129837067209776, "percentage": 51.3, "elapsed_time": "0:15:15", "remaining_time": "0:14:28", "throughput": 6338.21, "total_tokens": 5800576} +{"current_steps": 6050, "total_steps": 11784, "loss": 0.0621, "lr": 1.1277722306726103e-06, "epoch": 0.513408010862186, "percentage": 51.34, "elapsed_time": "0:15:15", "remaining_time": "0:14:27", "throughput": 6341.38, "total_tokens": 5805632} +{"current_steps": 6055, "total_steps": 11784, "loss": 0.0254, "lr": 1.1263030468688057e-06, "epoch": 0.5138323150033944, "percentage": 51.38, "elapsed_time": "0:15:15", "remaining_time": "0:14:26", "throughput": 6344.56, "total_tokens": 5810688} +{"current_steps": 6060, "total_steps": 11784, "loss": 0.0513, "lr": 1.1248335859677891e-06, "epoch": 0.5142566191446029, "percentage": 51.43, "elapsed_time": "0:15:16", "remaining_time": "0:14:25", "throughput": 6347.6, "total_tokens": 5815616} +{"current_steps": 6065, "total_steps": 11784, "loss": 0.0772, "lr": 1.1233638511934218e-06, "epoch": 0.5146809232858113, "percentage": 51.47, "elapsed_time": "0:15:16", "remaining_time": "0:14:24", "throughput": 6350.75, "total_tokens": 5820672} +{"current_steps": 6070, "total_steps": 11784, "loss": 0.0595, "lr": 1.121893845770166e-06, "epoch": 0.5151052274270197, "percentage": 51.51, "elapsed_time": "0:15:16", "remaining_time": "0:14:23", "throughput": 6353.13, "total_tokens": 5824896} +{"current_steps": 6075, "total_steps": 11784, "loss": 0.0458, "lr": 1.120423572923078e-06, "epoch": 0.5155295315682281, "percentage": 51.55, "elapsed_time": "0:15:17", "remaining_time": "0:14:21", "throughput": 6355.98, "total_tokens": 5829632} +{"current_steps": 6080, "total_steps": 11784, "loss": 0.0292, "lr": 1.1189530358778004e-06, "epoch": 0.5159538357094365, "percentage": 51.6, "elapsed_time": "0:15:17", "remaining_time": "0:14:20", "throughput": 6358.68, "total_tokens": 5834240} +{"current_steps": 6085, "total_steps": 11784, "loss": 0.0951, "lr": 1.1174822378605551e-06, "epoch": 0.516378139850645, "percentage": 51.64, "elapsed_time": "0:15:17", "remaining_time": "0:14:19", "throughput": 6361.35, "total_tokens": 5838784} +{"current_steps": 6090, "total_steps": 11784, "loss": 0.0588, "lr": 1.116011182098138e-06, "epoch": 0.5168024439918534, "percentage": 51.68, "elapsed_time": "0:15:18", "remaining_time": "0:14:18", "throughput": 6363.75, "total_tokens": 5843072} +{"current_steps": 6095, "total_steps": 11784, "loss": 0.0476, "lr": 1.1145398718179085e-06, "epoch": 0.5172267481330618, "percentage": 51.72, "elapsed_time": "0:15:18", "remaining_time": "0:14:17", "throughput": 6366.19, "total_tokens": 5847360} +{"current_steps": 6100, "total_steps": 11784, "loss": 0.019, "lr": 1.1130683102477862e-06, "epoch": 0.5176510522742702, "percentage": 51.77, "elapsed_time": "0:15:18", "remaining_time": "0:14:16", "throughput": 6369.14, "total_tokens": 5852224} +{"current_steps": 6105, "total_steps": 11784, "loss": 0.0241, "lr": 1.1115965006162405e-06, "epoch": 0.5180753564154786, "percentage": 51.81, "elapsed_time": "0:15:19", "remaining_time": "0:14:15", "throughput": 6372.14, "total_tokens": 5857152} +{"current_steps": 6110, "total_steps": 11784, "loss": 0.1214, "lr": 1.110124446152286e-06, "epoch": 0.5184996605566871, "percentage": 51.85, "elapsed_time": "0:15:19", "remaining_time": "0:14:13", "throughput": 6374.98, "total_tokens": 5861888} +{"current_steps": 6115, "total_steps": 11784, "loss": 0.1294, "lr": 1.1086521500854744e-06, "epoch": 0.5189239646978955, "percentage": 51.89, "elapsed_time": "0:15:19", "remaining_time": "0:14:12", "throughput": 6377.72, "total_tokens": 5866496} +{"current_steps": 6120, "total_steps": 11784, "loss": 0.0606, "lr": 1.1071796156458868e-06, "epoch": 0.5193482688391039, "percentage": 51.93, "elapsed_time": "0:15:20", "remaining_time": "0:14:11", "throughput": 6380.25, "total_tokens": 5870912} +{"current_steps": 6125, "total_steps": 11784, "loss": 0.0881, "lr": 1.1057068460641281e-06, "epoch": 0.5197725729803123, "percentage": 51.98, "elapsed_time": "0:15:20", "remaining_time": "0:14:10", "throughput": 6384.04, "total_tokens": 5876672} +{"current_steps": 6130, "total_steps": 11784, "loss": 0.0278, "lr": 1.1042338445713183e-06, "epoch": 0.5201968771215207, "percentage": 52.02, "elapsed_time": "0:15:20", "remaining_time": "0:14:09", "throughput": 6386.51, "total_tokens": 5881024} +{"current_steps": 6135, "total_steps": 11784, "loss": 0.1703, "lr": 1.1027606143990867e-06, "epoch": 0.5206211812627292, "percentage": 52.06, "elapsed_time": "0:15:21", "remaining_time": "0:14:08", "throughput": 6389.67, "total_tokens": 5886080} +{"current_steps": 6140, "total_steps": 11784, "loss": 0.0505, "lr": 1.1012871587795638e-06, "epoch": 0.5210454854039376, "percentage": 52.1, "elapsed_time": "0:15:21", "remaining_time": "0:14:07", "throughput": 6392.57, "total_tokens": 5890880} +{"current_steps": 6145, "total_steps": 11784, "loss": 0.0369, "lr": 1.0998134809453756e-06, "epoch": 0.521469789545146, "percentage": 52.15, "elapsed_time": "0:15:21", "remaining_time": "0:14:05", "throughput": 6395.22, "total_tokens": 5895424} +{"current_steps": 6150, "total_steps": 11784, "loss": 0.0887, "lr": 1.0983395841296347e-06, "epoch": 0.5218940936863544, "percentage": 52.19, "elapsed_time": "0:15:22", "remaining_time": "0:14:04", "throughput": 6398.22, "total_tokens": 5900352} +{"current_steps": 6155, "total_steps": 11784, "loss": 0.0695, "lr": 1.0968654715659347e-06, "epoch": 0.5223183978275628, "percentage": 52.23, "elapsed_time": "0:15:22", "remaining_time": "0:14:03", "throughput": 6400.94, "total_tokens": 5904960} +{"current_steps": 6160, "total_steps": 11784, "loss": 0.0687, "lr": 1.095391146488342e-06, "epoch": 0.5227427019687713, "percentage": 52.27, "elapsed_time": "0:15:22", "remaining_time": "0:14:02", "throughput": 6404.06, "total_tokens": 5910016} +{"current_steps": 6165, "total_steps": 11784, "loss": 0.0779, "lr": 1.09391661213139e-06, "epoch": 0.5231670061099797, "percentage": 52.32, "elapsed_time": "0:15:23", "remaining_time": "0:14:01", "throughput": 6407.07, "total_tokens": 5914944} +{"current_steps": 6170, "total_steps": 11784, "loss": 0.0389, "lr": 1.0924418717300707e-06, "epoch": 0.5235913102511881, "percentage": 52.36, "elapsed_time": "0:15:23", "remaining_time": "0:14:00", "throughput": 6410.61, "total_tokens": 5920448} +{"current_steps": 6175, "total_steps": 11784, "loss": 0.0357, "lr": 1.090966928519828e-06, "epoch": 0.5240156143923965, "percentage": 52.4, "elapsed_time": "0:15:23", "remaining_time": "0:13:59", "throughput": 6413.9, "total_tokens": 5925696} +{"current_steps": 6180, "total_steps": 11784, "loss": 0.0356, "lr": 1.0894917857365511e-06, "epoch": 0.5244399185336049, "percentage": 52.44, "elapsed_time": "0:15:24", "remaining_time": "0:13:58", "throughput": 6416.87, "total_tokens": 5930624} +{"current_steps": 6185, "total_steps": 11784, "loss": 0.0751, "lr": 1.0880164466165673e-06, "epoch": 0.5248642226748133, "percentage": 52.49, "elapsed_time": "0:15:24", "remaining_time": "0:13:56", "throughput": 6419.49, "total_tokens": 5935168} +{"current_steps": 6190, "total_steps": 11784, "loss": 0.0332, "lr": 1.0865409143966338e-06, "epoch": 0.5252885268160217, "percentage": 52.53, "elapsed_time": "0:15:24", "remaining_time": "0:13:55", "throughput": 6422.1, "total_tokens": 5939712} +{"current_steps": 6195, "total_steps": 11784, "loss": 0.0108, "lr": 1.0850651923139317e-06, "epoch": 0.5257128309572301, "percentage": 52.57, "elapsed_time": "0:15:25", "remaining_time": "0:13:54", "throughput": 6425.02, "total_tokens": 5944576} +{"current_steps": 6200, "total_steps": 11784, "loss": 0.0517, "lr": 1.0835892836060598e-06, "epoch": 0.5261371350984385, "percentage": 52.61, "elapsed_time": "0:15:25", "remaining_time": "0:13:53", "throughput": 6427.69, "total_tokens": 5949184} +{"current_steps": 6205, "total_steps": 11784, "loss": 0.0215, "lr": 1.0821131915110246e-06, "epoch": 0.5265614392396469, "percentage": 52.66, "elapsed_time": "0:15:25", "remaining_time": "0:13:52", "throughput": 6430.71, "total_tokens": 5954176} +{"current_steps": 6210, "total_steps": 11784, "loss": 0.0753, "lr": 1.080636919267236e-06, "epoch": 0.5269857433808554, "percentage": 52.7, "elapsed_time": "0:15:26", "remaining_time": "0:13:51", "throughput": 6433.27, "total_tokens": 5958656} +{"current_steps": 6215, "total_steps": 11784, "loss": 0.0789, "lr": 1.079160470113499e-06, "epoch": 0.5274100475220638, "percentage": 52.74, "elapsed_time": "0:15:26", "remaining_time": "0:13:50", "throughput": 6435.96, "total_tokens": 5963264} +{"current_steps": 6220, "total_steps": 11784, "loss": 0.0936, "lr": 1.0776838472890064e-06, "epoch": 0.5278343516632722, "percentage": 52.78, "elapsed_time": "0:15:26", "remaining_time": "0:13:49", "throughput": 6439.27, "total_tokens": 5968576} +{"current_steps": 6225, "total_steps": 11784, "loss": 0.0034, "lr": 1.0762070540333322e-06, "epoch": 0.5282586558044806, "percentage": 52.83, "elapsed_time": "0:15:27", "remaining_time": "0:13:48", "throughput": 6442.0, "total_tokens": 5973248} +{"current_steps": 6230, "total_steps": 11784, "loss": 0.0488, "lr": 1.0747300935864243e-06, "epoch": 0.528682959945689, "percentage": 52.87, "elapsed_time": "0:15:27", "remaining_time": "0:13:46", "throughput": 6444.69, "total_tokens": 5977920} +{"current_steps": 6235, "total_steps": 11784, "loss": 0.0588, "lr": 1.0732529691885977e-06, "epoch": 0.5291072640868975, "percentage": 52.91, "elapsed_time": "0:15:27", "remaining_time": "0:13:45", "throughput": 6447.43, "total_tokens": 5982656} +{"current_steps": 6240, "total_steps": 11784, "loss": 0.0431, "lr": 1.0717756840805263e-06, "epoch": 0.5295315682281059, "percentage": 52.95, "elapsed_time": "0:15:28", "remaining_time": "0:13:44", "throughput": 6450.2, "total_tokens": 5987392} +{"current_steps": 6245, "total_steps": 11784, "loss": 0.0505, "lr": 1.0702982415032378e-06, "epoch": 0.5299558723693143, "percentage": 53.0, "elapsed_time": "0:15:28", "remaining_time": "0:13:43", "throughput": 6454.05, "total_tokens": 5993280} +{"current_steps": 6250, "total_steps": 11784, "loss": 0.0423, "lr": 1.068820644698104e-06, "epoch": 0.5303801765105227, "percentage": 53.04, "elapsed_time": "0:15:28", "remaining_time": "0:13:42", "throughput": 6457.05, "total_tokens": 5998272} +{"current_steps": 6255, "total_steps": 11784, "loss": 0.023, "lr": 1.0673428969068363e-06, "epoch": 0.5308044806517311, "percentage": 53.08, "elapsed_time": "0:15:29", "remaining_time": "0:13:41", "throughput": 6459.65, "total_tokens": 6002816} +{"current_steps": 6260, "total_steps": 11784, "loss": 0.028, "lr": 1.0658650013714765e-06, "epoch": 0.5312287847929396, "percentage": 53.12, "elapsed_time": "0:15:29", "remaining_time": "0:13:40", "throughput": 6462.61, "total_tokens": 6007744} +{"current_steps": 6265, "total_steps": 11784, "loss": 0.099, "lr": 1.0643869613343906e-06, "epoch": 0.531653088934148, "percentage": 53.17, "elapsed_time": "0:15:29", "remaining_time": "0:13:39", "throughput": 6464.7, "total_tokens": 6011776} +{"current_steps": 6270, "total_steps": 11784, "loss": 0.0939, "lr": 1.062908780038262e-06, "epoch": 0.5320773930753564, "percentage": 53.21, "elapsed_time": "0:15:30", "remaining_time": "0:13:38", "throughput": 6468.21, "total_tokens": 6017344} +{"current_steps": 6275, "total_steps": 11784, "loss": 0.0401, "lr": 1.0614304607260843e-06, "epoch": 0.5325016972165648, "percentage": 53.25, "elapsed_time": "0:15:30", "remaining_time": "0:13:37", "throughput": 6471.03, "total_tokens": 6022144} +{"current_steps": 6280, "total_steps": 11784, "loss": 0.0351, "lr": 1.0599520066411529e-06, "epoch": 0.5329260013577732, "percentage": 53.29, "elapsed_time": "0:15:30", "remaining_time": "0:13:35", "throughput": 6474.55, "total_tokens": 6027712} +{"current_steps": 6285, "total_steps": 11784, "loss": 0.0586, "lr": 1.0584734210270597e-06, "epoch": 0.5333503054989817, "percentage": 53.34, "elapsed_time": "0:15:31", "remaining_time": "0:13:34", "throughput": 6476.9, "total_tokens": 6032064} +{"current_steps": 6290, "total_steps": 11784, "loss": 0.0382, "lr": 1.0569947071276845e-06, "epoch": 0.5337746096401901, "percentage": 53.38, "elapsed_time": "0:15:31", "remaining_time": "0:13:33", "throughput": 6479.16, "total_tokens": 6036288} +{"current_steps": 6295, "total_steps": 11784, "loss": 0.0912, "lr": 1.0555158681871897e-06, "epoch": 0.5341989137813985, "percentage": 53.42, "elapsed_time": "0:15:31", "remaining_time": "0:13:32", "throughput": 6481.87, "total_tokens": 6040960} +{"current_steps": 6300, "total_steps": 11784, "loss": 0.0313, "lr": 1.0540369074500103e-06, "epoch": 0.5346232179226069, "percentage": 53.46, "elapsed_time": "0:15:32", "remaining_time": "0:13:31", "throughput": 6484.31, "total_tokens": 6045376} +{"current_steps": 6305, "total_steps": 11784, "loss": 0.0809, "lr": 1.0525578281608503e-06, "epoch": 0.5350475220638153, "percentage": 53.5, "elapsed_time": "0:15:32", "remaining_time": "0:13:30", "throughput": 6486.83, "total_tokens": 6049856} +{"current_steps": 6310, "total_steps": 11784, "loss": 0.0089, "lr": 1.0510786335646725e-06, "epoch": 0.5354718262050238, "percentage": 53.55, "elapsed_time": "0:15:32", "remaining_time": "0:13:29", "throughput": 6489.17, "total_tokens": 6054144} +{"current_steps": 6315, "total_steps": 11784, "loss": 0.0477, "lr": 1.0495993269066935e-06, "epoch": 0.5358961303462322, "percentage": 53.59, "elapsed_time": "0:15:33", "remaining_time": "0:13:28", "throughput": 6492.95, "total_tokens": 6060032} +{"current_steps": 6320, "total_steps": 11784, "loss": 0.132, "lr": 1.0481199114323746e-06, "epoch": 0.5363204344874406, "percentage": 53.63, "elapsed_time": "0:15:33", "remaining_time": "0:13:27", "throughput": 6496.11, "total_tokens": 6065280} +{"current_steps": 6325, "total_steps": 11784, "loss": 0.0846, "lr": 1.0466403903874175e-06, "epoch": 0.536744738628649, "percentage": 53.67, "elapsed_time": "0:15:34", "remaining_time": "0:13:26", "throughput": 6498.88, "total_tokens": 6070080} +{"current_steps": 6330, "total_steps": 11784, "loss": 0.0642, "lr": 1.0451607670177543e-06, "epoch": 0.5371690427698574, "percentage": 53.72, "elapsed_time": "0:15:34", "remaining_time": "0:13:25", "throughput": 6502.69, "total_tokens": 6076032} +{"current_steps": 6335, "total_steps": 11784, "loss": 0.0577, "lr": 1.0436810445695421e-06, "epoch": 0.5375933469110659, "percentage": 53.76, "elapsed_time": "0:15:34", "remaining_time": "0:13:23", "throughput": 6505.43, "total_tokens": 6080768} +{"current_steps": 6340, "total_steps": 11784, "loss": 0.0294, "lr": 1.0422012262891548e-06, "epoch": 0.5380176510522743, "percentage": 53.8, "elapsed_time": "0:15:35", "remaining_time": "0:13:22", "throughput": 6507.99, "total_tokens": 6085312} +{"current_steps": 6345, "total_steps": 11784, "loss": 0.0297, "lr": 1.0407213154231774e-06, "epoch": 0.5384419551934827, "percentage": 53.84, "elapsed_time": "0:15:35", "remaining_time": "0:13:21", "throughput": 6510.74, "total_tokens": 6090048} +{"current_steps": 6350, "total_steps": 11784, "loss": 0.0992, "lr": 1.0392413152183973e-06, "epoch": 0.5388662593346911, "percentage": 53.89, "elapsed_time": "0:15:35", "remaining_time": "0:13:20", "throughput": 6513.38, "total_tokens": 6094720} +{"current_steps": 6355, "total_steps": 11784, "loss": 0.1116, "lr": 1.0377612289217982e-06, "epoch": 0.5392905634758995, "percentage": 53.93, "elapsed_time": "0:15:36", "remaining_time": "0:13:19", "throughput": 6516.06, "total_tokens": 6099456} +{"current_steps": 6360, "total_steps": 11784, "loss": 0.033, "lr": 1.0362810597805524e-06, "epoch": 0.539714867617108, "percentage": 53.97, "elapsed_time": "0:15:36", "remaining_time": "0:13:18", "throughput": 6518.99, "total_tokens": 6104448} +{"current_steps": 6365, "total_steps": 11784, "loss": 0.0312, "lr": 1.0348008110420149e-06, "epoch": 0.5401391717583164, "percentage": 54.01, "elapsed_time": "0:15:36", "remaining_time": "0:13:17", "throughput": 6521.57, "total_tokens": 6109056} +{"current_steps": 6370, "total_steps": 11784, "loss": 0.088, "lr": 1.0333204859537142e-06, "epoch": 0.5405634758995248, "percentage": 54.06, "elapsed_time": "0:15:37", "remaining_time": "0:13:16", "throughput": 6524.92, "total_tokens": 6114496} +{"current_steps": 6375, "total_steps": 11784, "loss": 0.0882, "lr": 1.0318400877633466e-06, "epoch": 0.5409877800407332, "percentage": 54.1, "elapsed_time": "0:15:37", "remaining_time": "0:13:15", "throughput": 6527.75, "total_tokens": 6119360} +{"current_steps": 6380, "total_steps": 11784, "loss": 0.0926, "lr": 1.030359619718769e-06, "epoch": 0.5414120841819416, "percentage": 54.14, "elapsed_time": "0:15:37", "remaining_time": "0:13:14", "throughput": 6530.73, "total_tokens": 6124352} +{"current_steps": 6385, "total_steps": 11784, "loss": 0.0509, "lr": 1.0288790850679916e-06, "epoch": 0.5418363883231501, "percentage": 54.18, "elapsed_time": "0:15:38", "remaining_time": "0:13:13", "throughput": 6533.24, "total_tokens": 6128832} +{"current_steps": 6390, "total_steps": 11784, "loss": 0.1019, "lr": 1.0273984870591706e-06, "epoch": 0.5422606924643585, "percentage": 54.23, "elapsed_time": "0:15:38", "remaining_time": "0:13:12", "throughput": 6535.7, "total_tokens": 6133312} +{"current_steps": 6395, "total_steps": 11784, "loss": 0.0668, "lr": 1.025917828940601e-06, "epoch": 0.5426849966055669, "percentage": 54.27, "elapsed_time": "0:15:38", "remaining_time": "0:13:11", "throughput": 6537.98, "total_tokens": 6137600} +{"current_steps": 6400, "total_steps": 11784, "loss": 0.0544, "lr": 1.02443711396071e-06, "epoch": 0.5431093007467753, "percentage": 54.31, "elapsed_time": "0:15:39", "remaining_time": "0:13:10", "throughput": 6540.78, "total_tokens": 6142464} +{"current_steps": 6405, "total_steps": 11784, "loss": 0.0586, "lr": 1.0229563453680495e-06, "epoch": 0.5435336048879837, "percentage": 54.35, "elapsed_time": "0:15:39", "remaining_time": "0:13:08", "throughput": 6543.37, "total_tokens": 6147072} +{"current_steps": 6410, "total_steps": 11784, "loss": 0.045, "lr": 1.021475526411289e-06, "epoch": 0.5439579090291922, "percentage": 54.4, "elapsed_time": "0:15:39", "remaining_time": "0:13:07", "throughput": 6546.01, "total_tokens": 6151744} +{"current_steps": 6415, "total_steps": 11784, "loss": 0.085, "lr": 1.0199946603392078e-06, "epoch": 0.5443822131704006, "percentage": 54.44, "elapsed_time": "0:15:40", "remaining_time": "0:13:06", "throughput": 6548.9, "total_tokens": 6156672} +{"current_steps": 6420, "total_steps": 11784, "loss": 0.0563, "lr": 1.01851375040069e-06, "epoch": 0.544806517311609, "percentage": 54.48, "elapsed_time": "0:15:40", "remaining_time": "0:13:05", "throughput": 6551.76, "total_tokens": 6161600} +{"current_steps": 6425, "total_steps": 11784, "loss": 0.0312, "lr": 1.0170327998447149e-06, "epoch": 0.5452308214528174, "percentage": 54.52, "elapsed_time": "0:15:40", "remaining_time": "0:13:04", "throughput": 6553.91, "total_tokens": 6165760} +{"current_steps": 6430, "total_steps": 11784, "loss": 0.0114, "lr": 1.015551811920351e-06, "epoch": 0.5456551255940258, "percentage": 54.57, "elapsed_time": "0:15:41", "remaining_time": "0:13:03", "throughput": 6560.49, "total_tokens": 6174912} +{"current_steps": 6435, "total_steps": 11784, "loss": 0.0856, "lr": 1.014070789876749e-06, "epoch": 0.5460794297352343, "percentage": 54.61, "elapsed_time": "0:15:41", "remaining_time": "0:13:02", "throughput": 6562.68, "total_tokens": 6179136} +{"current_steps": 6440, "total_steps": 11784, "loss": 0.1228, "lr": 1.0125897369631342e-06, "epoch": 0.5465037338764427, "percentage": 54.65, "elapsed_time": "0:15:41", "remaining_time": "0:13:01", "throughput": 6565.2, "total_tokens": 6183680} +{"current_steps": 6445, "total_steps": 11784, "loss": 0.0587, "lr": 1.0111086564288003e-06, "epoch": 0.546928038017651, "percentage": 54.69, "elapsed_time": "0:15:42", "remaining_time": "0:13:00", "throughput": 6568.05, "total_tokens": 6188608} +{"current_steps": 6450, "total_steps": 11784, "loss": 0.0592, "lr": 1.009627551523101e-06, "epoch": 0.5473523421588594, "percentage": 54.74, "elapsed_time": "0:15:42", "remaining_time": "0:12:59", "throughput": 6570.98, "total_tokens": 6193600} +{"current_steps": 6455, "total_steps": 11784, "loss": 0.0367, "lr": 1.008146425495443e-06, "epoch": 0.5477766463000678, "percentage": 54.78, "elapsed_time": "0:15:42", "remaining_time": "0:12:58", "throughput": 6573.83, "total_tokens": 6198528} +{"current_steps": 6460, "total_steps": 11784, "loss": 0.0359, "lr": 1.0066652815952805e-06, "epoch": 0.5482009504412763, "percentage": 54.82, "elapsed_time": "0:15:43", "remaining_time": "0:12:57", "throughput": 6577.26, "total_tokens": 6204096} +{"current_steps": 6465, "total_steps": 11784, "loss": 0.0465, "lr": 1.0051841230721063e-06, "epoch": 0.5486252545824847, "percentage": 54.86, "elapsed_time": "0:15:43", "remaining_time": "0:12:56", "throughput": 6579.8, "total_tokens": 6208704} +{"current_steps": 6470, "total_steps": 11784, "loss": 0.1061, "lr": 1.0037029531754453e-06, "epoch": 0.5490495587236931, "percentage": 54.9, "elapsed_time": "0:15:43", "remaining_time": "0:12:55", "throughput": 6582.46, "total_tokens": 6213440} +{"current_steps": 6475, "total_steps": 11784, "loss": 0.0551, "lr": 1.002221775154847e-06, "epoch": 0.5494738628649015, "percentage": 54.95, "elapsed_time": "0:15:44", "remaining_time": "0:12:54", "throughput": 6585.31, "total_tokens": 6218368} +{"current_steps": 6480, "total_steps": 11784, "loss": 0.0618, "lr": 1.0007405922598793e-06, "epoch": 0.5498981670061099, "percentage": 54.99, "elapsed_time": "0:15:44", "remaining_time": "0:12:53", "throughput": 6588.41, "total_tokens": 6223616} +{"current_steps": 6485, "total_steps": 11784, "loss": 0.0938, "lr": 9.992594077401208e-07, "epoch": 0.5503224711473184, "percentage": 55.03, "elapsed_time": "0:15:44", "remaining_time": "0:12:52", "throughput": 6591.15, "total_tokens": 6228480} +{"current_steps": 6490, "total_steps": 11784, "loss": 0.0534, "lr": 9.977782248451534e-07, "epoch": 0.5507467752885268, "percentage": 55.07, "elapsed_time": "0:15:45", "remaining_time": "0:12:51", "throughput": 6593.5, "total_tokens": 6232896} +{"current_steps": 6490, "total_steps": 11784, "eval_loss": 0.05577274411916733, "epoch": 0.5507467752885268, "percentage": 55.07, "elapsed_time": "0:16:01", "remaining_time": "0:13:04", "throughput": 6484.94, "total_tokens": 6232896} +{"current_steps": 6495, "total_steps": 11784, "loss": 0.0959, "lr": 9.962970468245548e-07, "epoch": 0.5511710794297352, "percentage": 55.12, "elapsed_time": "0:16:31", "remaining_time": "0:13:27", "throughput": 6290.9, "total_tokens": 6237696} +{"current_steps": 6500, "total_steps": 11784, "loss": 0.0324, "lr": 9.948158769278939e-07, "epoch": 0.5515953835709436, "percentage": 55.16, "elapsed_time": "0:16:31", "remaining_time": "0:13:26", "throughput": 6293.45, "total_tokens": 6242304} +{"current_steps": 6505, "total_steps": 11784, "loss": 0.0393, "lr": 9.933347184047194e-07, "epoch": 0.552019687712152, "percentage": 55.2, "elapsed_time": "0:16:32", "remaining_time": "0:13:25", "throughput": 6296.07, "total_tokens": 6246976} +{"current_steps": 6510, "total_steps": 11784, "loss": 0.0629, "lr": 9.918535745045571e-07, "epoch": 0.5524439918533605, "percentage": 55.24, "elapsed_time": "0:16:32", "remaining_time": "0:13:24", "throughput": 6298.35, "total_tokens": 6251264} +{"current_steps": 6515, "total_steps": 11784, "loss": 0.0104, "lr": 9.903724484768991e-07, "epoch": 0.5528682959945689, "percentage": 55.29, "elapsed_time": "0:16:32", "remaining_time": "0:13:22", "throughput": 6300.88, "total_tokens": 6255872} +{"current_steps": 6520, "total_steps": 11784, "loss": 0.0381, "lr": 9.888913435711996e-07, "epoch": 0.5532926001357773, "percentage": 55.33, "elapsed_time": "0:16:33", "remaining_time": "0:13:21", "throughput": 6303.8, "total_tokens": 6260928} +{"current_steps": 6525, "total_steps": 11784, "loss": 0.04, "lr": 9.874102630368658e-07, "epoch": 0.5537169042769857, "percentage": 55.37, "elapsed_time": "0:16:33", "remaining_time": "0:13:20", "throughput": 6306.39, "total_tokens": 6265600} +{"current_steps": 6530, "total_steps": 11784, "loss": 0.113, "lr": 9.859292101232514e-07, "epoch": 0.5541412084181941, "percentage": 55.41, "elapsed_time": "0:16:33", "remaining_time": "0:13:19", "throughput": 6309.16, "total_tokens": 6270464} +{"current_steps": 6535, "total_steps": 11784, "loss": 0.0957, "lr": 9.84448188079649e-07, "epoch": 0.5545655125594026, "percentage": 55.46, "elapsed_time": "0:16:34", "remaining_time": "0:13:18", "throughput": 6311.56, "total_tokens": 6274944} +{"current_steps": 6540, "total_steps": 11784, "loss": 0.0578, "lr": 9.829672001552853e-07, "epoch": 0.554989816700611, "percentage": 55.5, "elapsed_time": "0:16:34", "remaining_time": "0:13:17", "throughput": 6313.98, "total_tokens": 6279424} +{"current_steps": 6545, "total_steps": 11784, "loss": 0.1313, "lr": 9.8148624959931e-07, "epoch": 0.5554141208418194, "percentage": 55.54, "elapsed_time": "0:16:34", "remaining_time": "0:13:16", "throughput": 6316.58, "total_tokens": 6284096} +{"current_steps": 6550, "total_steps": 11784, "loss": 0.03, "lr": 9.80005339660792e-07, "epoch": 0.5558384249830278, "percentage": 55.58, "elapsed_time": "0:16:35", "remaining_time": "0:13:15", "throughput": 6319.98, "total_tokens": 6289728} +{"current_steps": 6555, "total_steps": 11784, "loss": 0.0289, "lr": 9.785244735887112e-07, "epoch": 0.5562627291242362, "percentage": 55.63, "elapsed_time": "0:16:35", "remaining_time": "0:13:14", "throughput": 6322.59, "total_tokens": 6294400} +{"current_steps": 6560, "total_steps": 11784, "loss": 0.0557, "lr": 9.770436546319504e-07, "epoch": 0.5566870332654447, "percentage": 55.67, "elapsed_time": "0:16:35", "remaining_time": "0:13:13", "throughput": 6325.03, "total_tokens": 6298880} +{"current_steps": 6565, "total_steps": 11784, "loss": 0.0636, "lr": 9.755628860392901e-07, "epoch": 0.5571113374066531, "percentage": 55.71, "elapsed_time": "0:16:36", "remaining_time": "0:13:11", "throughput": 6327.49, "total_tokens": 6303424} +{"current_steps": 6570, "total_steps": 11784, "loss": 0.026, "lr": 9.740821710593988e-07, "epoch": 0.5575356415478615, "percentage": 55.75, "elapsed_time": "0:16:36", "remaining_time": "0:13:10", "throughput": 6330.03, "total_tokens": 6308032} +{"current_steps": 6575, "total_steps": 11784, "loss": 0.048, "lr": 9.726015129408296e-07, "epoch": 0.5579599456890699, "percentage": 55.8, "elapsed_time": "0:16:36", "remaining_time": "0:13:09", "throughput": 6332.75, "total_tokens": 6312832} +{"current_steps": 6580, "total_steps": 11784, "loss": 0.035, "lr": 9.711209149320083e-07, "epoch": 0.5583842498302783, "percentage": 55.84, "elapsed_time": "0:16:37", "remaining_time": "0:13:08", "throughput": 6335.18, "total_tokens": 6317312} +{"current_steps": 6585, "total_steps": 11784, "loss": 0.0341, "lr": 9.69640380281231e-07, "epoch": 0.5588085539714868, "percentage": 55.88, "elapsed_time": "0:16:37", "remaining_time": "0:13:07", "throughput": 6337.73, "total_tokens": 6321920} +{"current_steps": 6590, "total_steps": 11784, "loss": 0.0732, "lr": 9.681599122366533e-07, "epoch": 0.5592328581126952, "percentage": 55.92, "elapsed_time": "0:16:37", "remaining_time": "0:13:06", "throughput": 6340.09, "total_tokens": 6326336} +{"current_steps": 6595, "total_steps": 11784, "loss": 0.0904, "lr": 9.66679514046286e-07, "epoch": 0.5596571622539036, "percentage": 55.97, "elapsed_time": "0:16:38", "remaining_time": "0:13:05", "throughput": 6342.67, "total_tokens": 6331008} +{"current_steps": 6600, "total_steps": 11784, "loss": 0.0866, "lr": 9.65199188957985e-07, "epoch": 0.560081466395112, "percentage": 56.01, "elapsed_time": "0:16:38", "remaining_time": "0:13:04", "throughput": 6345.3, "total_tokens": 6335744} +{"current_steps": 6605, "total_steps": 11784, "loss": 0.0558, "lr": 9.637189402194475e-07, "epoch": 0.5605057705363204, "percentage": 56.05, "elapsed_time": "0:16:38", "remaining_time": "0:13:03", "throughput": 6348.18, "total_tokens": 6340736} +{"current_steps": 6610, "total_steps": 11784, "loss": 0.0955, "lr": 9.622387710782017e-07, "epoch": 0.5609300746775289, "percentage": 56.09, "elapsed_time": "0:16:39", "remaining_time": "0:13:02", "throughput": 6350.6, "total_tokens": 6345216} +{"current_steps": 6615, "total_steps": 11784, "loss": 0.0383, "lr": 9.607586847816029e-07, "epoch": 0.5613543788187373, "percentage": 56.14, "elapsed_time": "0:16:39", "remaining_time": "0:13:01", "throughput": 6353.33, "total_tokens": 6350080} +{"current_steps": 6620, "total_steps": 11784, "loss": 0.0689, "lr": 9.592786845768225e-07, "epoch": 0.5617786829599457, "percentage": 56.18, "elapsed_time": "0:16:39", "remaining_time": "0:12:59", "throughput": 6355.98, "total_tokens": 6354816} +{"current_steps": 6625, "total_steps": 11784, "loss": 0.0538, "lr": 9.577987737108454e-07, "epoch": 0.5622029871011541, "percentage": 56.22, "elapsed_time": "0:16:40", "remaining_time": "0:12:58", "throughput": 6358.97, "total_tokens": 6360000} +{"current_steps": 6630, "total_steps": 11784, "loss": 0.1221, "lr": 9.563189554304578e-07, "epoch": 0.5626272912423625, "percentage": 56.26, "elapsed_time": "0:16:40", "remaining_time": "0:12:57", "throughput": 6361.53, "total_tokens": 6364672} +{"current_steps": 6635, "total_steps": 11784, "loss": 0.0828, "lr": 9.548392329822456e-07, "epoch": 0.563051595383571, "percentage": 56.31, "elapsed_time": "0:16:40", "remaining_time": "0:12:56", "throughput": 6364.19, "total_tokens": 6369408} +{"current_steps": 6640, "total_steps": 11784, "loss": 0.0482, "lr": 9.533596096125825e-07, "epoch": 0.5634758995247794, "percentage": 56.35, "elapsed_time": "0:16:41", "remaining_time": "0:12:55", "throughput": 6366.76, "total_tokens": 6374080} +{"current_steps": 6645, "total_steps": 11784, "loss": 0.051, "lr": 9.518800885676256e-07, "epoch": 0.5639002036659878, "percentage": 56.39, "elapsed_time": "0:16:41", "remaining_time": "0:12:54", "throughput": 6369.69, "total_tokens": 6379200} +{"current_steps": 6650, "total_steps": 11784, "loss": 0.0577, "lr": 9.504006730933068e-07, "epoch": 0.5643245078071962, "percentage": 56.43, "elapsed_time": "0:16:41", "remaining_time": "0:12:53", "throughput": 6372.85, "total_tokens": 6384576} +{"current_steps": 6655, "total_steps": 11784, "loss": 0.0797, "lr": 9.489213664353276e-07, "epoch": 0.5647488119484046, "percentage": 56.47, "elapsed_time": "0:16:42", "remaining_time": "0:12:52", "throughput": 6375.84, "total_tokens": 6389760} +{"current_steps": 6660, "total_steps": 11784, "loss": 0.1317, "lr": 9.474421718391497e-07, "epoch": 0.5651731160896131, "percentage": 56.52, "elapsed_time": "0:16:42", "remaining_time": "0:12:51", "throughput": 6378.18, "total_tokens": 6394176} +{"current_steps": 6665, "total_steps": 11784, "loss": 0.0693, "lr": 9.459630925499897e-07, "epoch": 0.5655974202308215, "percentage": 56.56, "elapsed_time": "0:16:42", "remaining_time": "0:12:50", "throughput": 6380.86, "total_tokens": 6398976} +{"current_steps": 6670, "total_steps": 11784, "loss": 0.0414, "lr": 9.444841318128103e-07, "epoch": 0.5660217243720299, "percentage": 56.6, "elapsed_time": "0:16:43", "remaining_time": "0:12:49", "throughput": 6383.07, "total_tokens": 6403264} +{"current_steps": 6675, "total_steps": 11784, "loss": 0.0771, "lr": 9.430052928723152e-07, "epoch": 0.5664460285132383, "percentage": 56.64, "elapsed_time": "0:16:43", "remaining_time": "0:12:48", "throughput": 6385.8, "total_tokens": 6408128} +{"current_steps": 6680, "total_steps": 11784, "loss": 0.1131, "lr": 9.415265789729403e-07, "epoch": 0.5668703326544468, "percentage": 56.69, "elapsed_time": "0:16:43", "remaining_time": "0:12:46", "throughput": 6388.24, "total_tokens": 6412672} +{"current_steps": 6685, "total_steps": 11784, "loss": 0.0724, "lr": 9.400479933588468e-07, "epoch": 0.5672946367956552, "percentage": 56.73, "elapsed_time": "0:16:44", "remaining_time": "0:12:45", "throughput": 6390.58, "total_tokens": 6417088} +{"current_steps": 6690, "total_steps": 11784, "loss": 0.0707, "lr": 9.385695392739156e-07, "epoch": 0.5677189409368636, "percentage": 56.77, "elapsed_time": "0:16:44", "remaining_time": "0:12:44", "throughput": 6393.18, "total_tokens": 6421824} +{"current_steps": 6695, "total_steps": 11784, "loss": 0.0411, "lr": 9.370912199617376e-07, "epoch": 0.568143245078072, "percentage": 56.81, "elapsed_time": "0:16:44", "remaining_time": "0:12:43", "throughput": 6395.77, "total_tokens": 6426560} +{"current_steps": 6700, "total_steps": 11784, "loss": 0.0867, "lr": 9.356130386656093e-07, "epoch": 0.5685675492192804, "percentage": 56.86, "elapsed_time": "0:16:45", "remaining_time": "0:12:42", "throughput": 6398.15, "total_tokens": 6431040} +{"current_steps": 6705, "total_steps": 11784, "loss": 0.0488, "lr": 9.341349986285234e-07, "epoch": 0.5689918533604889, "percentage": 56.9, "elapsed_time": "0:16:45", "remaining_time": "0:12:41", "throughput": 6400.92, "total_tokens": 6435968} +{"current_steps": 6710, "total_steps": 11784, "loss": 0.1258, "lr": 9.326571030931636e-07, "epoch": 0.5694161575016972, "percentage": 56.94, "elapsed_time": "0:16:45", "remaining_time": "0:12:40", "throughput": 6403.45, "total_tokens": 6440640} +{"current_steps": 6715, "total_steps": 11784, "loss": 0.0646, "lr": 9.311793553018958e-07, "epoch": 0.5698404616429056, "percentage": 56.98, "elapsed_time": "0:16:46", "remaining_time": "0:12:39", "throughput": 6406.14, "total_tokens": 6445504} +{"current_steps": 6720, "total_steps": 11784, "loss": 0.0453, "lr": 9.297017584967624e-07, "epoch": 0.570264765784114, "percentage": 57.03, "elapsed_time": "0:16:46", "remaining_time": "0:12:38", "throughput": 6408.18, "total_tokens": 6449600} +{"current_steps": 6725, "total_steps": 11784, "loss": 0.0386, "lr": 9.282243159194734e-07, "epoch": 0.5706890699253224, "percentage": 57.07, "elapsed_time": "0:16:46", "remaining_time": "0:12:37", "throughput": 6410.95, "total_tokens": 6454528} +{"current_steps": 6730, "total_steps": 11784, "loss": 0.0768, "lr": 9.267470308114025e-07, "epoch": 0.5711133740665308, "percentage": 57.11, "elapsed_time": "0:16:47", "remaining_time": "0:12:36", "throughput": 6413.56, "total_tokens": 6459264} +{"current_steps": 6735, "total_steps": 11784, "loss": 0.078, "lr": 9.252699064135758e-07, "epoch": 0.5715376782077393, "percentage": 57.15, "elapsed_time": "0:16:47", "remaining_time": "0:12:35", "throughput": 6415.75, "total_tokens": 6463552} +{"current_steps": 6740, "total_steps": 11784, "loss": 0.0072, "lr": 9.23792945966668e-07, "epoch": 0.5719619823489477, "percentage": 57.2, "elapsed_time": "0:16:47", "remaining_time": "0:12:34", "throughput": 6418.61, "total_tokens": 6468608} +{"current_steps": 6745, "total_steps": 11784, "loss": 0.0303, "lr": 9.223161527109936e-07, "epoch": 0.5723862864901561, "percentage": 57.24, "elapsed_time": "0:16:48", "remaining_time": "0:12:33", "throughput": 6421.27, "total_tokens": 6473408} +{"current_steps": 6750, "total_steps": 11784, "loss": 0.0353, "lr": 9.208395298865014e-07, "epoch": 0.5728105906313645, "percentage": 57.28, "elapsed_time": "0:16:48", "remaining_time": "0:12:32", "throughput": 6424.28, "total_tokens": 6478656} +{"current_steps": 6755, "total_steps": 11784, "loss": 0.0986, "lr": 9.19363080732764e-07, "epoch": 0.573234894772573, "percentage": 57.32, "elapsed_time": "0:16:48", "remaining_time": "0:12:31", "throughput": 6426.79, "total_tokens": 6483328} +{"current_steps": 6760, "total_steps": 11784, "loss": 0.0211, "lr": 9.178868084889756e-07, "epoch": 0.5736591989137814, "percentage": 57.37, "elapsed_time": "0:16:49", "remaining_time": "0:12:29", "throughput": 6429.36, "total_tokens": 6488064} +{"current_steps": 6765, "total_steps": 11784, "loss": 0.1111, "lr": 9.164107163939401e-07, "epoch": 0.5740835030549898, "percentage": 57.41, "elapsed_time": "0:16:49", "remaining_time": "0:12:28", "throughput": 6432.02, "total_tokens": 6492864} +{"current_steps": 6770, "total_steps": 11784, "loss": 0.038, "lr": 9.149348076860685e-07, "epoch": 0.5745078071961982, "percentage": 57.45, "elapsed_time": "0:16:49", "remaining_time": "0:12:27", "throughput": 6434.27, "total_tokens": 6497216} +{"current_steps": 6775, "total_steps": 11784, "loss": 0.0373, "lr": 9.134590856033664e-07, "epoch": 0.5749321113374066, "percentage": 57.49, "elapsed_time": "0:16:50", "remaining_time": "0:12:26", "throughput": 6436.8, "total_tokens": 6501888} +{"current_steps": 6780, "total_steps": 11784, "loss": 0.0708, "lr": 9.11983553383433e-07, "epoch": 0.575356415478615, "percentage": 57.54, "elapsed_time": "0:16:50", "remaining_time": "0:12:25", "throughput": 6439.87, "total_tokens": 6507200} +{"current_steps": 6785, "total_steps": 11784, "loss": 0.0143, "lr": 9.105082142634489e-07, "epoch": 0.5757807196198235, "percentage": 57.58, "elapsed_time": "0:16:50", "remaining_time": "0:12:24", "throughput": 6445.63, "total_tokens": 6515840} +{"current_steps": 6790, "total_steps": 11784, "loss": 0.1098, "lr": 9.090330714801723e-07, "epoch": 0.5762050237610319, "percentage": 57.62, "elapsed_time": "0:16:51", "remaining_time": "0:12:23", "throughput": 6448.02, "total_tokens": 6520384} +{"current_steps": 6795, "total_steps": 11784, "loss": 0.0894, "lr": 9.075581282699294e-07, "epoch": 0.5766293279022403, "percentage": 57.66, "elapsed_time": "0:16:51", "remaining_time": "0:12:22", "throughput": 6450.49, "total_tokens": 6524992} +{"current_steps": 6800, "total_steps": 11784, "loss": 0.1289, "lr": 9.060833878686098e-07, "epoch": 0.5770536320434487, "percentage": 57.71, "elapsed_time": "0:16:51", "remaining_time": "0:12:21", "throughput": 6455.08, "total_tokens": 6532160} +{"current_steps": 6805, "total_steps": 11784, "loss": 0.0264, "lr": 9.046088535116581e-07, "epoch": 0.5774779361846571, "percentage": 57.75, "elapsed_time": "0:16:52", "remaining_time": "0:12:20", "throughput": 6457.21, "total_tokens": 6536384} +{"current_steps": 6810, "total_steps": 11784, "loss": 0.0253, "lr": 9.031345284340652e-07, "epoch": 0.5779022403258656, "percentage": 57.79, "elapsed_time": "0:16:52", "remaining_time": "0:12:19", "throughput": 6459.5, "total_tokens": 6540800} +{"current_steps": 6815, "total_steps": 11784, "loss": 0.1609, "lr": 9.016604158703654e-07, "epoch": 0.578326544467074, "percentage": 57.83, "elapsed_time": "0:16:52", "remaining_time": "0:12:18", "throughput": 6461.79, "total_tokens": 6545216} +{"current_steps": 6820, "total_steps": 11784, "loss": 0.0496, "lr": 9.001865190546244e-07, "epoch": 0.5787508486082824, "percentage": 57.88, "elapsed_time": "0:16:53", "remaining_time": "0:12:17", "throughput": 6464.7, "total_tokens": 6550400} +{"current_steps": 6825, "total_steps": 11784, "loss": 0.0241, "lr": 8.987128412204363e-07, "epoch": 0.5791751527494908, "percentage": 57.92, "elapsed_time": "0:16:53", "remaining_time": "0:12:16", "throughput": 6466.92, "total_tokens": 6554752} +{"current_steps": 6830, "total_steps": 11784, "loss": 0.0436, "lr": 8.972393856009132e-07, "epoch": 0.5795994568906992, "percentage": 57.96, "elapsed_time": "0:16:53", "remaining_time": "0:12:15", "throughput": 6469.58, "total_tokens": 6559616} +{"current_steps": 6835, "total_steps": 11784, "loss": 0.0387, "lr": 8.957661554286817e-07, "epoch": 0.5800237610319077, "percentage": 58.0, "elapsed_time": "0:16:54", "remaining_time": "0:12:14", "throughput": 6472.35, "total_tokens": 6564608} +{"current_steps": 6840, "total_steps": 11784, "loss": 0.0477, "lr": 8.942931539358718e-07, "epoch": 0.5804480651731161, "percentage": 58.04, "elapsed_time": "0:16:54", "remaining_time": "0:12:13", "throughput": 6474.65, "total_tokens": 6569024} +{"current_steps": 6845, "total_steps": 11784, "loss": 0.056, "lr": 8.928203843541131e-07, "epoch": 0.5808723693143245, "percentage": 58.09, "elapsed_time": "0:16:54", "remaining_time": "0:12:12", "throughput": 6477.44, "total_tokens": 6574016} +{"current_steps": 6850, "total_steps": 11784, "loss": 0.0153, "lr": 8.913478499145254e-07, "epoch": 0.5812966734555329, "percentage": 58.13, "elapsed_time": "0:16:55", "remaining_time": "0:12:11", "throughput": 6480.14, "total_tokens": 6578944} +{"current_steps": 6855, "total_steps": 11784, "loss": 0.0494, "lr": 8.898755538477138e-07, "epoch": 0.5817209775967414, "percentage": 58.17, "elapsed_time": "0:16:55", "remaining_time": "0:12:10", "throughput": 6483.12, "total_tokens": 6584192} +{"current_steps": 6860, "total_steps": 11784, "loss": 0.1016, "lr": 8.884034993837594e-07, "epoch": 0.5821452817379498, "percentage": 58.21, "elapsed_time": "0:16:55", "remaining_time": "0:12:09", "throughput": 6485.78, "total_tokens": 6589056} +{"current_steps": 6865, "total_steps": 11784, "loss": 0.0777, "lr": 8.869316897522141e-07, "epoch": 0.5825695858791582, "percentage": 58.26, "elapsed_time": "0:16:56", "remaining_time": "0:12:08", "throughput": 6488.08, "total_tokens": 6593536} +{"current_steps": 6870, "total_steps": 11784, "loss": 0.0843, "lr": 8.854601281820914e-07, "epoch": 0.5829938900203666, "percentage": 58.3, "elapsed_time": "0:16:56", "remaining_time": "0:12:07", "throughput": 6492.13, "total_tokens": 6600128} +{"current_steps": 6875, "total_steps": 11784, "loss": 0.0393, "lr": 8.839888179018621e-07, "epoch": 0.583418194161575, "percentage": 58.34, "elapsed_time": "0:16:56", "remaining_time": "0:12:06", "throughput": 6494.69, "total_tokens": 6604864} +{"current_steps": 6880, "total_steps": 11784, "loss": 0.0228, "lr": 8.825177621394449e-07, "epoch": 0.5838424983027835, "percentage": 58.38, "elapsed_time": "0:16:57", "remaining_time": "0:12:05", "throughput": 6497.31, "total_tokens": 6609728} +{"current_steps": 6885, "total_steps": 11784, "loss": 0.0307, "lr": 8.810469641222001e-07, "epoch": 0.5842668024439919, "percentage": 58.43, "elapsed_time": "0:16:57", "remaining_time": "0:12:04", "throughput": 6500.35, "total_tokens": 6615104} +{"current_steps": 6890, "total_steps": 11784, "loss": 0.0439, "lr": 8.795764270769221e-07, "epoch": 0.5846911065852003, "percentage": 58.47, "elapsed_time": "0:16:57", "remaining_time": "0:12:03", "throughput": 6503.1, "total_tokens": 6620096} +{"current_steps": 6895, "total_steps": 11784, "loss": 0.0911, "lr": 8.781061542298341e-07, "epoch": 0.5851154107264087, "percentage": 58.51, "elapsed_time": "0:16:58", "remaining_time": "0:12:02", "throughput": 6505.3, "total_tokens": 6624448} +{"current_steps": 6900, "total_steps": 11784, "loss": 0.1139, "lr": 8.766361488065783e-07, "epoch": 0.5855397148676171, "percentage": 58.55, "elapsed_time": "0:16:58", "remaining_time": "0:12:01", "throughput": 6507.52, "total_tokens": 6628800} +{"current_steps": 6905, "total_steps": 11784, "loss": 0.046, "lr": 8.751664140322112e-07, "epoch": 0.5859640190088256, "percentage": 58.6, "elapsed_time": "0:16:58", "remaining_time": "0:11:59", "throughput": 6510.15, "total_tokens": 6633664} +{"current_steps": 6910, "total_steps": 11784, "loss": 0.0924, "lr": 8.736969531311942e-07, "epoch": 0.586388323150034, "percentage": 58.64, "elapsed_time": "0:16:59", "remaining_time": "0:11:58", "throughput": 6512.92, "total_tokens": 6638720} +{"current_steps": 6915, "total_steps": 11784, "loss": 0.0468, "lr": 8.7222776932739e-07, "epoch": 0.5868126272912424, "percentage": 58.68, "elapsed_time": "0:16:59", "remaining_time": "0:11:57", "throughput": 6515.05, "total_tokens": 6643008} +{"current_steps": 6920, "total_steps": 11784, "loss": 0.0501, "lr": 8.70758865844051e-07, "epoch": 0.5872369314324508, "percentage": 58.72, "elapsed_time": "0:16:59", "remaining_time": "0:11:56", "throughput": 6517.23, "total_tokens": 6647360} +{"current_steps": 6925, "total_steps": 11784, "loss": 0.0371, "lr": 8.69290245903816e-07, "epoch": 0.5876612355736592, "percentage": 58.77, "elapsed_time": "0:17:00", "remaining_time": "0:11:55", "throughput": 6519.67, "total_tokens": 6652032} +{"current_steps": 6930, "total_steps": 11784, "loss": 0.0833, "lr": 8.678219127287018e-07, "epoch": 0.5880855397148677, "percentage": 58.81, "elapsed_time": "0:17:00", "remaining_time": "0:11:54", "throughput": 6521.79, "total_tokens": 6656320} +{"current_steps": 6935, "total_steps": 11784, "loss": 0.0731, "lr": 8.663538695400951e-07, "epoch": 0.5885098438560761, "percentage": 58.85, "elapsed_time": "0:17:00", "remaining_time": "0:11:53", "throughput": 6524.17, "total_tokens": 6660928} +{"current_steps": 6940, "total_steps": 11784, "loss": 0.027, "lr": 8.648861195587475e-07, "epoch": 0.5889341479972845, "percentage": 58.89, "elapsed_time": "0:17:01", "remaining_time": "0:11:52", "throughput": 6526.83, "total_tokens": 6665856} +{"current_steps": 6945, "total_steps": 11784, "loss": 0.0478, "lr": 8.634186660047663e-07, "epoch": 0.5893584521384929, "percentage": 58.94, "elapsed_time": "0:17:01", "remaining_time": "0:11:51", "throughput": 6528.93, "total_tokens": 6670144} +{"current_steps": 6950, "total_steps": 11784, "loss": 0.0928, "lr": 8.619515120976097e-07, "epoch": 0.5897827562797013, "percentage": 58.98, "elapsed_time": "0:17:01", "remaining_time": "0:11:50", "throughput": 6531.76, "total_tokens": 6675264} +{"current_steps": 6955, "total_steps": 11784, "loss": 0.13, "lr": 8.60484661056077e-07, "epoch": 0.5902070604209098, "percentage": 59.02, "elapsed_time": "0:17:02", "remaining_time": "0:11:49", "throughput": 6533.86, "total_tokens": 6679552} +{"current_steps": 6960, "total_steps": 11784, "loss": 0.0307, "lr": 8.590181160983043e-07, "epoch": 0.5906313645621182, "percentage": 59.06, "elapsed_time": "0:17:02", "remaining_time": "0:11:48", "throughput": 6535.99, "total_tokens": 6683904} +{"current_steps": 6965, "total_steps": 11784, "loss": 0.0527, "lr": 8.575518804417552e-07, "epoch": 0.5910556687033266, "percentage": 59.11, "elapsed_time": "0:17:02", "remaining_time": "0:11:47", "throughput": 6538.24, "total_tokens": 6688320} +{"current_steps": 6970, "total_steps": 11784, "loss": 0.0419, "lr": 8.560859573032161e-07, "epoch": 0.591479972844535, "percentage": 59.15, "elapsed_time": "0:17:03", "remaining_time": "0:11:46", "throughput": 6541.27, "total_tokens": 6693696} +{"current_steps": 6975, "total_steps": 11784, "loss": 0.0631, "lr": 8.546203498987861e-07, "epoch": 0.5919042769857433, "percentage": 59.19, "elapsed_time": "0:17:03", "remaining_time": "0:11:45", "throughput": 6543.82, "total_tokens": 6698496} +{"current_steps": 6980, "total_steps": 11784, "loss": 0.0257, "lr": 8.531550614438729e-07, "epoch": 0.5923285811269517, "percentage": 59.23, "elapsed_time": "0:17:03", "remaining_time": "0:11:44", "throughput": 6547.1, "total_tokens": 6704192} +{"current_steps": 6985, "total_steps": 11784, "loss": 0.0148, "lr": 8.516900951531832e-07, "epoch": 0.5927528852681602, "percentage": 59.28, "elapsed_time": "0:17:04", "remaining_time": "0:11:43", "throughput": 6549.22, "total_tokens": 6708480} +{"current_steps": 6990, "total_steps": 11784, "loss": 0.0556, "lr": 8.502254542407185e-07, "epoch": 0.5931771894093686, "percentage": 59.32, "elapsed_time": "0:17:04", "remaining_time": "0:11:42", "throughput": 6552.24, "total_tokens": 6713856} +{"current_steps": 6995, "total_steps": 11784, "loss": 0.1009, "lr": 8.487611419197653e-07, "epoch": 0.593601493550577, "percentage": 59.36, "elapsed_time": "0:17:05", "remaining_time": "0:11:41", "throughput": 6555.14, "total_tokens": 6719104} +{"current_steps": 7000, "total_steps": 11784, "loss": 0.0662, "lr": 8.472971614028895e-07, "epoch": 0.5940257976917854, "percentage": 59.4, "elapsed_time": "0:17:05", "remaining_time": "0:11:40", "throughput": 6557.19, "total_tokens": 6723328} +{"current_steps": 7005, "total_steps": 11784, "loss": 0.0326, "lr": 8.458335159019288e-07, "epoch": 0.5944501018329938, "percentage": 59.45, "elapsed_time": "0:17:05", "remaining_time": "0:11:39", "throughput": 6559.67, "total_tokens": 6728064} +{"current_steps": 7010, "total_steps": 11784, "loss": 0.0827, "lr": 8.443702086279866e-07, "epoch": 0.5948744059742023, "percentage": 59.49, "elapsed_time": "0:17:06", "remaining_time": "0:11:38", "throughput": 6562.22, "total_tokens": 6732864} +{"current_steps": 7015, "total_steps": 11784, "loss": 0.0157, "lr": 8.429072427914235e-07, "epoch": 0.5952987101154107, "percentage": 59.53, "elapsed_time": "0:17:06", "remaining_time": "0:11:37", "throughput": 6564.84, "total_tokens": 6737792} +{"current_steps": 7020, "total_steps": 11784, "loss": 0.0443, "lr": 8.414446216018516e-07, "epoch": 0.5957230142566191, "percentage": 59.57, "elapsed_time": "0:17:06", "remaining_time": "0:11:36", "throughput": 6567.6, "total_tokens": 6742848} +{"current_steps": 7025, "total_steps": 11784, "loss": 0.0561, "lr": 8.399823482681261e-07, "epoch": 0.5961473183978275, "percentage": 59.61, "elapsed_time": "0:17:07", "remaining_time": "0:11:35", "throughput": 6570.53, "total_tokens": 6748160} +{"current_steps": 7030, "total_steps": 11784, "loss": 0.0759, "lr": 8.385204259983403e-07, "epoch": 0.596571622539036, "percentage": 59.66, "elapsed_time": "0:17:07", "remaining_time": "0:11:34", "throughput": 6573.07, "total_tokens": 6752960} +{"current_steps": 7035, "total_steps": 11784, "loss": 0.0535, "lr": 8.37058857999816e-07, "epoch": 0.5969959266802444, "percentage": 59.7, "elapsed_time": "0:17:07", "remaining_time": "0:11:33", "throughput": 6575.8, "total_tokens": 6758016} +{"current_steps": 7040, "total_steps": 11784, "loss": 0.104, "lr": 8.355976474790987e-07, "epoch": 0.5974202308214528, "percentage": 59.74, "elapsed_time": "0:17:08", "remaining_time": "0:11:32", "throughput": 6578.22, "total_tokens": 6762688} +{"current_steps": 7045, "total_steps": 11784, "loss": 0.0365, "lr": 8.341367976419485e-07, "epoch": 0.5978445349626612, "percentage": 59.78, "elapsed_time": "0:17:08", "remaining_time": "0:11:31", "throughput": 6580.65, "total_tokens": 6767424} +{"current_steps": 7050, "total_steps": 11784, "loss": 0.0831, "lr": 8.326763116933359e-07, "epoch": 0.5982688391038696, "percentage": 59.83, "elapsed_time": "0:17:08", "remaining_time": "0:11:30", "throughput": 6582.65, "total_tokens": 6771648} +{"current_steps": 7055, "total_steps": 11784, "loss": 0.0632, "lr": 8.312161928374317e-07, "epoch": 0.598693143245078, "percentage": 59.87, "elapsed_time": "0:17:09", "remaining_time": "0:11:29", "throughput": 6585.46, "total_tokens": 6776832} +{"current_steps": 7060, "total_steps": 11784, "loss": 0.0284, "lr": 8.297564442776012e-07, "epoch": 0.5991174473862865, "percentage": 59.91, "elapsed_time": "0:17:09", "remaining_time": "0:11:28", "throughput": 6587.52, "total_tokens": 6781120} +{"current_steps": 7065, "total_steps": 11784, "loss": 0.0353, "lr": 8.282970692163988e-07, "epoch": 0.5995417515274949, "percentage": 59.95, "elapsed_time": "0:17:09", "remaining_time": "0:11:27", "throughput": 6589.66, "total_tokens": 6785472} +{"current_steps": 7070, "total_steps": 11784, "loss": 0.0856, "lr": 8.268380708555579e-07, "epoch": 0.5999660556687033, "percentage": 60.0, "elapsed_time": "0:17:10", "remaining_time": "0:11:26", "throughput": 6593.14, "total_tokens": 6791488} +{"current_steps": 7075, "total_steps": 11784, "loss": 0.081, "lr": 8.253794523959863e-07, "epoch": 0.6003903598099117, "percentage": 60.04, "elapsed_time": "0:17:10", "remaining_time": "0:11:25", "throughput": 6596.33, "total_tokens": 6797120} +{"current_steps": 7080, "total_steps": 11784, "loss": 0.0467, "lr": 8.239212170377576e-07, "epoch": 0.6008146639511202, "percentage": 60.08, "elapsed_time": "0:17:10", "remaining_time": "0:11:24", "throughput": 6598.86, "total_tokens": 6801984} +{"current_steps": 7080, "total_steps": 11784, "eval_loss": 0.05977928265929222, "epoch": 0.6008146639511202, "percentage": 60.08, "elapsed_time": "0:17:26", "remaining_time": "0:11:35", "throughput": 6498.94, "total_tokens": 6801984} +{"current_steps": 7085, "total_steps": 11784, "loss": 0.0763, "lr": 8.224633679801062e-07, "epoch": 0.6012389680923286, "percentage": 60.12, "elapsed_time": "0:18:08", "remaining_time": "0:12:01", "throughput": 6253.7, "total_tokens": 6806528} +{"current_steps": 7090, "total_steps": 11784, "loss": 0.1491, "lr": 8.210059084214176e-07, "epoch": 0.601663272233537, "percentage": 60.17, "elapsed_time": "0:18:08", "remaining_time": "0:12:00", "throughput": 6256.31, "total_tokens": 6811456} +{"current_steps": 7095, "total_steps": 11784, "loss": 0.027, "lr": 8.195488415592237e-07, "epoch": 0.6020875763747454, "percentage": 60.21, "elapsed_time": "0:18:09", "remaining_time": "0:11:59", "throughput": 6258.49, "total_tokens": 6815872} +{"current_steps": 7100, "total_steps": 11784, "loss": 0.0177, "lr": 8.180921705901941e-07, "epoch": 0.6025118805159538, "percentage": 60.25, "elapsed_time": "0:18:09", "remaining_time": "0:11:58", "throughput": 6261.54, "total_tokens": 6821376} +{"current_steps": 7105, "total_steps": 11784, "loss": 0.0499, "lr": 8.16635898710131e-07, "epoch": 0.6029361846571623, "percentage": 60.29, "elapsed_time": "0:18:09", "remaining_time": "0:11:57", "throughput": 6264.43, "total_tokens": 6826688} +{"current_steps": 7110, "total_steps": 11784, "loss": 0.0411, "lr": 8.151800291139596e-07, "epoch": 0.6033604887983707, "percentage": 60.34, "elapsed_time": "0:18:10", "remaining_time": "0:11:56", "throughput": 6267.05, "total_tokens": 6831680} +{"current_steps": 7115, "total_steps": 11784, "loss": 0.0234, "lr": 8.137245649957239e-07, "epoch": 0.6037847929395791, "percentage": 60.38, "elapsed_time": "0:18:10", "remaining_time": "0:11:55", "throughput": 6269.17, "total_tokens": 6836032} +{"current_steps": 7120, "total_steps": 11784, "loss": 0.0511, "lr": 8.122695095485767e-07, "epoch": 0.6042090970807875, "percentage": 60.42, "elapsed_time": "0:18:10", "remaining_time": "0:11:54", "throughput": 6271.46, "total_tokens": 6840576} +{"current_steps": 7125, "total_steps": 11784, "loss": 0.0629, "lr": 8.108148659647764e-07, "epoch": 0.6046334012219959, "percentage": 60.46, "elapsed_time": "0:18:11", "remaining_time": "0:11:53", "throughput": 6274.19, "total_tokens": 6845696} +{"current_steps": 7130, "total_steps": 11784, "loss": 0.0581, "lr": 8.093606374356758e-07, "epoch": 0.6050577053632044, "percentage": 60.51, "elapsed_time": "0:18:11", "remaining_time": "0:11:52", "throughput": 6276.25, "total_tokens": 6849984} +{"current_steps": 7135, "total_steps": 11784, "loss": 0.0429, "lr": 8.079068271517182e-07, "epoch": 0.6054820095044128, "percentage": 60.55, "elapsed_time": "0:18:11", "remaining_time": "0:11:51", "throughput": 6278.58, "total_tokens": 6854592} +{"current_steps": 7140, "total_steps": 11784, "loss": 0.1295, "lr": 8.064534383024284e-07, "epoch": 0.6059063136456212, "percentage": 60.59, "elapsed_time": "0:18:12", "remaining_time": "0:11:50", "throughput": 6281.19, "total_tokens": 6859584} +{"current_steps": 7145, "total_steps": 11784, "loss": 0.062, "lr": 8.050004740764082e-07, "epoch": 0.6063306177868296, "percentage": 60.63, "elapsed_time": "0:18:12", "remaining_time": "0:11:49", "throughput": 6283.62, "total_tokens": 6864320} +{"current_steps": 7150, "total_steps": 11784, "loss": 0.038, "lr": 8.035479376613261e-07, "epoch": 0.606754921928038, "percentage": 60.68, "elapsed_time": "0:18:12", "remaining_time": "0:11:48", "throughput": 6285.74, "total_tokens": 6868672} +{"current_steps": 7155, "total_steps": 11784, "loss": 0.0651, "lr": 8.020958322439132e-07, "epoch": 0.6071792260692465, "percentage": 60.72, "elapsed_time": "0:18:13", "remaining_time": "0:11:47", "throughput": 6287.92, "total_tokens": 6873088} +{"current_steps": 7160, "total_steps": 11784, "loss": 0.0519, "lr": 8.006441610099539e-07, "epoch": 0.6076035302104549, "percentage": 60.76, "elapsed_time": "0:18:13", "remaining_time": "0:11:46", "throughput": 6290.14, "total_tokens": 6877568} +{"current_steps": 7165, "total_steps": 11784, "loss": 0.1173, "lr": 7.991929271442817e-07, "epoch": 0.6080278343516633, "percentage": 60.8, "elapsed_time": "0:18:13", "remaining_time": "0:11:45", "throughput": 6292.41, "total_tokens": 6882112} +{"current_steps": 7170, "total_steps": 11784, "loss": 0.1062, "lr": 7.977421338307687e-07, "epoch": 0.6084521384928717, "percentage": 60.85, "elapsed_time": "0:18:14", "remaining_time": "0:11:44", "throughput": 6294.73, "total_tokens": 6886720} +{"current_steps": 7175, "total_steps": 11784, "loss": 0.0485, "lr": 7.962917842523215e-07, "epoch": 0.6088764426340801, "percentage": 60.89, "elapsed_time": "0:18:14", "remaining_time": "0:11:42", "throughput": 6297.25, "total_tokens": 6891584} +{"current_steps": 7180, "total_steps": 11784, "loss": 0.0222, "lr": 7.94841881590874e-07, "epoch": 0.6093007467752886, "percentage": 60.93, "elapsed_time": "0:18:14", "remaining_time": "0:11:41", "throughput": 6299.69, "total_tokens": 6896320} +{"current_steps": 7185, "total_steps": 11784, "loss": 0.1228, "lr": 7.933924290273774e-07, "epoch": 0.609725050916497, "percentage": 60.97, "elapsed_time": "0:18:15", "remaining_time": "0:11:40", "throughput": 6302.07, "total_tokens": 6900992} +{"current_steps": 7190, "total_steps": 11784, "loss": 0.0742, "lr": 7.919434297417976e-07, "epoch": 0.6101493550577054, "percentage": 61.01, "elapsed_time": "0:18:15", "remaining_time": "0:11:39", "throughput": 6304.36, "total_tokens": 6905600} +{"current_steps": 7195, "total_steps": 11784, "loss": 0.103, "lr": 7.904948869131039e-07, "epoch": 0.6105736591989138, "percentage": 61.06, "elapsed_time": "0:18:15", "remaining_time": "0:11:38", "throughput": 6306.65, "total_tokens": 6910208} +{"current_steps": 7200, "total_steps": 11784, "loss": 0.0421, "lr": 7.89046803719267e-07, "epoch": 0.6109979633401222, "percentage": 61.1, "elapsed_time": "0:18:16", "remaining_time": "0:11:37", "throughput": 6309.31, "total_tokens": 6915264} +{"current_steps": 7205, "total_steps": 11784, "loss": 0.0266, "lr": 7.875991833372463e-07, "epoch": 0.6114222674813307, "percentage": 61.14, "elapsed_time": "0:18:16", "remaining_time": "0:11:36", "throughput": 6311.57, "total_tokens": 6919808} +{"current_steps": 7210, "total_steps": 11784, "loss": 0.0321, "lr": 7.861520289429879e-07, "epoch": 0.6118465716225391, "percentage": 61.18, "elapsed_time": "0:18:16", "remaining_time": "0:11:35", "throughput": 6314.01, "total_tokens": 6924608} +{"current_steps": 7215, "total_steps": 11784, "loss": 0.0338, "lr": 7.847053437114141e-07, "epoch": 0.6122708757637475, "percentage": 61.23, "elapsed_time": "0:18:17", "remaining_time": "0:11:34", "throughput": 6316.44, "total_tokens": 6929344} +{"current_steps": 7220, "total_steps": 11784, "loss": 0.1193, "lr": 7.832591308164193e-07, "epoch": 0.6126951799049559, "percentage": 61.27, "elapsed_time": "0:18:17", "remaining_time": "0:11:33", "throughput": 6319.16, "total_tokens": 6934464} +{"current_steps": 7225, "total_steps": 11784, "loss": 0.0717, "lr": 7.818133934308606e-07, "epoch": 0.6131194840461643, "percentage": 61.31, "elapsed_time": "0:18:17", "remaining_time": "0:11:32", "throughput": 6321.76, "total_tokens": 6939456} +{"current_steps": 7230, "total_steps": 11784, "loss": 0.0072, "lr": 7.803681347265524e-07, "epoch": 0.6135437881873728, "percentage": 61.35, "elapsed_time": "0:18:18", "remaining_time": "0:11:31", "throughput": 6323.85, "total_tokens": 6943808} +{"current_steps": 7235, "total_steps": 11784, "loss": 0.0254, "lr": 7.789233578742583e-07, "epoch": 0.6139680923285811, "percentage": 61.4, "elapsed_time": "0:18:18", "remaining_time": "0:11:30", "throughput": 6326.39, "total_tokens": 6948736} +{"current_steps": 7240, "total_steps": 11784, "loss": 0.0926, "lr": 7.774790660436857e-07, "epoch": 0.6143923964697895, "percentage": 61.44, "elapsed_time": "0:18:18", "remaining_time": "0:11:29", "throughput": 6329.04, "total_tokens": 6953792} +{"current_steps": 7245, "total_steps": 11784, "loss": 0.0226, "lr": 7.760352624034769e-07, "epoch": 0.6148167006109979, "percentage": 61.48, "elapsed_time": "0:18:19", "remaining_time": "0:11:28", "throughput": 6331.55, "total_tokens": 6958656} +{"current_steps": 7250, "total_steps": 11784, "loss": 0.0326, "lr": 7.745919501212043e-07, "epoch": 0.6152410047522063, "percentage": 61.52, "elapsed_time": "0:18:19", "remaining_time": "0:11:27", "throughput": 6333.77, "total_tokens": 6963200} +{"current_steps": 7255, "total_steps": 11784, "loss": 0.0307, "lr": 7.731491323633608e-07, "epoch": 0.6156653088934148, "percentage": 61.57, "elapsed_time": "0:18:19", "remaining_time": "0:11:26", "throughput": 6336.58, "total_tokens": 6968448} +{"current_steps": 7260, "total_steps": 11784, "loss": 0.0835, "lr": 7.71706812295356e-07, "epoch": 0.6160896130346232, "percentage": 61.61, "elapsed_time": "0:18:20", "remaining_time": "0:11:25", "throughput": 6339.52, "total_tokens": 6973888} +{"current_steps": 7265, "total_steps": 11784, "loss": 0.0817, "lr": 7.702649930815065e-07, "epoch": 0.6165139171758316, "percentage": 61.65, "elapsed_time": "0:18:20", "remaining_time": "0:11:24", "throughput": 6341.61, "total_tokens": 6978304} +{"current_steps": 7270, "total_steps": 11784, "loss": 0.0605, "lr": 7.688236778850306e-07, "epoch": 0.61693822131704, "percentage": 61.69, "elapsed_time": "0:18:20", "remaining_time": "0:11:23", "throughput": 6344.09, "total_tokens": 6983168} +{"current_steps": 7275, "total_steps": 11784, "loss": 0.0463, "lr": 7.6738286986804e-07, "epoch": 0.6173625254582484, "percentage": 61.74, "elapsed_time": "0:18:21", "remaining_time": "0:11:22", "throughput": 6346.73, "total_tokens": 6988224} +{"current_steps": 7280, "total_steps": 11784, "loss": 0.0742, "lr": 7.659425721915351e-07, "epoch": 0.6177868295994569, "percentage": 61.78, "elapsed_time": "0:18:21", "remaining_time": "0:11:21", "throughput": 6348.69, "total_tokens": 6992448} +{"current_steps": 7285, "total_steps": 11784, "loss": 0.0507, "lr": 7.645027880153956e-07, "epoch": 0.6182111337406653, "percentage": 61.82, "elapsed_time": "0:18:21", "remaining_time": "0:11:20", "throughput": 6350.8, "total_tokens": 6996864} +{"current_steps": 7290, "total_steps": 11784, "loss": 0.0316, "lr": 7.63063520498375e-07, "epoch": 0.6186354378818737, "percentage": 61.86, "elapsed_time": "0:18:22", "remaining_time": "0:11:19", "throughput": 6353.48, "total_tokens": 7001984} +{"current_steps": 7295, "total_steps": 11784, "loss": 0.0762, "lr": 7.616247727980927e-07, "epoch": 0.6190597420230821, "percentage": 61.91, "elapsed_time": "0:18:22", "remaining_time": "0:11:18", "throughput": 6356.2, "total_tokens": 7007168} +{"current_steps": 7300, "total_steps": 11784, "loss": 0.0582, "lr": 7.601865480710289e-07, "epoch": 0.6194840461642905, "percentage": 61.95, "elapsed_time": "0:18:22", "remaining_time": "0:11:17", "throughput": 6358.98, "total_tokens": 7012416} +{"current_steps": 7305, "total_steps": 11784, "loss": 0.0792, "lr": 7.587488494725156e-07, "epoch": 0.619908350305499, "percentage": 61.99, "elapsed_time": "0:18:23", "remaining_time": "0:11:16", "throughput": 6360.91, "total_tokens": 7016576} +{"current_steps": 7310, "total_steps": 11784, "loss": 0.0391, "lr": 7.573116801567301e-07, "epoch": 0.6203326544467074, "percentage": 62.03, "elapsed_time": "0:18:23", "remaining_time": "0:11:15", "throughput": 6363.06, "total_tokens": 7021056} +{"current_steps": 7315, "total_steps": 11784, "loss": 0.091, "lr": 7.558750432766901e-07, "epoch": 0.6207569585879158, "percentage": 62.08, "elapsed_time": "0:18:23", "remaining_time": "0:11:14", "throughput": 6365.54, "total_tokens": 7025984} +{"current_steps": 7320, "total_steps": 11784, "loss": 0.0191, "lr": 7.544389419842429e-07, "epoch": 0.6211812627291242, "percentage": 62.12, "elapsed_time": "0:18:24", "remaining_time": "0:11:13", "throughput": 6368.23, "total_tokens": 7031104} +{"current_steps": 7325, "total_steps": 11784, "loss": 0.0467, "lr": 7.530033794300631e-07, "epoch": 0.6216055668703326, "percentage": 62.16, "elapsed_time": "0:18:24", "remaining_time": "0:11:12", "throughput": 6370.68, "total_tokens": 7035904} +{"current_steps": 7330, "total_steps": 11784, "loss": 0.1059, "lr": 7.515683587636412e-07, "epoch": 0.6220298710115411, "percentage": 62.2, "elapsed_time": "0:18:24", "remaining_time": "0:11:11", "throughput": 6372.86, "total_tokens": 7040384} +{"current_steps": 7335, "total_steps": 11784, "loss": 0.0149, "lr": 7.501338831332813e-07, "epoch": 0.6224541751527495, "percentage": 62.25, "elapsed_time": "0:18:25", "remaining_time": "0:11:10", "throughput": 6375.38, "total_tokens": 7045312} +{"current_steps": 7340, "total_steps": 11784, "loss": 0.034, "lr": 7.486999556860889e-07, "epoch": 0.6228784792939579, "percentage": 62.29, "elapsed_time": "0:18:25", "remaining_time": "0:11:09", "throughput": 6377.75, "total_tokens": 7050048} +{"current_steps": 7345, "total_steps": 11784, "loss": 0.0281, "lr": 7.472665795679694e-07, "epoch": 0.6233027834351663, "percentage": 62.33, "elapsed_time": "0:18:25", "remaining_time": "0:11:08", "throughput": 6379.97, "total_tokens": 7054592} +{"current_steps": 7350, "total_steps": 11784, "loss": 0.0953, "lr": 7.458337579236168e-07, "epoch": 0.6237270875763747, "percentage": 62.37, "elapsed_time": "0:18:26", "remaining_time": "0:11:07", "throughput": 6382.38, "total_tokens": 7059392} +{"current_steps": 7355, "total_steps": 11784, "loss": 0.113, "lr": 7.4440149389651e-07, "epoch": 0.6241513917175832, "percentage": 62.42, "elapsed_time": "0:18:26", "remaining_time": "0:11:06", "throughput": 6384.3, "total_tokens": 7063552} +{"current_steps": 7360, "total_steps": 11784, "loss": 0.0476, "lr": 7.429697906289029e-07, "epoch": 0.6245756958587916, "percentage": 62.46, "elapsed_time": "0:18:26", "remaining_time": "0:11:05", "throughput": 6386.67, "total_tokens": 7068288} +{"current_steps": 7365, "total_steps": 11784, "loss": 0.005, "lr": 7.415386512618216e-07, "epoch": 0.625, "percentage": 62.5, "elapsed_time": "0:18:27", "remaining_time": "0:11:04", "throughput": 6389.18, "total_tokens": 7073216} +{"current_steps": 7370, "total_steps": 11784, "loss": 0.0727, "lr": 7.401080789350525e-07, "epoch": 0.6254243041412084, "percentage": 62.54, "elapsed_time": "0:18:27", "remaining_time": "0:11:03", "throughput": 6391.41, "total_tokens": 7077824} +{"current_steps": 7375, "total_steps": 11784, "loss": 0.0238, "lr": 7.386780767871396e-07, "epoch": 0.6258486082824168, "percentage": 62.58, "elapsed_time": "0:18:27", "remaining_time": "0:11:02", "throughput": 6393.49, "total_tokens": 7082240} +{"current_steps": 7380, "total_steps": 11784, "loss": 0.078, "lr": 7.372486479553748e-07, "epoch": 0.6262729124236253, "percentage": 62.63, "elapsed_time": "0:18:28", "remaining_time": "0:11:01", "throughput": 6396.12, "total_tokens": 7087360} +{"current_steps": 7385, "total_steps": 11784, "loss": 0.0387, "lr": 7.358197955757939e-07, "epoch": 0.6266972165648337, "percentage": 62.67, "elapsed_time": "0:18:28", "remaining_time": "0:11:00", "throughput": 6398.66, "total_tokens": 7092288} +{"current_steps": 7390, "total_steps": 11784, "loss": 0.0334, "lr": 7.343915227831661e-07, "epoch": 0.6271215207060421, "percentage": 62.71, "elapsed_time": "0:18:28", "remaining_time": "0:10:59", "throughput": 6400.8, "total_tokens": 7096768} +{"current_steps": 7395, "total_steps": 11784, "loss": 0.0722, "lr": 7.329638327109902e-07, "epoch": 0.6275458248472505, "percentage": 62.75, "elapsed_time": "0:18:29", "remaining_time": "0:10:58", "throughput": 6402.99, "total_tokens": 7101312} +{"current_steps": 7400, "total_steps": 11784, "loss": 0.0878, "lr": 7.315367284914861e-07, "epoch": 0.6279701289884589, "percentage": 62.8, "elapsed_time": "0:18:29", "remaining_time": "0:10:57", "throughput": 6405.02, "total_tokens": 7105664} +{"current_steps": 7405, "total_steps": 11784, "loss": 0.0652, "lr": 7.301102132555891e-07, "epoch": 0.6283944331296674, "percentage": 62.84, "elapsed_time": "0:18:29", "remaining_time": "0:10:56", "throughput": 6407.21, "total_tokens": 7110208} +{"current_steps": 7410, "total_steps": 11784, "loss": 0.004, "lr": 7.286842901329412e-07, "epoch": 0.6288187372708758, "percentage": 62.88, "elapsed_time": "0:18:30", "remaining_time": "0:10:55", "throughput": 6409.7, "total_tokens": 7115136} +{"current_steps": 7415, "total_steps": 11784, "loss": 0.0337, "lr": 7.272589622518863e-07, "epoch": 0.6292430414120842, "percentage": 62.92, "elapsed_time": "0:18:30", "remaining_time": "0:10:54", "throughput": 6411.8, "total_tokens": 7119552} +{"current_steps": 7420, "total_steps": 11784, "loss": 0.0522, "lr": 7.258342327394616e-07, "epoch": 0.6296673455532926, "percentage": 62.97, "elapsed_time": "0:18:30", "remaining_time": "0:10:53", "throughput": 6414.18, "total_tokens": 7124352} +{"current_steps": 7425, "total_steps": 11784, "loss": 0.0316, "lr": 7.244101047213927e-07, "epoch": 0.630091649694501, "percentage": 63.01, "elapsed_time": "0:18:31", "remaining_time": "0:10:52", "throughput": 6416.28, "total_tokens": 7128768} +{"current_steps": 7430, "total_steps": 11784, "loss": 0.0971, "lr": 7.229865813220843e-07, "epoch": 0.6305159538357095, "percentage": 63.05, "elapsed_time": "0:18:31", "remaining_time": "0:10:51", "throughput": 6418.67, "total_tokens": 7133568} +{"current_steps": 7435, "total_steps": 11784, "loss": 0.0625, "lr": 7.215636656646151e-07, "epoch": 0.6309402579769179, "percentage": 63.09, "elapsed_time": "0:18:31", "remaining_time": "0:10:50", "throughput": 6420.88, "total_tokens": 7138112} +{"current_steps": 7440, "total_steps": 11784, "loss": 0.0557, "lr": 7.201413608707312e-07, "epoch": 0.6313645621181263, "percentage": 63.14, "elapsed_time": "0:18:32", "remaining_time": "0:10:49", "throughput": 6423.2, "total_tokens": 7142848} +{"current_steps": 7445, "total_steps": 11784, "loss": 0.0781, "lr": 7.187196700608372e-07, "epoch": 0.6317888662593347, "percentage": 63.18, "elapsed_time": "0:18:32", "remaining_time": "0:10:48", "throughput": 6425.53, "total_tokens": 7147584} +{"current_steps": 7450, "total_steps": 11784, "loss": 0.0768, "lr": 7.172985963539919e-07, "epoch": 0.6322131704005431, "percentage": 63.22, "elapsed_time": "0:18:32", "remaining_time": "0:10:47", "throughput": 6427.77, "total_tokens": 7152192} +{"current_steps": 7455, "total_steps": 11784, "loss": 0.0604, "lr": 7.158781428678989e-07, "epoch": 0.6326374745417516, "percentage": 63.26, "elapsed_time": "0:18:33", "remaining_time": "0:10:46", "throughput": 6429.91, "total_tokens": 7156672} +{"current_steps": 7460, "total_steps": 11784, "loss": 0.0545, "lr": 7.144583127189028e-07, "epoch": 0.63306177868296, "percentage": 63.31, "elapsed_time": "0:18:33", "remaining_time": "0:10:45", "throughput": 6432.42, "total_tokens": 7161664} +{"current_steps": 7465, "total_steps": 11784, "loss": 0.0352, "lr": 7.130391090219789e-07, "epoch": 0.6334860828241684, "percentage": 63.35, "elapsed_time": "0:18:33", "remaining_time": "0:10:44", "throughput": 6434.45, "total_tokens": 7166016} +{"current_steps": 7470, "total_steps": 11784, "loss": 0.0671, "lr": 7.116205348907298e-07, "epoch": 0.6339103869653768, "percentage": 63.39, "elapsed_time": "0:18:34", "remaining_time": "0:10:43", "throughput": 6436.76, "total_tokens": 7170752} +{"current_steps": 7475, "total_steps": 11784, "loss": 0.0546, "lr": 7.10202593437375e-07, "epoch": 0.6343346911065852, "percentage": 63.43, "elapsed_time": "0:18:34", "remaining_time": "0:10:42", "throughput": 6439.5, "total_tokens": 7176064} +{"current_steps": 7480, "total_steps": 11784, "loss": 0.0549, "lr": 7.08785287772748e-07, "epoch": 0.6347589952477937, "percentage": 63.48, "elapsed_time": "0:18:34", "remaining_time": "0:10:41", "throughput": 6442.16, "total_tokens": 7181312} +{"current_steps": 7485, "total_steps": 11784, "loss": 0.095, "lr": 7.073686210062859e-07, "epoch": 0.6351832993890021, "percentage": 63.52, "elapsed_time": "0:18:35", "remaining_time": "0:10:40", "throughput": 6444.58, "total_tokens": 7186176} +{"current_steps": 7490, "total_steps": 11784, "loss": 0.0358, "lr": 7.059525962460248e-07, "epoch": 0.6356076035302105, "percentage": 63.56, "elapsed_time": "0:18:35", "remaining_time": "0:10:39", "throughput": 6446.79, "total_tokens": 7190784} +{"current_steps": 7495, "total_steps": 11784, "loss": 0.0687, "lr": 7.045372165985919e-07, "epoch": 0.6360319076714189, "percentage": 63.6, "elapsed_time": "0:18:35", "remaining_time": "0:10:38", "throughput": 6449.34, "total_tokens": 7195840} +{"current_steps": 7500, "total_steps": 11784, "loss": 0.0347, "lr": 7.031224851691999e-07, "epoch": 0.6364562118126272, "percentage": 63.65, "elapsed_time": "0:18:36", "remaining_time": "0:10:37", "throughput": 6451.99, "total_tokens": 7201024} +{"current_steps": 7505, "total_steps": 11784, "loss": 0.0709, "lr": 7.017084050616385e-07, "epoch": 0.6368805159538357, "percentage": 63.69, "elapsed_time": "0:18:36", "remaining_time": "0:10:36", "throughput": 6454.28, "total_tokens": 7205760} +{"current_steps": 7510, "total_steps": 11784, "loss": 0.0482, "lr": 7.002949793782686e-07, "epoch": 0.6373048200950441, "percentage": 63.73, "elapsed_time": "0:18:36", "remaining_time": "0:10:35", "throughput": 6456.64, "total_tokens": 7210560} +{"current_steps": 7515, "total_steps": 11784, "loss": 0.0555, "lr": 6.988822112200156e-07, "epoch": 0.6377291242362525, "percentage": 63.77, "elapsed_time": "0:18:37", "remaining_time": "0:10:34", "throughput": 6459.09, "total_tokens": 7215488} +{"current_steps": 7520, "total_steps": 11784, "loss": 0.0843, "lr": 6.974701036863626e-07, "epoch": 0.6381534283774609, "percentage": 63.82, "elapsed_time": "0:18:37", "remaining_time": "0:10:33", "throughput": 6461.67, "total_tokens": 7220608} +{"current_steps": 7525, "total_steps": 11784, "loss": 0.029, "lr": 6.960586598753426e-07, "epoch": 0.6385777325186693, "percentage": 63.86, "elapsed_time": "0:18:37", "remaining_time": "0:10:32", "throughput": 6463.93, "total_tokens": 7225280} +{"current_steps": 7530, "total_steps": 11784, "loss": 0.0439, "lr": 6.946478828835331e-07, "epoch": 0.6390020366598778, "percentage": 63.9, "elapsed_time": "0:18:38", "remaining_time": "0:10:31", "throughput": 6465.99, "total_tokens": 7229696} +{"current_steps": 7535, "total_steps": 11784, "loss": 0.0425, "lr": 6.932377758060481e-07, "epoch": 0.6394263408010862, "percentage": 63.94, "elapsed_time": "0:18:38", "remaining_time": "0:10:30", "throughput": 6467.92, "total_tokens": 7233984} +{"current_steps": 7540, "total_steps": 11784, "loss": 0.0495, "lr": 6.91828341736533e-07, "epoch": 0.6398506449422946, "percentage": 63.99, "elapsed_time": "0:18:38", "remaining_time": "0:10:29", "throughput": 6471.16, "total_tokens": 7239936} +{"current_steps": 7545, "total_steps": 11784, "loss": 0.0334, "lr": 6.904195837671552e-07, "epoch": 0.640274949083503, "percentage": 64.03, "elapsed_time": "0:18:39", "remaining_time": "0:10:28", "throughput": 6473.29, "total_tokens": 7244480} +{"current_steps": 7550, "total_steps": 11784, "loss": 0.0664, "lr": 6.890115049885994e-07, "epoch": 0.6406992532247114, "percentage": 64.07, "elapsed_time": "0:18:39", "remaining_time": "0:10:27", "throughput": 6475.41, "total_tokens": 7248960} +{"current_steps": 7555, "total_steps": 11784, "loss": 0.0579, "lr": 6.87604108490061e-07, "epoch": 0.6411235573659199, "percentage": 64.11, "elapsed_time": "0:18:39", "remaining_time": "0:10:26", "throughput": 6477.87, "total_tokens": 7253888} +{"current_steps": 7560, "total_steps": 11784, "loss": 0.0378, "lr": 6.861973973592372e-07, "epoch": 0.6415478615071283, "percentage": 64.15, "elapsed_time": "0:18:40", "remaining_time": "0:10:25", "throughput": 6480.62, "total_tokens": 7259200} +{"current_steps": 7565, "total_steps": 11784, "loss": 0.0178, "lr": 6.847913746823227e-07, "epoch": 0.6419721656483367, "percentage": 64.2, "elapsed_time": "0:18:40", "remaining_time": "0:10:24", "throughput": 6482.8, "total_tokens": 7263808} +{"current_steps": 7570, "total_steps": 11784, "loss": 0.0296, "lr": 6.833860435440006e-07, "epoch": 0.6423964697895451, "percentage": 64.24, "elapsed_time": "0:18:40", "remaining_time": "0:10:23", "throughput": 6485.64, "total_tokens": 7269248} +{"current_steps": 7575, "total_steps": 11784, "loss": 0.0162, "lr": 6.819814070274384e-07, "epoch": 0.6428207739307535, "percentage": 64.28, "elapsed_time": "0:18:41", "remaining_time": "0:10:22", "throughput": 6488.34, "total_tokens": 7274496} +{"current_steps": 7580, "total_steps": 11784, "loss": 0.1676, "lr": 6.805774682142782e-07, "epoch": 0.643245078071962, "percentage": 64.32, "elapsed_time": "0:18:41", "remaining_time": "0:10:22", "throughput": 6490.89, "total_tokens": 7279552} +{"current_steps": 7585, "total_steps": 11784, "loss": 0.0954, "lr": 6.791742301846325e-07, "epoch": 0.6436693822131704, "percentage": 64.37, "elapsed_time": "0:18:41", "remaining_time": "0:10:21", "throughput": 6493.04, "total_tokens": 7284096} +{"current_steps": 7590, "total_steps": 11784, "loss": 0.039, "lr": 6.777716960170752e-07, "epoch": 0.6440936863543788, "percentage": 64.41, "elapsed_time": "0:18:42", "remaining_time": "0:10:20", "throughput": 6495.54, "total_tokens": 7289088} +{"current_steps": 7595, "total_steps": 11784, "loss": 0.1273, "lr": 6.763698687886372e-07, "epoch": 0.6445179904955872, "percentage": 64.45, "elapsed_time": "0:18:42", "remaining_time": "0:10:19", "throughput": 6497.71, "total_tokens": 7293696} +{"current_steps": 7600, "total_steps": 11784, "loss": 0.047, "lr": 6.749687515747977e-07, "epoch": 0.6449422946367956, "percentage": 64.49, "elapsed_time": "0:18:42", "remaining_time": "0:10:18", "throughput": 6500.28, "total_tokens": 7298816} +{"current_steps": 7605, "total_steps": 11784, "loss": 0.0821, "lr": 6.735683474494784e-07, "epoch": 0.6453665987780041, "percentage": 64.54, "elapsed_time": "0:18:43", "remaining_time": "0:10:17", "throughput": 6502.31, "total_tokens": 7303232} +{"current_steps": 7610, "total_steps": 11784, "loss": 0.0391, "lr": 6.721686594850362e-07, "epoch": 0.6457909029192125, "percentage": 64.58, "elapsed_time": "0:18:43", "remaining_time": "0:10:16", "throughput": 6504.95, "total_tokens": 7308416} +{"current_steps": 7615, "total_steps": 11784, "loss": 0.0789, "lr": 6.707696907522577e-07, "epoch": 0.6462152070604209, "percentage": 64.62, "elapsed_time": "0:18:43", "remaining_time": "0:10:15", "throughput": 6507.11, "total_tokens": 7313024} +{"current_steps": 7620, "total_steps": 11784, "loss": 0.0668, "lr": 6.693714443203507e-07, "epoch": 0.6466395112016293, "percentage": 64.66, "elapsed_time": "0:18:44", "remaining_time": "0:10:14", "throughput": 6509.39, "total_tokens": 7317760} +{"current_steps": 7625, "total_steps": 11784, "loss": 0.0315, "lr": 6.679739232569388e-07, "epoch": 0.6470638153428377, "percentage": 64.71, "elapsed_time": "0:18:44", "remaining_time": "0:10:13", "throughput": 6511.75, "total_tokens": 7322624} +{"current_steps": 7630, "total_steps": 11784, "loss": 0.0434, "lr": 6.665771306280537e-07, "epoch": 0.6474881194840462, "percentage": 64.75, "elapsed_time": "0:18:44", "remaining_time": "0:10:12", "throughput": 6513.79, "total_tokens": 7327104} +{"current_steps": 7635, "total_steps": 11784, "loss": 0.0735, "lr": 6.651810694981299e-07, "epoch": 0.6479124236252546, "percentage": 64.79, "elapsed_time": "0:18:45", "remaining_time": "0:10:11", "throughput": 6515.78, "total_tokens": 7331520} +{"current_steps": 7640, "total_steps": 11784, "loss": 0.0712, "lr": 6.637857429299958e-07, "epoch": 0.648336727766463, "percentage": 64.83, "elapsed_time": "0:18:45", "remaining_time": "0:10:10", "throughput": 6518.19, "total_tokens": 7336448} +{"current_steps": 7645, "total_steps": 11784, "loss": 0.0669, "lr": 6.623911539848697e-07, "epoch": 0.6487610319076714, "percentage": 64.88, "elapsed_time": "0:18:45", "remaining_time": "0:10:09", "throughput": 6520.47, "total_tokens": 7341248} +{"current_steps": 7650, "total_steps": 11784, "loss": 0.0657, "lr": 6.6099730572235e-07, "epoch": 0.6491853360488798, "percentage": 64.92, "elapsed_time": "0:18:46", "remaining_time": "0:10:08", "throughput": 6522.65, "total_tokens": 7345920} +{"current_steps": 7655, "total_steps": 11784, "loss": 0.0214, "lr": 6.596042012004119e-07, "epoch": 0.6496096401900883, "percentage": 64.96, "elapsed_time": "0:18:46", "remaining_time": "0:10:07", "throughput": 6524.76, "total_tokens": 7350464} +{"current_steps": 7660, "total_steps": 11784, "loss": 0.0931, "lr": 6.582118434753973e-07, "epoch": 0.6500339443312967, "percentage": 65.0, "elapsed_time": "0:18:46", "remaining_time": "0:10:06", "throughput": 6526.88, "total_tokens": 7355008} +{"current_steps": 7665, "total_steps": 11784, "loss": 0.0362, "lr": 6.568202356020108e-07, "epoch": 0.6504582484725051, "percentage": 65.05, "elapsed_time": "0:18:47", "remaining_time": "0:10:05", "throughput": 6529.1, "total_tokens": 7359680} +{"current_steps": 7670, "total_steps": 11784, "loss": 0.0404, "lr": 6.554293806333109e-07, "epoch": 0.6508825526137135, "percentage": 65.09, "elapsed_time": "0:18:47", "remaining_time": "0:10:04", "throughput": 6531.01, "total_tokens": 7363968} +{"current_steps": 7670, "total_steps": 11784, "eval_loss": 0.05561085045337677, "epoch": 0.6508825526137135, "percentage": 65.09, "elapsed_time": "0:19:03", "remaining_time": "0:10:13", "throughput": 6440.54, "total_tokens": 7363968} +{"current_steps": 7675, "total_steps": 11784, "loss": 0.108, "lr": 6.540392816207054e-07, "epoch": 0.651306856754922, "percentage": 65.13, "elapsed_time": "0:19:41", "remaining_time": "0:10:32", "throughput": 6237.6, "total_tokens": 7368064} +{"current_steps": 7680, "total_steps": 11784, "loss": 0.0423, "lr": 6.52649941613943e-07, "epoch": 0.6517311608961304, "percentage": 65.17, "elapsed_time": "0:19:41", "remaining_time": "0:10:31", "throughput": 6239.8, "total_tokens": 7372736} +{"current_steps": 7685, "total_steps": 11784, "loss": 0.0361, "lr": 6.512613636611068e-07, "epoch": 0.6521554650373388, "percentage": 65.22, "elapsed_time": "0:19:41", "remaining_time": "0:10:30", "throughput": 6242.17, "total_tokens": 7377600} +{"current_steps": 7690, "total_steps": 11784, "loss": 0.0238, "lr": 6.498735508086093e-07, "epoch": 0.6525797691785472, "percentage": 65.26, "elapsed_time": "0:19:42", "remaining_time": "0:10:29", "throughput": 6244.13, "total_tokens": 7381952} +{"current_steps": 7695, "total_steps": 11784, "loss": 0.0258, "lr": 6.484865061011829e-07, "epoch": 0.6530040733197556, "percentage": 65.3, "elapsed_time": "0:19:42", "remaining_time": "0:10:28", "throughput": 6246.83, "total_tokens": 7387264} +{"current_steps": 7700, "total_steps": 11784, "loss": 0.0708, "lr": 6.471002325818761e-07, "epoch": 0.653428377460964, "percentage": 65.34, "elapsed_time": "0:19:42", "remaining_time": "0:10:27", "throughput": 6249.04, "total_tokens": 7391936} +{"current_steps": 7705, "total_steps": 11784, "loss": 0.0622, "lr": 6.45714733292044e-07, "epoch": 0.6538526816021725, "percentage": 65.39, "elapsed_time": "0:19:43", "remaining_time": "0:10:26", "throughput": 6251.71, "total_tokens": 7397248} +{"current_steps": 7710, "total_steps": 11784, "loss": 0.0741, "lr": 6.443300112713452e-07, "epoch": 0.6542769857433809, "percentage": 65.43, "elapsed_time": "0:19:43", "remaining_time": "0:10:25", "throughput": 6253.9, "total_tokens": 7401920} +{"current_steps": 7715, "total_steps": 11784, "loss": 0.0523, "lr": 6.429460695577309e-07, "epoch": 0.6547012898845893, "percentage": 65.47, "elapsed_time": "0:19:43", "remaining_time": "0:10:24", "throughput": 6256.35, "total_tokens": 7406912} +{"current_steps": 7720, "total_steps": 11784, "loss": 0.0758, "lr": 6.415629111874418e-07, "epoch": 0.6551255940257977, "percentage": 65.51, "elapsed_time": "0:19:44", "remaining_time": "0:10:23", "throughput": 6258.69, "total_tokens": 7411776} +{"current_steps": 7725, "total_steps": 11784, "loss": 0.0585, "lr": 6.401805391949989e-07, "epoch": 0.6555498981670062, "percentage": 65.55, "elapsed_time": "0:19:44", "remaining_time": "0:10:22", "throughput": 6260.67, "total_tokens": 7416128} +{"current_steps": 7730, "total_steps": 11784, "loss": 0.0381, "lr": 6.387989566131996e-07, "epoch": 0.6559742023082146, "percentage": 65.6, "elapsed_time": "0:19:44", "remaining_time": "0:10:21", "throughput": 6263.88, "total_tokens": 7422208} +{"current_steps": 7735, "total_steps": 11784, "loss": 0.0647, "lr": 6.374181664731076e-07, "epoch": 0.656398506449423, "percentage": 65.64, "elapsed_time": "0:19:45", "remaining_time": "0:10:20", "throughput": 6266.17, "total_tokens": 7427008} +{"current_steps": 7740, "total_steps": 11784, "loss": 0.053, "lr": 6.360381718040493e-07, "epoch": 0.6568228105906314, "percentage": 65.68, "elapsed_time": "0:19:45", "remaining_time": "0:10:19", "throughput": 6268.22, "total_tokens": 7431488} +{"current_steps": 7745, "total_steps": 11784, "loss": 0.0645, "lr": 6.34658975633605e-07, "epoch": 0.6572471147318398, "percentage": 65.72, "elapsed_time": "0:19:45", "remaining_time": "0:10:18", "throughput": 6270.69, "total_tokens": 7436544} +{"current_steps": 7750, "total_steps": 11784, "loss": 0.0631, "lr": 6.332805809876041e-07, "epoch": 0.6576714188730483, "percentage": 65.77, "elapsed_time": "0:19:46", "remaining_time": "0:10:17", "throughput": 6272.65, "total_tokens": 7440896} +{"current_steps": 7755, "total_steps": 11784, "loss": 0.1176, "lr": 6.319029908901168e-07, "epoch": 0.6580957230142567, "percentage": 65.81, "elapsed_time": "0:19:46", "remaining_time": "0:10:16", "throughput": 6275.05, "total_tokens": 7445824} +{"current_steps": 7760, "total_steps": 11784, "loss": 0.0954, "lr": 6.305262083634487e-07, "epoch": 0.658520027155465, "percentage": 65.85, "elapsed_time": "0:19:46", "remaining_time": "0:10:15", "throughput": 6277.16, "total_tokens": 7450368} +{"current_steps": 7765, "total_steps": 11784, "loss": 0.0724, "lr": 6.29150236428133e-07, "epoch": 0.6589443312966734, "percentage": 65.89, "elapsed_time": "0:19:47", "remaining_time": "0:10:14", "throughput": 6279.06, "total_tokens": 7454720} +{"current_steps": 7770, "total_steps": 11784, "loss": 0.0929, "lr": 6.277750781029254e-07, "epoch": 0.6593686354378818, "percentage": 65.94, "elapsed_time": "0:19:47", "remaining_time": "0:10:13", "throughput": 6281.15, "total_tokens": 7459264} +{"current_steps": 7775, "total_steps": 11784, "loss": 0.0311, "lr": 6.26400736404796e-07, "epoch": 0.6597929395790902, "percentage": 65.98, "elapsed_time": "0:19:47", "remaining_time": "0:10:12", "throughput": 6283.21, "total_tokens": 7463744} +{"current_steps": 7780, "total_steps": 11784, "loss": 0.0185, "lr": 6.250272143489236e-07, "epoch": 0.6602172437202987, "percentage": 66.02, "elapsed_time": "0:19:48", "remaining_time": "0:10:11", "throughput": 6285.68, "total_tokens": 7468800} +{"current_steps": 7785, "total_steps": 11784, "loss": 0.0711, "lr": 6.23654514948688e-07, "epoch": 0.6606415478615071, "percentage": 66.06, "elapsed_time": "0:19:48", "remaining_time": "0:10:10", "throughput": 6288.14, "total_tokens": 7473856} +{"current_steps": 7790, "total_steps": 11784, "loss": 0.0637, "lr": 6.222826412156659e-07, "epoch": 0.6610658520027155, "percentage": 66.11, "elapsed_time": "0:19:48", "remaining_time": "0:10:09", "throughput": 6290.04, "total_tokens": 7478144} +{"current_steps": 7795, "total_steps": 11784, "loss": 0.0699, "lr": 6.209115961596207e-07, "epoch": 0.6614901561439239, "percentage": 66.15, "elapsed_time": "0:19:49", "remaining_time": "0:10:08", "throughput": 6291.94, "total_tokens": 7482432} +{"current_steps": 7800, "total_steps": 11784, "loss": 0.0804, "lr": 6.195413827884986e-07, "epoch": 0.6619144602851323, "percentage": 66.19, "elapsed_time": "0:19:49", "remaining_time": "0:10:07", "throughput": 6294.39, "total_tokens": 7487488} +{"current_steps": 7805, "total_steps": 11784, "loss": 0.0639, "lr": 6.181720041084216e-07, "epoch": 0.6623387644263408, "percentage": 66.23, "elapsed_time": "0:19:49", "remaining_time": "0:10:06", "throughput": 6296.47, "total_tokens": 7492032} +{"current_steps": 7810, "total_steps": 11784, "loss": 0.0315, "lr": 6.168034631236794e-07, "epoch": 0.6627630685675492, "percentage": 66.28, "elapsed_time": "0:19:50", "remaining_time": "0:10:05", "throughput": 6298.54, "total_tokens": 7496576} +{"current_steps": 7815, "total_steps": 11784, "loss": 0.0285, "lr": 6.154357628367251e-07, "epoch": 0.6631873727087576, "percentage": 66.32, "elapsed_time": "0:19:50", "remaining_time": "0:10:04", "throughput": 6300.58, "total_tokens": 7501056} +{"current_steps": 7820, "total_steps": 11784, "loss": 0.0801, "lr": 6.140689062481657e-07, "epoch": 0.663611676849966, "percentage": 66.36, "elapsed_time": "0:19:50", "remaining_time": "0:10:03", "throughput": 6302.8, "total_tokens": 7505792} +{"current_steps": 7825, "total_steps": 11784, "loss": 0.0378, "lr": 6.127028963567593e-07, "epoch": 0.6640359809911744, "percentage": 66.4, "elapsed_time": "0:19:51", "remaining_time": "0:10:02", "throughput": 6305.05, "total_tokens": 7510528} +{"current_steps": 7830, "total_steps": 11784, "loss": 0.0707, "lr": 6.113377361594048e-07, "epoch": 0.6644602851323829, "percentage": 66.45, "elapsed_time": "0:19:51", "remaining_time": "0:10:01", "throughput": 6307.21, "total_tokens": 7515200} +{"current_steps": 7835, "total_steps": 11784, "loss": 0.0271, "lr": 6.099734286511378e-07, "epoch": 0.6648845892735913, "percentage": 66.49, "elapsed_time": "0:19:51", "remaining_time": "0:10:00", "throughput": 6309.59, "total_tokens": 7520128} +{"current_steps": 7840, "total_steps": 11784, "loss": 0.0496, "lr": 6.086099768251222e-07, "epoch": 0.6653088934147997, "percentage": 66.53, "elapsed_time": "0:19:52", "remaining_time": "0:09:59", "throughput": 6311.56, "total_tokens": 7524544} +{"current_steps": 7845, "total_steps": 11784, "loss": 0.123, "lr": 6.072473836726461e-07, "epoch": 0.6657331975560081, "percentage": 66.57, "elapsed_time": "0:19:52", "remaining_time": "0:09:58", "throughput": 6314.06, "total_tokens": 7529664} +{"current_steps": 7850, "total_steps": 11784, "loss": 0.0509, "lr": 6.058856521831126e-07, "epoch": 0.6661575016972165, "percentage": 66.62, "elapsed_time": "0:19:52", "remaining_time": "0:09:57", "throughput": 6315.8, "total_tokens": 7533760} +{"current_steps": 7855, "total_steps": 11784, "loss": 0.0302, "lr": 6.045247853440349e-07, "epoch": 0.666581805838425, "percentage": 66.66, "elapsed_time": "0:19:53", "remaining_time": "0:09:56", "throughput": 6317.98, "total_tokens": 7538432} +{"current_steps": 7860, "total_steps": 11784, "loss": 0.0664, "lr": 6.031647861410287e-07, "epoch": 0.6670061099796334, "percentage": 66.7, "elapsed_time": "0:19:53", "remaining_time": "0:09:55", "throughput": 6320.19, "total_tokens": 7543168} +{"current_steps": 7865, "total_steps": 11784, "loss": 0.0448, "lr": 6.018056575578074e-07, "epoch": 0.6674304141208418, "percentage": 66.74, "elapsed_time": "0:19:53", "remaining_time": "0:09:54", "throughput": 6322.38, "total_tokens": 7547840} +{"current_steps": 7870, "total_steps": 11784, "loss": 0.0146, "lr": 6.004474025761723e-07, "epoch": 0.6678547182620502, "percentage": 66.79, "elapsed_time": "0:19:54", "remaining_time": "0:09:53", "throughput": 6324.71, "total_tokens": 7552768} +{"current_steps": 7875, "total_steps": 11784, "loss": 0.1316, "lr": 5.990900241760102e-07, "epoch": 0.6682790224032586, "percentage": 66.83, "elapsed_time": "0:19:54", "remaining_time": "0:09:52", "throughput": 6326.8, "total_tokens": 7557312} +{"current_steps": 7880, "total_steps": 11784, "loss": 0.0859, "lr": 5.977335253352833e-07, "epoch": 0.6687033265444671, "percentage": 66.87, "elapsed_time": "0:19:54", "remaining_time": "0:09:51", "throughput": 6328.91, "total_tokens": 7561920} +{"current_steps": 7885, "total_steps": 11784, "loss": 0.0652, "lr": 5.963779090300254e-07, "epoch": 0.6691276306856755, "percentage": 66.91, "elapsed_time": "0:19:55", "remaining_time": "0:09:50", "throughput": 6331.44, "total_tokens": 7567104} +{"current_steps": 7890, "total_steps": 11784, "loss": 0.0364, "lr": 5.950231782343326e-07, "epoch": 0.6695519348268839, "percentage": 66.96, "elapsed_time": "0:19:55", "remaining_time": "0:09:50", "throughput": 6333.46, "total_tokens": 7571584} +{"current_steps": 7895, "total_steps": 11784, "loss": 0.0182, "lr": 5.936693359203597e-07, "epoch": 0.6699762389680923, "percentage": 67.0, "elapsed_time": "0:19:55", "remaining_time": "0:09:49", "throughput": 6335.63, "total_tokens": 7576256} +{"current_steps": 7900, "total_steps": 11784, "loss": 0.0678, "lr": 5.923163850583113e-07, "epoch": 0.6704005431093008, "percentage": 67.04, "elapsed_time": "0:19:56", "remaining_time": "0:09:48", "throughput": 6338.03, "total_tokens": 7581248} +{"current_steps": 7905, "total_steps": 11784, "loss": 0.0382, "lr": 5.909643286164367e-07, "epoch": 0.6708248472505092, "percentage": 67.08, "elapsed_time": "0:19:56", "remaining_time": "0:09:47", "throughput": 6340.23, "total_tokens": 7585984} +{"current_steps": 7910, "total_steps": 11784, "loss": 0.0831, "lr": 5.896131695610223e-07, "epoch": 0.6712491513917176, "percentage": 67.12, "elapsed_time": "0:19:56", "remaining_time": "0:09:46", "throughput": 6342.31, "total_tokens": 7590528} +{"current_steps": 7915, "total_steps": 11784, "loss": 0.0263, "lr": 5.88262910856387e-07, "epoch": 0.671673455532926, "percentage": 67.17, "elapsed_time": "0:19:57", "remaining_time": "0:09:45", "throughput": 6344.55, "total_tokens": 7595328} +{"current_steps": 7920, "total_steps": 11784, "loss": 0.0661, "lr": 5.869135554648728e-07, "epoch": 0.6720977596741344, "percentage": 67.21, "elapsed_time": "0:19:57", "remaining_time": "0:09:44", "throughput": 6346.79, "total_tokens": 7600128} +{"current_steps": 7925, "total_steps": 11784, "loss": 0.0523, "lr": 5.855651063468411e-07, "epoch": 0.6725220638153429, "percentage": 67.25, "elapsed_time": "0:19:57", "remaining_time": "0:09:43", "throughput": 6348.54, "total_tokens": 7604288} +{"current_steps": 7930, "total_steps": 11784, "loss": 0.0387, "lr": 5.84217566460665e-07, "epoch": 0.6729463679565513, "percentage": 67.29, "elapsed_time": "0:19:58", "remaining_time": "0:09:42", "throughput": 6350.93, "total_tokens": 7609280} +{"current_steps": 7935, "total_steps": 11784, "loss": 0.0498, "lr": 5.828709387627217e-07, "epoch": 0.6733706720977597, "percentage": 67.34, "elapsed_time": "0:19:58", "remaining_time": "0:09:41", "throughput": 6353.49, "total_tokens": 7614528} +{"current_steps": 7940, "total_steps": 11784, "loss": 0.112, "lr": 5.815252262073891e-07, "epoch": 0.6737949762389681, "percentage": 67.38, "elapsed_time": "0:19:58", "remaining_time": "0:09:40", "throughput": 6355.69, "total_tokens": 7619264} +{"current_steps": 7945, "total_steps": 11784, "loss": 0.0041, "lr": 5.801804317470349e-07, "epoch": 0.6742192803801765, "percentage": 67.42, "elapsed_time": "0:19:59", "remaining_time": "0:09:39", "throughput": 6358.02, "total_tokens": 7624192} +{"current_steps": 7950, "total_steps": 11784, "loss": 0.0446, "lr": 5.788365583320144e-07, "epoch": 0.674643584521385, "percentage": 67.46, "elapsed_time": "0:19:59", "remaining_time": "0:09:38", "throughput": 6360.33, "total_tokens": 7629120} +{"current_steps": 7955, "total_steps": 11784, "loss": 0.0664, "lr": 5.774936089106617e-07, "epoch": 0.6750678886625934, "percentage": 67.51, "elapsed_time": "0:19:59", "remaining_time": "0:09:37", "throughput": 6362.59, "total_tokens": 7633984} +{"current_steps": 7960, "total_steps": 11784, "loss": 0.0222, "lr": 5.761515864292835e-07, "epoch": 0.6754921928038018, "percentage": 67.55, "elapsed_time": "0:20:00", "remaining_time": "0:09:36", "throughput": 6364.95, "total_tokens": 7638976} +{"current_steps": 7965, "total_steps": 11784, "loss": 0.001, "lr": 5.748104938321534e-07, "epoch": 0.6759164969450102, "percentage": 67.59, "elapsed_time": "0:20:00", "remaining_time": "0:09:35", "throughput": 6367.0, "total_tokens": 7643520} +{"current_steps": 7970, "total_steps": 11784, "loss": 0.0733, "lr": 5.734703340615049e-07, "epoch": 0.6763408010862186, "percentage": 67.63, "elapsed_time": "0:20:00", "remaining_time": "0:09:34", "throughput": 6369.4, "total_tokens": 7648576} +{"current_steps": 7975, "total_steps": 11784, "loss": 0.0419, "lr": 5.721311100575235e-07, "epoch": 0.676765105227427, "percentage": 67.68, "elapsed_time": "0:20:01", "remaining_time": "0:09:33", "throughput": 6371.78, "total_tokens": 7653632} +{"current_steps": 7980, "total_steps": 11784, "loss": 0.0034, "lr": 5.707928247583444e-07, "epoch": 0.6771894093686355, "percentage": 67.72, "elapsed_time": "0:20:01", "remaining_time": "0:09:32", "throughput": 6373.82, "total_tokens": 7658176} +{"current_steps": 7985, "total_steps": 11784, "loss": 0.1114, "lr": 5.694554811000407e-07, "epoch": 0.6776137135098439, "percentage": 67.76, "elapsed_time": "0:20:01", "remaining_time": "0:09:31", "throughput": 6376.09, "total_tokens": 7663040} +{"current_steps": 7990, "total_steps": 11784, "loss": 0.0707, "lr": 5.681190820166213e-07, "epoch": 0.6780380176510523, "percentage": 67.8, "elapsed_time": "0:20:02", "remaining_time": "0:09:30", "throughput": 6378.53, "total_tokens": 7668160} +{"current_steps": 7995, "total_steps": 11784, "loss": 0.0181, "lr": 5.667836304400221e-07, "epoch": 0.6784623217922607, "percentage": 67.85, "elapsed_time": "0:20:02", "remaining_time": "0:09:29", "throughput": 6380.79, "total_tokens": 7673024} +{"current_steps": 8000, "total_steps": 11784, "loss": 0.0702, "lr": 5.654491293001005e-07, "epoch": 0.6788866259334692, "percentage": 67.89, "elapsed_time": "0:20:02", "remaining_time": "0:09:28", "throughput": 6382.91, "total_tokens": 7677696} +{"current_steps": 8005, "total_steps": 11784, "loss": 0.046, "lr": 5.641155815246289e-07, "epoch": 0.6793109300746776, "percentage": 67.93, "elapsed_time": "0:20:03", "remaining_time": "0:09:28", "throughput": 6385.33, "total_tokens": 7682752} +{"current_steps": 8010, "total_steps": 11784, "loss": 0.0835, "lr": 5.62782990039288e-07, "epoch": 0.679735234215886, "percentage": 67.97, "elapsed_time": "0:20:03", "remaining_time": "0:09:27", "throughput": 6387.76, "total_tokens": 7687872} +{"current_steps": 8015, "total_steps": 11784, "loss": 0.0774, "lr": 5.614513577676592e-07, "epoch": 0.6801595383570944, "percentage": 68.02, "elapsed_time": "0:20:03", "remaining_time": "0:09:26", "throughput": 6390.04, "total_tokens": 7692800} +{"current_steps": 8020, "total_steps": 11784, "loss": 0.0339, "lr": 5.601206876312223e-07, "epoch": 0.6805838424983028, "percentage": 68.06, "elapsed_time": "0:20:04", "remaining_time": "0:09:25", "throughput": 6392.13, "total_tokens": 7697472} +{"current_steps": 8025, "total_steps": 11784, "loss": 0.0687, "lr": 5.587909825493433e-07, "epoch": 0.6810081466395111, "percentage": 68.1, "elapsed_time": "0:20:04", "remaining_time": "0:09:24", "throughput": 6394.36, "total_tokens": 7702336} +{"current_steps": 8030, "total_steps": 11784, "loss": 0.0828, "lr": 5.57462245439273e-07, "epoch": 0.6814324507807196, "percentage": 68.14, "elapsed_time": "0:20:04", "remaining_time": "0:09:23", "throughput": 6396.56, "total_tokens": 7707136} +{"current_steps": 8035, "total_steps": 11784, "loss": 0.1116, "lr": 5.561344792161373e-07, "epoch": 0.681856754921928, "percentage": 68.19, "elapsed_time": "0:20:05", "remaining_time": "0:09:22", "throughput": 6398.66, "total_tokens": 7711808} +{"current_steps": 8040, "total_steps": 11784, "loss": 0.0646, "lr": 5.54807686792933e-07, "epoch": 0.6822810590631364, "percentage": 68.23, "elapsed_time": "0:20:05", "remaining_time": "0:09:21", "throughput": 6400.67, "total_tokens": 7716352} +{"current_steps": 8045, "total_steps": 11784, "loss": 0.1109, "lr": 5.534818710805198e-07, "epoch": 0.6827053632043448, "percentage": 68.27, "elapsed_time": "0:20:05", "remaining_time": "0:09:20", "throughput": 6402.77, "total_tokens": 7721024} +{"current_steps": 8050, "total_steps": 11784, "loss": 0.0793, "lr": 5.52157034987615e-07, "epoch": 0.6831296673455532, "percentage": 68.31, "elapsed_time": "0:20:06", "remaining_time": "0:09:19", "throughput": 6405.21, "total_tokens": 7726208} +{"current_steps": 8055, "total_steps": 11784, "loss": 0.0826, "lr": 5.508331814207864e-07, "epoch": 0.6835539714867617, "percentage": 68.36, "elapsed_time": "0:20:06", "remaining_time": "0:09:18", "throughput": 6407.48, "total_tokens": 7731136} +{"current_steps": 8060, "total_steps": 11784, "loss": 0.1033, "lr": 5.495103132844466e-07, "epoch": 0.6839782756279701, "percentage": 68.4, "elapsed_time": "0:20:06", "remaining_time": "0:09:17", "throughput": 6409.4, "total_tokens": 7735552} +{"current_steps": 8065, "total_steps": 11784, "loss": 0.0839, "lr": 5.481884334808463e-07, "epoch": 0.6844025797691785, "percentage": 68.44, "elapsed_time": "0:20:07", "remaining_time": "0:09:16", "throughput": 6411.33, "total_tokens": 7740032} +{"current_steps": 8070, "total_steps": 11784, "loss": 0.0441, "lr": 5.468675449100664e-07, "epoch": 0.6848268839103869, "percentage": 68.48, "elapsed_time": "0:20:07", "remaining_time": "0:09:15", "throughput": 6413.57, "total_tokens": 7744960} +{"current_steps": 8075, "total_steps": 11784, "loss": 0.1324, "lr": 5.455476504700161e-07, "epoch": 0.6852511880515954, "percentage": 68.53, "elapsed_time": "0:20:07", "remaining_time": "0:09:14", "throughput": 6415.64, "total_tokens": 7749632} +{"current_steps": 8080, "total_steps": 11784, "loss": 0.0765, "lr": 5.442287530564203e-07, "epoch": 0.6856754921928038, "percentage": 68.57, "elapsed_time": "0:20:08", "remaining_time": "0:09:13", "throughput": 6417.64, "total_tokens": 7754176} +{"current_steps": 8085, "total_steps": 11784, "loss": 0.1193, "lr": 5.429108555628186e-07, "epoch": 0.6860997963340122, "percentage": 68.61, "elapsed_time": "0:20:08", "remaining_time": "0:09:12", "throughput": 6420.09, "total_tokens": 7759360} +{"current_steps": 8090, "total_steps": 11784, "loss": 0.0654, "lr": 5.415939608805564e-07, "epoch": 0.6865241004752206, "percentage": 68.65, "elapsed_time": "0:20:08", "remaining_time": "0:09:12", "throughput": 6422.1, "total_tokens": 7763904} +{"current_steps": 8095, "total_steps": 11784, "loss": 0.0189, "lr": 5.402780718987789e-07, "epoch": 0.686948404616429, "percentage": 68.69, "elapsed_time": "0:20:09", "remaining_time": "0:09:11", "throughput": 6424.0, "total_tokens": 7768320} +{"current_steps": 8100, "total_steps": 11784, "loss": 0.0697, "lr": 5.389631915044249e-07, "epoch": 0.6873727087576375, "percentage": 68.74, "elapsed_time": "0:20:09", "remaining_time": "0:09:10", "throughput": 6426.07, "total_tokens": 7772928} +{"current_steps": 8105, "total_steps": 11784, "loss": 0.0421, "lr": 5.376493225822208e-07, "epoch": 0.6877970128988459, "percentage": 68.78, "elapsed_time": "0:20:09", "remaining_time": "0:09:09", "throughput": 6427.94, "total_tokens": 7777280} +{"current_steps": 8110, "total_steps": 11784, "loss": 0.0558, "lr": 5.363364680146725e-07, "epoch": 0.6882213170400543, "percentage": 68.82, "elapsed_time": "0:20:10", "remaining_time": "0:09:08", "throughput": 6430.13, "total_tokens": 7782080} +{"current_steps": 8115, "total_steps": 11784, "loss": 0.0597, "lr": 5.350246306820632e-07, "epoch": 0.6886456211812627, "percentage": 68.86, "elapsed_time": "0:20:10", "remaining_time": "0:09:07", "throughput": 6432.32, "total_tokens": 7786880} +{"current_steps": 8120, "total_steps": 11784, "loss": 0.0316, "lr": 5.337138134624412e-07, "epoch": 0.6890699253224711, "percentage": 68.91, "elapsed_time": "0:20:10", "remaining_time": "0:09:06", "throughput": 6434.18, "total_tokens": 7791232} +{"current_steps": 8125, "total_steps": 11784, "loss": 0.0082, "lr": 5.324040192316193e-07, "epoch": 0.6894942294636796, "percentage": 68.95, "elapsed_time": "0:20:11", "remaining_time": "0:09:05", "throughput": 6436.45, "total_tokens": 7796160} +{"current_steps": 8130, "total_steps": 11784, "loss": 0.0278, "lr": 5.310952508631645e-07, "epoch": 0.689918533604888, "percentage": 68.99, "elapsed_time": "0:20:11", "remaining_time": "0:09:04", "throughput": 6438.68, "total_tokens": 7801024} +{"current_steps": 8135, "total_steps": 11784, "loss": 0.0599, "lr": 5.297875112283941e-07, "epoch": 0.6903428377460964, "percentage": 69.03, "elapsed_time": "0:20:11", "remaining_time": "0:09:03", "throughput": 6440.95, "total_tokens": 7805952} +{"current_steps": 8140, "total_steps": 11784, "loss": 0.0434, "lr": 5.284808031963676e-07, "epoch": 0.6907671418873048, "percentage": 69.08, "elapsed_time": "0:20:12", "remaining_time": "0:09:02", "throughput": 6443.28, "total_tokens": 7811008} +{"current_steps": 8145, "total_steps": 11784, "loss": 0.0109, "lr": 5.271751296338822e-07, "epoch": 0.6911914460285132, "percentage": 69.12, "elapsed_time": "0:20:12", "remaining_time": "0:09:01", "throughput": 6445.08, "total_tokens": 7815296} +{"current_steps": 8150, "total_steps": 11784, "loss": 0.0378, "lr": 5.25870493405464e-07, "epoch": 0.6916157501697217, "percentage": 69.16, "elapsed_time": "0:20:12", "remaining_time": "0:09:00", "throughput": 6447.78, "total_tokens": 7820864} +{"current_steps": 8155, "total_steps": 11784, "loss": 0.0569, "lr": 5.245668973733657e-07, "epoch": 0.6920400543109301, "percentage": 69.2, "elapsed_time": "0:20:13", "remaining_time": "0:08:59", "throughput": 6450.07, "total_tokens": 7825856} +{"current_steps": 8160, "total_steps": 11784, "loss": 0.0184, "lr": 5.232643443975553e-07, "epoch": 0.6924643584521385, "percentage": 69.25, "elapsed_time": "0:20:13", "remaining_time": "0:08:58", "throughput": 6452.05, "total_tokens": 7830400} +{"current_steps": 8165, "total_steps": 11784, "loss": 0.0032, "lr": 5.219628373357142e-07, "epoch": 0.6928886625933469, "percentage": 69.29, "elapsed_time": "0:20:13", "remaining_time": "0:08:58", "throughput": 6454.6, "total_tokens": 7835776} +{"current_steps": 8170, "total_steps": 11784, "loss": 0.0689, "lr": 5.206623790432285e-07, "epoch": 0.6933129667345553, "percentage": 69.33, "elapsed_time": "0:20:14", "remaining_time": "0:08:57", "throughput": 6456.77, "total_tokens": 7840640} +{"current_steps": 8175, "total_steps": 11784, "loss": 0.0707, "lr": 5.193629723731837e-07, "epoch": 0.6937372708757638, "percentage": 69.37, "elapsed_time": "0:20:14", "remaining_time": "0:08:56", "throughput": 6458.71, "total_tokens": 7845120} +{"current_steps": 8180, "total_steps": 11784, "loss": 0.0528, "lr": 5.180646201763577e-07, "epoch": 0.6941615750169722, "percentage": 69.42, "elapsed_time": "0:20:14", "remaining_time": "0:08:55", "throughput": 6460.71, "total_tokens": 7849664} +{"current_steps": 8185, "total_steps": 11784, "loss": 0.008, "lr": 5.167673253012152e-07, "epoch": 0.6945858791581806, "percentage": 69.46, "elapsed_time": "0:20:15", "remaining_time": "0:08:54", "throughput": 6462.59, "total_tokens": 7854080} +{"current_steps": 8190, "total_steps": 11784, "loss": 0.0412, "lr": 5.154710905939015e-07, "epoch": 0.695010183299389, "percentage": 69.5, "elapsed_time": "0:20:15", "remaining_time": "0:08:53", "throughput": 6464.2, "total_tokens": 7858176} +{"current_steps": 8195, "total_steps": 11784, "loss": 0.0572, "lr": 5.141759188982356e-07, "epoch": 0.6954344874405974, "percentage": 69.54, "elapsed_time": "0:20:15", "remaining_time": "0:08:52", "throughput": 6466.32, "total_tokens": 7862912} +{"current_steps": 8200, "total_steps": 11784, "loss": 0.0708, "lr": 5.12881813055705e-07, "epoch": 0.6958587915818059, "percentage": 69.59, "elapsed_time": "0:20:16", "remaining_time": "0:08:51", "throughput": 6468.41, "total_tokens": 7867648} +{"current_steps": 8205, "total_steps": 11784, "loss": 0.1211, "lr": 5.115887759054571e-07, "epoch": 0.6962830957230143, "percentage": 69.63, "elapsed_time": "0:20:16", "remaining_time": "0:08:50", "throughput": 6471.04, "total_tokens": 7873152} +{"current_steps": 8210, "total_steps": 11784, "loss": 0.0675, "lr": 5.102968102842973e-07, "epoch": 0.6967073998642227, "percentage": 69.67, "elapsed_time": "0:20:17", "remaining_time": "0:08:49", "throughput": 6473.1, "total_tokens": 7877824} +{"current_steps": 8215, "total_steps": 11784, "loss": 0.0948, "lr": 5.090059190266779e-07, "epoch": 0.6971317040054311, "percentage": 69.71, "elapsed_time": "0:20:17", "remaining_time": "0:08:48", "throughput": 6475.3, "total_tokens": 7882688} +{"current_steps": 8220, "total_steps": 11784, "loss": 0.0272, "lr": 5.077161049646951e-07, "epoch": 0.6975560081466395, "percentage": 69.76, "elapsed_time": "0:20:17", "remaining_time": "0:08:47", "throughput": 6477.4, "total_tokens": 7887424} +{"current_steps": 8225, "total_steps": 11784, "loss": 0.0328, "lr": 5.06427370928082e-07, "epoch": 0.697980312287848, "percentage": 69.8, "elapsed_time": "0:20:18", "remaining_time": "0:08:47", "throughput": 6479.09, "total_tokens": 7891584} +{"current_steps": 8230, "total_steps": 11784, "loss": 0.1217, "lr": 5.05139719744202e-07, "epoch": 0.6984046164290564, "percentage": 69.84, "elapsed_time": "0:20:18", "remaining_time": "0:08:46", "throughput": 6481.37, "total_tokens": 7896576} +{"current_steps": 8235, "total_steps": 11784, "loss": 0.0685, "lr": 5.038531542380425e-07, "epoch": 0.6988289205702648, "percentage": 69.88, "elapsed_time": "0:20:18", "remaining_time": "0:08:45", "throughput": 6483.52, "total_tokens": 7901376} +{"current_steps": 8240, "total_steps": 11784, "loss": 0.0305, "lr": 5.025676772322099e-07, "epoch": 0.6992532247114732, "percentage": 69.93, "elapsed_time": "0:20:19", "remaining_time": "0:08:44", "throughput": 6485.79, "total_tokens": 7906368} +{"current_steps": 8245, "total_steps": 11784, "loss": 0.0503, "lr": 5.012832915469207e-07, "epoch": 0.6996775288526816, "percentage": 69.97, "elapsed_time": "0:20:19", "remaining_time": "0:08:43", "throughput": 6487.98, "total_tokens": 7911232} +{"current_steps": 8250, "total_steps": 11784, "loss": 0.0654, "lr": 5.000000000000002e-07, "epoch": 0.7001018329938901, "percentage": 70.01, "elapsed_time": "0:20:19", "remaining_time": "0:08:42", "throughput": 6489.93, "total_tokens": 7915776} +{"current_steps": 8255, "total_steps": 11784, "loss": 0.0226, "lr": 4.987178054068699e-07, "epoch": 0.7005261371350985, "percentage": 70.05, "elapsed_time": "0:20:20", "remaining_time": "0:08:41", "throughput": 6491.78, "total_tokens": 7920192} +{"current_steps": 8260, "total_steps": 11784, "loss": 0.0633, "lr": 4.97436710580547e-07, "epoch": 0.7009504412763069, "percentage": 70.1, "elapsed_time": "0:20:20", "remaining_time": "0:08:40", "throughput": 6493.77, "total_tokens": 7924800} +{"current_steps": 8260, "total_steps": 11784, "eval_loss": 0.0545908585190773, "epoch": 0.7009504412763069, "percentage": 70.1, "elapsed_time": "0:20:36", "remaining_time": "0:08:47", "throughput": 6410.12, "total_tokens": 7924800} +{"current_steps": 8265, "total_steps": 11784, "loss": 0.0334, "lr": 4.961567183316348e-07, "epoch": 0.7013747454175153, "percentage": 70.14, "elapsed_time": "0:21:08", "remaining_time": "0:09:00", "throughput": 6252.26, "total_tokens": 7929664} +{"current_steps": 8270, "total_steps": 11784, "loss": 0.1168, "lr": 4.948778314683183e-07, "epoch": 0.7017990495587237, "percentage": 70.18, "elapsed_time": "0:21:08", "remaining_time": "0:08:59", "throughput": 6254.41, "total_tokens": 7934464} +{"current_steps": 8275, "total_steps": 11784, "loss": 0.0608, "lr": 4.93600052796357e-07, "epoch": 0.7022233536999322, "percentage": 70.22, "elapsed_time": "0:21:08", "remaining_time": "0:08:58", "throughput": 6256.56, "total_tokens": 7939264} +{"current_steps": 8280, "total_steps": 11784, "loss": 0.0751, "lr": 4.923233851190794e-07, "epoch": 0.7026476578411406, "percentage": 70.26, "elapsed_time": "0:21:09", "remaining_time": "0:08:57", "throughput": 6258.33, "total_tokens": 7943552} +{"current_steps": 8285, "total_steps": 11784, "loss": 0.0151, "lr": 4.910478312373756e-07, "epoch": 0.703071961982349, "percentage": 70.31, "elapsed_time": "0:21:09", "remaining_time": "0:08:56", "throughput": 6260.25, "total_tokens": 7948032} +{"current_steps": 8290, "total_steps": 11784, "loss": 0.0642, "lr": 4.897733939496942e-07, "epoch": 0.7034962661235573, "percentage": 70.35, "elapsed_time": "0:21:09", "remaining_time": "0:08:55", "throughput": 6262.78, "total_tokens": 7953408} +{"current_steps": 8295, "total_steps": 11784, "loss": 0.0675, "lr": 4.885000760520317e-07, "epoch": 0.7039205702647657, "percentage": 70.39, "elapsed_time": "0:21:10", "remaining_time": "0:08:54", "throughput": 6264.6, "total_tokens": 7957760} +{"current_steps": 8300, "total_steps": 11784, "loss": 0.035, "lr": 4.872278803379299e-07, "epoch": 0.7043448744059742, "percentage": 70.43, "elapsed_time": "0:21:10", "remaining_time": "0:08:53", "throughput": 6267.5, "total_tokens": 7963712} +{"current_steps": 8305, "total_steps": 11784, "loss": 0.0596, "lr": 4.8595680959847e-07, "epoch": 0.7047691785471826, "percentage": 70.48, "elapsed_time": "0:21:10", "remaining_time": "0:08:52", "throughput": 6269.69, "total_tokens": 7968576} +{"current_steps": 8310, "total_steps": 11784, "loss": 0.0847, "lr": 4.846868666222622e-07, "epoch": 0.705193482688391, "percentage": 70.52, "elapsed_time": "0:21:11", "remaining_time": "0:08:51", "throughput": 6271.69, "total_tokens": 7973184} +{"current_steps": 8315, "total_steps": 11784, "loss": 0.0375, "lr": 4.834180541954447e-07, "epoch": 0.7056177868295994, "percentage": 70.56, "elapsed_time": "0:21:11", "remaining_time": "0:08:50", "throughput": 6273.68, "total_tokens": 7977792} +{"current_steps": 8320, "total_steps": 11784, "loss": 0.0239, "lr": 4.821503751016745e-07, "epoch": 0.7060420909708078, "percentage": 70.6, "elapsed_time": "0:21:11", "remaining_time": "0:08:49", "throughput": 6275.26, "total_tokens": 7981824} +{"current_steps": 8325, "total_steps": 11784, "loss": 0.0527, "lr": 4.808838321221226e-07, "epoch": 0.7064663951120163, "percentage": 70.65, "elapsed_time": "0:21:12", "remaining_time": "0:08:48", "throughput": 6277.03, "total_tokens": 7986112} +{"current_steps": 8330, "total_steps": 11784, "loss": 0.1099, "lr": 4.79618428035467e-07, "epoch": 0.7068906992532247, "percentage": 70.69, "elapsed_time": "0:21:12", "remaining_time": "0:08:47", "throughput": 6278.95, "total_tokens": 7990592} +{"current_steps": 8335, "total_steps": 11784, "loss": 0.1138, "lr": 4.78354165617888e-07, "epoch": 0.7073150033944331, "percentage": 70.73, "elapsed_time": "0:21:12", "remaining_time": "0:08:46", "throughput": 6280.9, "total_tokens": 7995136} +{"current_steps": 8340, "total_steps": 11784, "loss": 0.0542, "lr": 4.77091047643059e-07, "epoch": 0.7077393075356415, "percentage": 70.77, "elapsed_time": "0:21:13", "remaining_time": "0:08:45", "throughput": 6283.01, "total_tokens": 7999872} +{"current_steps": 8345, "total_steps": 11784, "loss": 0.0385, "lr": 4.7582907688214593e-07, "epoch": 0.7081636116768499, "percentage": 70.82, "elapsed_time": "0:21:13", "remaining_time": "0:08:44", "throughput": 6285.09, "total_tokens": 8004608} +{"current_steps": 8350, "total_steps": 11784, "loss": 0.0206, "lr": 4.745682561037947e-07, "epoch": 0.7085879158180584, "percentage": 70.86, "elapsed_time": "0:21:13", "remaining_time": "0:08:43", "throughput": 6286.96, "total_tokens": 8009024} +{"current_steps": 8355, "total_steps": 11784, "loss": 0.0727, "lr": 4.733085880741301e-07, "epoch": 0.7090122199592668, "percentage": 70.9, "elapsed_time": "0:21:14", "remaining_time": "0:08:42", "throughput": 6288.87, "total_tokens": 8013504} +{"current_steps": 8360, "total_steps": 11784, "loss": 0.0952, "lr": 4.7205007555674714e-07, "epoch": 0.7094365241004752, "percentage": 70.94, "elapsed_time": "0:21:14", "remaining_time": "0:08:42", "throughput": 6291.09, "total_tokens": 8018432} +{"current_steps": 8365, "total_steps": 11784, "loss": 0.0025, "lr": 4.707927213127062e-07, "epoch": 0.7098608282416836, "percentage": 70.99, "elapsed_time": "0:21:14", "remaining_time": "0:08:41", "throughput": 6292.95, "total_tokens": 8022848} +{"current_steps": 8370, "total_steps": 11784, "loss": 0.0645, "lr": 4.6953652810052615e-07, "epoch": 0.710285132382892, "percentage": 71.03, "elapsed_time": "0:21:15", "remaining_time": "0:08:40", "throughput": 6294.88, "total_tokens": 8027392} +{"current_steps": 8375, "total_steps": 11784, "loss": 0.154, "lr": 4.682814986761792e-07, "epoch": 0.7107094365241005, "percentage": 71.07, "elapsed_time": "0:21:15", "remaining_time": "0:08:39", "throughput": 6297.29, "total_tokens": 8032640} +{"current_steps": 8380, "total_steps": 11784, "loss": 0.0511, "lr": 4.670276357930828e-07, "epoch": 0.7111337406653089, "percentage": 71.11, "elapsed_time": "0:21:15", "remaining_time": "0:08:38", "throughput": 6299.33, "total_tokens": 8037312} +{"current_steps": 8385, "total_steps": 11784, "loss": 0.1108, "lr": 4.657749422020979e-07, "epoch": 0.7115580448065173, "percentage": 71.16, "elapsed_time": "0:21:16", "remaining_time": "0:08:37", "throughput": 6301.54, "total_tokens": 8042240} +{"current_steps": 8390, "total_steps": 11784, "loss": 0.0449, "lr": 4.6452342065151704e-07, "epoch": 0.7119823489477257, "percentage": 71.2, "elapsed_time": "0:21:16", "remaining_time": "0:08:36", "throughput": 6303.87, "total_tokens": 8047360} +{"current_steps": 8395, "total_steps": 11784, "loss": 0.0763, "lr": 4.632730738870634e-07, "epoch": 0.7124066530889341, "percentage": 71.24, "elapsed_time": "0:21:16", "remaining_time": "0:08:35", "throughput": 6305.94, "total_tokens": 8052096} +{"current_steps": 8400, "total_steps": 11784, "loss": 0.0305, "lr": 4.6202390465188225e-07, "epoch": 0.7128309572301426, "percentage": 71.28, "elapsed_time": "0:21:17", "remaining_time": "0:08:34", "throughput": 6308.1, "total_tokens": 8056960} +{"current_steps": 8405, "total_steps": 11784, "loss": 0.0582, "lr": 4.6077591568653575e-07, "epoch": 0.713255261371351, "percentage": 71.33, "elapsed_time": "0:21:17", "remaining_time": "0:08:33", "throughput": 6310.08, "total_tokens": 8061568} +{"current_steps": 8410, "total_steps": 11784, "loss": 0.0432, "lr": 4.595291097289952e-07, "epoch": 0.7136795655125594, "percentage": 71.37, "elapsed_time": "0:21:17", "remaining_time": "0:08:32", "throughput": 6312.36, "total_tokens": 8066624} +{"current_steps": 8415, "total_steps": 11784, "loss": 0.0634, "lr": 4.582834895146391e-07, "epoch": 0.7141038696537678, "percentage": 71.41, "elapsed_time": "0:21:18", "remaining_time": "0:08:31", "throughput": 6314.64, "total_tokens": 8071680} +{"current_steps": 8420, "total_steps": 11784, "loss": 0.061, "lr": 4.5703905777624184e-07, "epoch": 0.7145281737949762, "percentage": 71.45, "elapsed_time": "0:21:18", "remaining_time": "0:08:30", "throughput": 6316.72, "total_tokens": 8076480} +{"current_steps": 8425, "total_steps": 11784, "loss": 0.0268, "lr": 4.5579581724397255e-07, "epoch": 0.7149524779361847, "percentage": 71.5, "elapsed_time": "0:21:18", "remaining_time": "0:08:29", "throughput": 6318.48, "total_tokens": 8080768} +{"current_steps": 8430, "total_steps": 11784, "loss": 0.0509, "lr": 4.5455377064538603e-07, "epoch": 0.7153767820773931, "percentage": 71.54, "elapsed_time": "0:21:19", "remaining_time": "0:08:28", "throughput": 6320.39, "total_tokens": 8085248} +{"current_steps": 8435, "total_steps": 11784, "loss": 0.0485, "lr": 4.533129207054167e-07, "epoch": 0.7158010862186015, "percentage": 71.58, "elapsed_time": "0:21:19", "remaining_time": "0:08:28", "throughput": 6322.62, "total_tokens": 8090176} +{"current_steps": 8440, "total_steps": 11784, "loss": 0.0764, "lr": 4.520732701463762e-07, "epoch": 0.7162253903598099, "percentage": 71.62, "elapsed_time": "0:21:19", "remaining_time": "0:08:27", "throughput": 6324.38, "total_tokens": 8094464} +{"current_steps": 8445, "total_steps": 11784, "loss": 0.1021, "lr": 4.508348216879421e-07, "epoch": 0.7166496945010183, "percentage": 71.66, "elapsed_time": "0:21:20", "remaining_time": "0:08:26", "throughput": 6326.32, "total_tokens": 8099008} +{"current_steps": 8450, "total_steps": 11784, "loss": 0.0745, "lr": 4.4959757804715613e-07, "epoch": 0.7170739986422268, "percentage": 71.71, "elapsed_time": "0:21:20", "remaining_time": "0:08:25", "throughput": 6328.73, "total_tokens": 8104256} +{"current_steps": 8455, "total_steps": 11784, "loss": 0.0703, "lr": 4.483615419384167e-07, "epoch": 0.7174983027834352, "percentage": 71.75, "elapsed_time": "0:21:20", "remaining_time": "0:08:24", "throughput": 6330.82, "total_tokens": 8109056} +{"current_steps": 8460, "total_steps": 11784, "loss": 0.1058, "lr": 4.4712671607347307e-07, "epoch": 0.7179226069246436, "percentage": 71.79, "elapsed_time": "0:21:21", "remaining_time": "0:08:23", "throughput": 6333.14, "total_tokens": 8114176} +{"current_steps": 8465, "total_steps": 11784, "loss": 0.0812, "lr": 4.458931031614179e-07, "epoch": 0.718346911065852, "percentage": 71.83, "elapsed_time": "0:21:21", "remaining_time": "0:08:22", "throughput": 6335.42, "total_tokens": 8119232} +{"current_steps": 8470, "total_steps": 11784, "loss": 0.0612, "lr": 4.4466070590868543e-07, "epoch": 0.7187712152070604, "percentage": 71.88, "elapsed_time": "0:21:21", "remaining_time": "0:08:21", "throughput": 6337.39, "total_tokens": 8123840} +{"current_steps": 8475, "total_steps": 11784, "loss": 0.1036, "lr": 4.434295270190402e-07, "epoch": 0.7191955193482689, "percentage": 71.92, "elapsed_time": "0:21:22", "remaining_time": "0:08:20", "throughput": 6339.24, "total_tokens": 8128256} +{"current_steps": 8480, "total_steps": 11784, "loss": 0.0321, "lr": 4.4219956919357546e-07, "epoch": 0.7196198234894773, "percentage": 71.96, "elapsed_time": "0:21:22", "remaining_time": "0:08:19", "throughput": 6341.63, "total_tokens": 8133504} +{"current_steps": 8485, "total_steps": 11784, "loss": 0.0123, "lr": 4.409708351307049e-07, "epoch": 0.7200441276306857, "percentage": 72.0, "elapsed_time": "0:21:22", "remaining_time": "0:08:18", "throughput": 6343.64, "total_tokens": 8138176} +{"current_steps": 8490, "total_steps": 11784, "loss": 0.0502, "lr": 4.3974332752615727e-07, "epoch": 0.7204684317718941, "percentage": 72.05, "elapsed_time": "0:21:23", "remaining_time": "0:08:17", "throughput": 6345.83, "total_tokens": 8143104} +{"current_steps": 8495, "total_steps": 11784, "loss": 0.028, "lr": 4.385170490729712e-07, "epoch": 0.7208927359131025, "percentage": 72.09, "elapsed_time": "0:21:23", "remaining_time": "0:08:16", "throughput": 6347.7, "total_tokens": 8147584} +{"current_steps": 8500, "total_steps": 11784, "loss": 0.1012, "lr": 4.3729200246148835e-07, "epoch": 0.721317040054311, "percentage": 72.13, "elapsed_time": "0:21:23", "remaining_time": "0:08:16", "throughput": 6350.13, "total_tokens": 8152896} +{"current_steps": 8505, "total_steps": 11784, "loss": 0.0263, "lr": 4.3606819037934673e-07, "epoch": 0.7217413441955194, "percentage": 72.17, "elapsed_time": "0:21:24", "remaining_time": "0:08:15", "throughput": 6352.69, "total_tokens": 8158400} +{"current_steps": 8510, "total_steps": 11784, "loss": 0.0805, "lr": 4.348456155114786e-07, "epoch": 0.7221656483367278, "percentage": 72.22, "elapsed_time": "0:21:24", "remaining_time": "0:08:14", "throughput": 6355.01, "total_tokens": 8163520} +{"current_steps": 8515, "total_steps": 11784, "loss": 0.0636, "lr": 4.336242805400989e-07, "epoch": 0.7225899524779362, "percentage": 72.26, "elapsed_time": "0:21:24", "remaining_time": "0:08:13", "throughput": 6357.05, "total_tokens": 8168256} +{"current_steps": 8520, "total_steps": 11784, "loss": 0.0225, "lr": 4.324041881447041e-07, "epoch": 0.7230142566191446, "percentage": 72.3, "elapsed_time": "0:21:25", "remaining_time": "0:08:12", "throughput": 6359.07, "total_tokens": 8172928} +{"current_steps": 8525, "total_steps": 11784, "loss": 0.033, "lr": 4.311853410020643e-07, "epoch": 0.7234385607603531, "percentage": 72.34, "elapsed_time": "0:21:25", "remaining_time": "0:08:11", "throughput": 6361.15, "total_tokens": 8177728} +{"current_steps": 8530, "total_steps": 11784, "loss": 0.0376, "lr": 4.299677417862173e-07, "epoch": 0.7238628649015615, "percentage": 72.39, "elapsed_time": "0:21:25", "remaining_time": "0:08:10", "throughput": 6363.08, "total_tokens": 8182272} +{"current_steps": 8535, "total_steps": 11784, "loss": 0.142, "lr": 4.287513931684634e-07, "epoch": 0.7242871690427699, "percentage": 72.43, "elapsed_time": "0:21:26", "remaining_time": "0:08:09", "throughput": 6365.64, "total_tokens": 8187776} +{"current_steps": 8540, "total_steps": 11784, "loss": 0.0742, "lr": 4.2753629781735936e-07, "epoch": 0.7247114731839783, "percentage": 72.47, "elapsed_time": "0:21:26", "remaining_time": "0:08:08", "throughput": 6368.0, "total_tokens": 8192960} +{"current_steps": 8545, "total_steps": 11784, "loss": 0.1659, "lr": 4.2632245839871095e-07, "epoch": 0.7251357773251867, "percentage": 72.51, "elapsed_time": "0:21:26", "remaining_time": "0:08:07", "throughput": 6370.13, "total_tokens": 8197824} +{"current_steps": 8550, "total_steps": 11784, "loss": 0.0614, "lr": 4.251098775755708e-07, "epoch": 0.7255600814663951, "percentage": 72.56, "elapsed_time": "0:21:27", "remaining_time": "0:08:06", "throughput": 6372.39, "total_tokens": 8202880} +{"current_steps": 8555, "total_steps": 11784, "loss": 0.1057, "lr": 4.238985580082293e-07, "epoch": 0.7259843856076035, "percentage": 72.6, "elapsed_time": "0:21:27", "remaining_time": "0:08:05", "throughput": 6374.38, "total_tokens": 8207552} +{"current_steps": 8560, "total_steps": 11784, "loss": 0.0801, "lr": 4.2268850235420827e-07, "epoch": 0.7264086897488119, "percentage": 72.64, "elapsed_time": "0:21:27", "remaining_time": "0:08:05", "throughput": 6376.67, "total_tokens": 8212672} +{"current_steps": 8565, "total_steps": 11784, "loss": 0.0461, "lr": 4.214797132682596e-07, "epoch": 0.7268329938900203, "percentage": 72.68, "elapsed_time": "0:21:28", "remaining_time": "0:08:04", "throughput": 6379.99, "total_tokens": 8219392} +{"current_steps": 8570, "total_steps": 11784, "loss": 0.0472, "lr": 4.202721934023536e-07, "epoch": 0.7272572980312287, "percentage": 72.73, "elapsed_time": "0:21:28", "remaining_time": "0:08:03", "throughput": 6381.96, "total_tokens": 8224064} +{"current_steps": 8575, "total_steps": 11784, "loss": 0.0636, "lr": 4.19065945405678e-07, "epoch": 0.7276816021724372, "percentage": 72.77, "elapsed_time": "0:21:28", "remaining_time": "0:08:02", "throughput": 6384.36, "total_tokens": 8229376} +{"current_steps": 8580, "total_steps": 11784, "loss": 0.0224, "lr": 4.17860971924629e-07, "epoch": 0.7281059063136456, "percentage": 72.81, "elapsed_time": "0:21:29", "remaining_time": "0:08:01", "throughput": 6386.84, "total_tokens": 8234816} +{"current_steps": 8585, "total_steps": 11784, "loss": 0.0674, "lr": 4.166572756028073e-07, "epoch": 0.728530210454854, "percentage": 72.85, "elapsed_time": "0:21:29", "remaining_time": "0:08:00", "throughput": 6388.88, "total_tokens": 8239552} +{"current_steps": 8590, "total_steps": 11784, "loss": 0.0569, "lr": 4.154548590810113e-07, "epoch": 0.7289545145960624, "percentage": 72.9, "elapsed_time": "0:21:29", "remaining_time": "0:07:59", "throughput": 6390.6, "total_tokens": 8243840} +{"current_steps": 8595, "total_steps": 11784, "loss": 0.1121, "lr": 4.14253724997232e-07, "epoch": 0.7293788187372708, "percentage": 72.94, "elapsed_time": "0:21:30", "remaining_time": "0:07:58", "throughput": 6392.85, "total_tokens": 8248960} +{"current_steps": 8600, "total_steps": 11784, "loss": 0.0483, "lr": 4.1305387598664567e-07, "epoch": 0.7298031228784793, "percentage": 72.98, "elapsed_time": "0:21:30", "remaining_time": "0:07:57", "throughput": 6395.0, "total_tokens": 8253888} +{"current_steps": 8605, "total_steps": 11784, "loss": 0.0628, "lr": 4.118553146816115e-07, "epoch": 0.7302274270196877, "percentage": 73.02, "elapsed_time": "0:21:31", "remaining_time": "0:07:56", "throughput": 6397.15, "total_tokens": 8258816} +{"current_steps": 8610, "total_steps": 11784, "loss": 0.0508, "lr": 4.1065804371166114e-07, "epoch": 0.7306517311608961, "percentage": 73.07, "elapsed_time": "0:21:31", "remaining_time": "0:07:56", "throughput": 6399.08, "total_tokens": 8263424} +{"current_steps": 8615, "total_steps": 11784, "loss": 0.0554, "lr": 4.0946206570349685e-07, "epoch": 0.7310760353021045, "percentage": 73.11, "elapsed_time": "0:21:31", "remaining_time": "0:07:55", "throughput": 6401.17, "total_tokens": 8268288} +{"current_steps": 8620, "total_steps": 11784, "loss": 0.0945, "lr": 4.082673832809838e-07, "epoch": 0.7315003394433129, "percentage": 73.15, "elapsed_time": "0:21:32", "remaining_time": "0:07:54", "throughput": 6403.3, "total_tokens": 8273152} +{"current_steps": 8625, "total_steps": 11784, "loss": 0.0166, "lr": 4.0707399906514483e-07, "epoch": 0.7319246435845214, "percentage": 73.19, "elapsed_time": "0:21:32", "remaining_time": "0:07:53", "throughput": 6405.41, "total_tokens": 8278016} +{"current_steps": 8630, "total_steps": 11784, "loss": 0.0345, "lr": 4.058819156741545e-07, "epoch": 0.7323489477257298, "percentage": 73.23, "elapsed_time": "0:21:32", "remaining_time": "0:07:52", "throughput": 6407.85, "total_tokens": 8283392} +{"current_steps": 8635, "total_steps": 11784, "loss": 0.0519, "lr": 4.0469113572333426e-07, "epoch": 0.7327732518669382, "percentage": 73.28, "elapsed_time": "0:21:33", "remaining_time": "0:07:51", "throughput": 6409.82, "total_tokens": 8288064} +{"current_steps": 8640, "total_steps": 11784, "loss": 0.0699, "lr": 4.03501661825144e-07, "epoch": 0.7331975560081466, "percentage": 73.32, "elapsed_time": "0:21:33", "remaining_time": "0:07:50", "throughput": 6411.75, "total_tokens": 8292672} +{"current_steps": 8645, "total_steps": 11784, "loss": 0.09, "lr": 4.023134965891809e-07, "epoch": 0.733621860149355, "percentage": 73.36, "elapsed_time": "0:21:33", "remaining_time": "0:07:49", "throughput": 6413.7, "total_tokens": 8297344} +{"current_steps": 8650, "total_steps": 11784, "loss": 0.0495, "lr": 4.0112664262216866e-07, "epoch": 0.7340461642905635, "percentage": 73.4, "elapsed_time": "0:21:34", "remaining_time": "0:07:48", "throughput": 6415.7, "total_tokens": 8302080} +{"current_steps": 8655, "total_steps": 11784, "loss": 0.1092, "lr": 3.9994110252795563e-07, "epoch": 0.7344704684317719, "percentage": 73.45, "elapsed_time": "0:21:34", "remaining_time": "0:07:47", "throughput": 6417.82, "total_tokens": 8307008} +{"current_steps": 8660, "total_steps": 11784, "loss": 0.0748, "lr": 3.987568789075072e-07, "epoch": 0.7348947725729803, "percentage": 73.49, "elapsed_time": "0:21:34", "remaining_time": "0:07:47", "throughput": 6420.03, "total_tokens": 8312064} +{"current_steps": 8665, "total_steps": 11784, "loss": 0.0264, "lr": 3.975739743589004e-07, "epoch": 0.7353190767141887, "percentage": 73.53, "elapsed_time": "0:21:35", "remaining_time": "0:07:46", "throughput": 6421.82, "total_tokens": 8316480} +{"current_steps": 8670, "total_steps": 11784, "loss": 0.024, "lr": 3.9639239147731864e-07, "epoch": 0.7357433808553971, "percentage": 73.57, "elapsed_time": "0:21:35", "remaining_time": "0:07:45", "throughput": 6424.13, "total_tokens": 8321664} +{"current_steps": 8675, "total_steps": 11784, "loss": 0.0706, "lr": 3.952121328550455e-07, "epoch": 0.7361676849966056, "percentage": 73.62, "elapsed_time": "0:21:35", "remaining_time": "0:07:44", "throughput": 6425.87, "total_tokens": 8326016} +{"current_steps": 8680, "total_steps": 11784, "loss": 0.01, "lr": 3.9403320108145943e-07, "epoch": 0.736591989137814, "percentage": 73.66, "elapsed_time": "0:21:36", "remaining_time": "0:07:43", "throughput": 6427.82, "total_tokens": 8330688} +{"current_steps": 8685, "total_steps": 11784, "loss": 0.0251, "lr": 3.928555987430275e-07, "epoch": 0.7370162932790224, "percentage": 73.7, "elapsed_time": "0:21:36", "remaining_time": "0:07:42", "throughput": 6429.93, "total_tokens": 8335552} +{"current_steps": 8690, "total_steps": 11784, "loss": 0.0294, "lr": 3.916793284233011e-07, "epoch": 0.7374405974202308, "percentage": 73.74, "elapsed_time": "0:21:36", "remaining_time": "0:07:41", "throughput": 6431.86, "total_tokens": 8340224} +{"current_steps": 8695, "total_steps": 11784, "loss": 0.0556, "lr": 3.9050439270290733e-07, "epoch": 0.7378649015614392, "percentage": 73.79, "elapsed_time": "0:21:37", "remaining_time": "0:07:40", "throughput": 6433.89, "total_tokens": 8345024} +{"current_steps": 8700, "total_steps": 11784, "loss": 0.0438, "lr": 3.8933079415954805e-07, "epoch": 0.7382892057026477, "percentage": 73.83, "elapsed_time": "0:21:37", "remaining_time": "0:07:39", "throughput": 6435.79, "total_tokens": 8349632} +{"current_steps": 8705, "total_steps": 11784, "loss": 0.0502, "lr": 3.8815853536798905e-07, "epoch": 0.7387135098438561, "percentage": 73.87, "elapsed_time": "0:21:37", "remaining_time": "0:07:39", "throughput": 6437.94, "total_tokens": 8354624} +{"current_steps": 8710, "total_steps": 11784, "loss": 0.0513, "lr": 3.8698761890005794e-07, "epoch": 0.7391378139850645, "percentage": 73.91, "elapsed_time": "0:21:38", "remaining_time": "0:07:38", "throughput": 6439.81, "total_tokens": 8359168} +{"current_steps": 8715, "total_steps": 11784, "loss": 0.0154, "lr": 3.858180473246373e-07, "epoch": 0.7395621181262729, "percentage": 73.96, "elapsed_time": "0:21:38", "remaining_time": "0:07:37", "throughput": 6441.82, "total_tokens": 8363968} +{"current_steps": 8720, "total_steps": 11784, "loss": 0.0438, "lr": 3.8464982320765906e-07, "epoch": 0.7399864222674813, "percentage": 74.0, "elapsed_time": "0:21:38", "remaining_time": "0:07:36", "throughput": 6444.09, "total_tokens": 8369152} +{"current_steps": 8725, "total_steps": 11784, "loss": 0.0391, "lr": 3.834829491120991e-07, "epoch": 0.7404107264086898, "percentage": 74.04, "elapsed_time": "0:21:39", "remaining_time": "0:07:35", "throughput": 6446.1, "total_tokens": 8373952} +{"current_steps": 8730, "total_steps": 11784, "loss": 0.0883, "lr": 3.8231742759797157e-07, "epoch": 0.7408350305498982, "percentage": 74.08, "elapsed_time": "0:21:39", "remaining_time": "0:07:34", "throughput": 6448.59, "total_tokens": 8379456} +{"current_steps": 8735, "total_steps": 11784, "loss": 0.0386, "lr": 3.811532612223219e-07, "epoch": 0.7412593346911066, "percentage": 74.13, "elapsed_time": "0:21:39", "remaining_time": "0:07:33", "throughput": 6450.41, "total_tokens": 8383936} +{"current_steps": 8740, "total_steps": 11784, "loss": 0.0699, "lr": 3.7999045253922504e-07, "epoch": 0.741683638832315, "percentage": 74.17, "elapsed_time": "0:21:40", "remaining_time": "0:07:32", "throughput": 6452.52, "total_tokens": 8388864} +{"current_steps": 8745, "total_steps": 11784, "loss": 0.0511, "lr": 3.788290040997746e-07, "epoch": 0.7421079429735234, "percentage": 74.21, "elapsed_time": "0:21:40", "remaining_time": "0:07:31", "throughput": 6454.7, "total_tokens": 8393920} +{"current_steps": 8750, "total_steps": 11784, "loss": 0.0927, "lr": 3.776689184520815e-07, "epoch": 0.7425322471147319, "percentage": 74.25, "elapsed_time": "0:21:40", "remaining_time": "0:07:31", "throughput": 6456.8, "total_tokens": 8398848} +{"current_steps": 8755, "total_steps": 11784, "loss": 0.1256, "lr": 3.765101981412665e-07, "epoch": 0.7429565512559403, "percentage": 74.3, "elapsed_time": "0:21:41", "remaining_time": "0:07:30", "throughput": 6459.16, "total_tokens": 8404160} +{"current_steps": 8760, "total_steps": 11784, "loss": 0.0433, "lr": 3.753528457094548e-07, "epoch": 0.7433808553971487, "percentage": 74.34, "elapsed_time": "0:21:41", "remaining_time": "0:07:29", "throughput": 6461.12, "total_tokens": 8408896} +{"current_steps": 8765, "total_steps": 11784, "loss": 0.1051, "lr": 3.7419686369577076e-07, "epoch": 0.7438051595383571, "percentage": 74.38, "elapsed_time": "0:21:41", "remaining_time": "0:07:28", "throughput": 6462.87, "total_tokens": 8413312} +{"current_steps": 8770, "total_steps": 11784, "loss": 0.0251, "lr": 3.730422546363323e-07, "epoch": 0.7442294636795656, "percentage": 74.42, "elapsed_time": "0:21:42", "remaining_time": "0:07:27", "throughput": 6464.77, "total_tokens": 8417920} +{"current_steps": 8775, "total_steps": 11784, "loss": 0.068, "lr": 3.7188902106424414e-07, "epoch": 0.744653767820774, "percentage": 74.47, "elapsed_time": "0:21:42", "remaining_time": "0:07:26", "throughput": 6466.79, "total_tokens": 8422720} +{"current_steps": 8780, "total_steps": 11784, "loss": 0.0595, "lr": 3.7073716550959533e-07, "epoch": 0.7450780719619824, "percentage": 74.51, "elapsed_time": "0:21:42", "remaining_time": "0:07:25", "throughput": 6468.43, "total_tokens": 8426944} +{"current_steps": 8785, "total_steps": 11784, "loss": 0.0491, "lr": 3.6958669049944956e-07, "epoch": 0.7455023761031908, "percentage": 74.55, "elapsed_time": "0:21:43", "remaining_time": "0:07:24", "throughput": 6470.0, "total_tokens": 8431104} +{"current_steps": 8790, "total_steps": 11784, "loss": 0.0572, "lr": 3.684375985578431e-07, "epoch": 0.7459266802443992, "percentage": 74.59, "elapsed_time": "0:21:43", "remaining_time": "0:07:23", "throughput": 6471.77, "total_tokens": 8435520} +{"current_steps": 8795, "total_steps": 11784, "loss": 0.0451, "lr": 3.672898922057773e-07, "epoch": 0.7463509843856077, "percentage": 74.64, "elapsed_time": "0:21:43", "remaining_time": "0:07:23", "throughput": 6474.35, "total_tokens": 8441152} +{"current_steps": 8800, "total_steps": 11784, "loss": 0.0423, "lr": 3.66143573961214e-07, "epoch": 0.7467752885268161, "percentage": 74.68, "elapsed_time": "0:21:44", "remaining_time": "0:07:22", "throughput": 6476.28, "total_tokens": 8445824} +{"current_steps": 8805, "total_steps": 11784, "loss": 0.0749, "lr": 3.649986463390694e-07, "epoch": 0.7471995926680245, "percentage": 74.72, "elapsed_time": "0:21:44", "remaining_time": "0:07:21", "throughput": 6478.38, "total_tokens": 8450752} +{"current_steps": 8810, "total_steps": 11784, "loss": 0.0542, "lr": 3.6385511185120885e-07, "epoch": 0.7476238968092329, "percentage": 74.76, "elapsed_time": "0:21:44", "remaining_time": "0:07:20", "throughput": 6480.24, "total_tokens": 8455296} +{"current_steps": 8815, "total_steps": 11784, "loss": 0.0453, "lr": 3.6271297300644156e-07, "epoch": 0.7480482009504412, "percentage": 74.8, "elapsed_time": "0:21:45", "remaining_time": "0:07:19", "throughput": 6482.52, "total_tokens": 8460480} +{"current_steps": 8820, "total_steps": 11784, "loss": 0.0876, "lr": 3.6157223231051426e-07, "epoch": 0.7484725050916496, "percentage": 74.85, "elapsed_time": "0:21:45", "remaining_time": "0:07:18", "throughput": 6484.46, "total_tokens": 8465152} +{"current_steps": 8825, "total_steps": 11784, "loss": 0.0494, "lr": 3.6043289226610717e-07, "epoch": 0.7488968092328581, "percentage": 74.89, "elapsed_time": "0:21:45", "remaining_time": "0:07:17", "throughput": 6486.61, "total_tokens": 8470144} +{"current_steps": 8830, "total_steps": 11784, "loss": 0.0376, "lr": 3.5929495537282596e-07, "epoch": 0.7493211133740665, "percentage": 74.93, "elapsed_time": "0:21:46", "remaining_time": "0:07:16", "throughput": 6489.23, "total_tokens": 8475840} +{"current_steps": 8835, "total_steps": 11784, "loss": 0.0778, "lr": 3.5815842412720045e-07, "epoch": 0.7497454175152749, "percentage": 74.97, "elapsed_time": "0:21:46", "remaining_time": "0:07:16", "throughput": 6491.0, "total_tokens": 8480256} +{"current_steps": 8840, "total_steps": 11784, "loss": 0.0507, "lr": 3.57023301022674e-07, "epoch": 0.7501697216564833, "percentage": 75.02, "elapsed_time": "0:21:46", "remaining_time": "0:07:15", "throughput": 6493.37, "total_tokens": 8485568} +{"current_steps": 8845, "total_steps": 11784, "loss": 0.0751, "lr": 3.558895885496023e-07, "epoch": 0.7505940257976917, "percentage": 75.06, "elapsed_time": "0:21:47", "remaining_time": "0:07:14", "throughput": 6495.19, "total_tokens": 8490048} +{"current_steps": 8850, "total_steps": 11784, "loss": 0.0632, "lr": 3.547572891952456e-07, "epoch": 0.7510183299389002, "percentage": 75.1, "elapsed_time": "0:21:47", "remaining_time": "0:07:13", "throughput": 6497.13, "total_tokens": 8494720} +{"current_steps": 8850, "total_steps": 11784, "eval_loss": 0.05398377776145935, "epoch": 0.7510183299389002, "percentage": 75.1, "elapsed_time": "0:22:03", "remaining_time": "0:07:18", "throughput": 6419.78, "total_tokens": 8494720} +{"current_steps": 8855, "total_steps": 11784, "loss": 0.0387, "lr": 3.536264054437641e-07, "epoch": 0.7514426340801086, "percentage": 75.14, "elapsed_time": "0:22:35", "remaining_time": "0:07:28", "throughput": 6270.34, "total_tokens": 8499392} +{"current_steps": 8860, "total_steps": 11784, "loss": 0.0444, "lr": 3.524969397762122e-07, "epoch": 0.751866938221317, "percentage": 75.19, "elapsed_time": "0:22:35", "remaining_time": "0:07:27", "throughput": 6272.37, "total_tokens": 8504256} +{"current_steps": 8865, "total_steps": 11784, "loss": 0.074, "lr": 3.5136889467053353e-07, "epoch": 0.7522912423625254, "percentage": 75.23, "elapsed_time": "0:22:36", "remaining_time": "0:07:26", "throughput": 6274.25, "total_tokens": 8508864} +{"current_steps": 8870, "total_steps": 11784, "loss": 0.0805, "lr": 3.5024227260155383e-07, "epoch": 0.7527155465037338, "percentage": 75.27, "elapsed_time": "0:22:36", "remaining_time": "0:07:25", "throughput": 6276.49, "total_tokens": 8514048} +{"current_steps": 8875, "total_steps": 11784, "loss": 0.0212, "lr": 3.4911707604097916e-07, "epoch": 0.7531398506449423, "percentage": 75.31, "elapsed_time": "0:22:36", "remaining_time": "0:07:24", "throughput": 6278.37, "total_tokens": 8518656} +{"current_steps": 8880, "total_steps": 11784, "loss": 0.0455, "lr": 3.4799330745738573e-07, "epoch": 0.7535641547861507, "percentage": 75.36, "elapsed_time": "0:22:37", "remaining_time": "0:07:23", "throughput": 6280.41, "total_tokens": 8523520} +{"current_steps": 8885, "total_steps": 11784, "loss": 0.1758, "lr": 3.468709693162183e-07, "epoch": 0.7539884589273591, "percentage": 75.4, "elapsed_time": "0:22:37", "remaining_time": "0:07:22", "throughput": 6282.4, "total_tokens": 8528320} +{"current_steps": 8890, "total_steps": 11784, "loss": 0.0379, "lr": 3.4575006407978304e-07, "epoch": 0.7544127630685675, "percentage": 75.44, "elapsed_time": "0:22:37", "remaining_time": "0:07:22", "throughput": 6284.43, "total_tokens": 8533184} +{"current_steps": 8895, "total_steps": 11784, "loss": 0.0292, "lr": 3.446305942072425e-07, "epoch": 0.754837067209776, "percentage": 75.48, "elapsed_time": "0:22:38", "remaining_time": "0:07:21", "throughput": 6286.16, "total_tokens": 8537536} +{"current_steps": 8900, "total_steps": 11784, "loss": 0.0689, "lr": 3.4351256215461e-07, "epoch": 0.7552613713509844, "percentage": 75.53, "elapsed_time": "0:22:38", "remaining_time": "0:07:20", "throughput": 6287.74, "total_tokens": 8541696} +{"current_steps": 8905, "total_steps": 11784, "loss": 0.0226, "lr": 3.423959703747449e-07, "epoch": 0.7556856754921928, "percentage": 75.57, "elapsed_time": "0:22:38", "remaining_time": "0:07:19", "throughput": 6289.54, "total_tokens": 8546176} +{"current_steps": 8910, "total_steps": 11784, "loss": 0.0418, "lr": 3.4128082131734535e-07, "epoch": 0.7561099796334012, "percentage": 75.61, "elapsed_time": "0:22:39", "remaining_time": "0:07:18", "throughput": 6291.28, "total_tokens": 8550592} +{"current_steps": 8915, "total_steps": 11784, "loss": 0.0512, "lr": 3.401671174289469e-07, "epoch": 0.7565342837746096, "percentage": 75.65, "elapsed_time": "0:22:39", "remaining_time": "0:07:17", "throughput": 6293.45, "total_tokens": 8555648} +{"current_steps": 8920, "total_steps": 11784, "loss": 0.0859, "lr": 3.390548611529116e-07, "epoch": 0.756958587915818, "percentage": 75.7, "elapsed_time": "0:22:39", "remaining_time": "0:07:16", "throughput": 6295.57, "total_tokens": 8560640} +{"current_steps": 8925, "total_steps": 11784, "loss": 0.0721, "lr": 3.3794405492942713e-07, "epoch": 0.7573828920570265, "percentage": 75.74, "elapsed_time": "0:22:40", "remaining_time": "0:07:15", "throughput": 6297.52, "total_tokens": 8565376} +{"current_steps": 8930, "total_steps": 11784, "loss": 0.0562, "lr": 3.368347011955006e-07, "epoch": 0.7578071961982349, "percentage": 75.78, "elapsed_time": "0:22:40", "remaining_time": "0:07:14", "throughput": 6299.23, "total_tokens": 8569728} +{"current_steps": 8935, "total_steps": 11784, "loss": 0.0676, "lr": 3.3572680238495064e-07, "epoch": 0.7582315003394433, "percentage": 75.82, "elapsed_time": "0:22:40", "remaining_time": "0:07:13", "throughput": 6301.29, "total_tokens": 8574656} +{"current_steps": 8940, "total_steps": 11784, "loss": 0.0675, "lr": 3.346203609284053e-07, "epoch": 0.7586558044806517, "percentage": 75.87, "elapsed_time": "0:22:41", "remaining_time": "0:07:12", "throughput": 6303.04, "total_tokens": 8579072} +{"current_steps": 8945, "total_steps": 11784, "loss": 0.0307, "lr": 3.335153792532945e-07, "epoch": 0.7590801086218602, "percentage": 75.91, "elapsed_time": "0:22:41", "remaining_time": "0:07:12", "throughput": 6305.1, "total_tokens": 8584000} +{"current_steps": 8950, "total_steps": 11784, "loss": 0.0874, "lr": 3.324118597838463e-07, "epoch": 0.7595044127630686, "percentage": 75.95, "elapsed_time": "0:22:41", "remaining_time": "0:07:11", "throughput": 6307.37, "total_tokens": 8589248} +{"current_steps": 8955, "total_steps": 11784, "loss": 0.0565, "lr": 3.313098049410803e-07, "epoch": 0.759928716904277, "percentage": 75.99, "elapsed_time": "0:22:42", "remaining_time": "0:07:10", "throughput": 6309.0, "total_tokens": 8593472} +{"current_steps": 8960, "total_steps": 11784, "loss": 0.0134, "lr": 3.3020921714280325e-07, "epoch": 0.7603530210454854, "percentage": 76.04, "elapsed_time": "0:22:42", "remaining_time": "0:07:09", "throughput": 6310.77, "total_tokens": 8597952} +{"current_steps": 8965, "total_steps": 11784, "loss": 0.0331, "lr": 3.291100988036022e-07, "epoch": 0.7607773251866938, "percentage": 76.08, "elapsed_time": "0:22:42", "remaining_time": "0:07:08", "throughput": 6312.8, "total_tokens": 8602816} +{"current_steps": 8970, "total_steps": 11784, "loss": 0.0146, "lr": 3.280124523348423e-07, "epoch": 0.7612016293279023, "percentage": 76.12, "elapsed_time": "0:22:43", "remaining_time": "0:07:07", "throughput": 6315.19, "total_tokens": 8608256} +{"current_steps": 8975, "total_steps": 11784, "loss": 0.0495, "lr": 3.269162801446578e-07, "epoch": 0.7616259334691107, "percentage": 76.16, "elapsed_time": "0:22:43", "remaining_time": "0:07:06", "throughput": 6317.11, "total_tokens": 8612928} +{"current_steps": 8980, "total_steps": 11784, "loss": 0.1099, "lr": 3.258215846379492e-07, "epoch": 0.7620502376103191, "percentage": 76.21, "elapsed_time": "0:22:43", "remaining_time": "0:07:05", "throughput": 6318.82, "total_tokens": 8617280} +{"current_steps": 8985, "total_steps": 11784, "loss": 0.0041, "lr": 3.247283682163774e-07, "epoch": 0.7624745417515275, "percentage": 76.25, "elapsed_time": "0:22:44", "remaining_time": "0:07:04", "throughput": 6320.73, "total_tokens": 8621952} +{"current_steps": 8990, "total_steps": 11784, "loss": 0.0261, "lr": 3.2363663327835855e-07, "epoch": 0.7628988458927359, "percentage": 76.29, "elapsed_time": "0:22:44", "remaining_time": "0:07:04", "throughput": 6322.96, "total_tokens": 8627136} +{"current_steps": 8995, "total_steps": 11784, "loss": 0.0975, "lr": 3.2254638221905716e-07, "epoch": 0.7633231500339444, "percentage": 76.33, "elapsed_time": "0:22:44", "remaining_time": "0:07:03", "throughput": 6324.9, "total_tokens": 8631872} +{"current_steps": 9000, "total_steps": 11784, "loss": 0.0871, "lr": 3.214576174303846e-07, "epoch": 0.7637474541751528, "percentage": 76.37, "elapsed_time": "0:22:45", "remaining_time": "0:07:02", "throughput": 6327.0, "total_tokens": 8636864} +{"current_steps": 9005, "total_steps": 11784, "loss": 0.033, "lr": 3.2037034130098905e-07, "epoch": 0.7641717583163612, "percentage": 76.42, "elapsed_time": "0:22:45", "remaining_time": "0:07:01", "throughput": 6329.05, "total_tokens": 8641792} +{"current_steps": 9010, "total_steps": 11784, "loss": 0.0412, "lr": 3.192845562162549e-07, "epoch": 0.7645960624575696, "percentage": 76.46, "elapsed_time": "0:22:45", "remaining_time": "0:07:00", "throughput": 6330.91, "total_tokens": 8646400} +{"current_steps": 9015, "total_steps": 11784, "loss": 0.0528, "lr": 3.1820026455829353e-07, "epoch": 0.765020366598778, "percentage": 76.5, "elapsed_time": "0:22:46", "remaining_time": "0:06:59", "throughput": 6332.73, "total_tokens": 8650944} +{"current_steps": 9020, "total_steps": 11784, "loss": 0.0714, "lr": 3.171174687059408e-07, "epoch": 0.7654446707399865, "percentage": 76.54, "elapsed_time": "0:22:46", "remaining_time": "0:06:58", "throughput": 6335.31, "total_tokens": 8656704} +{"current_steps": 9025, "total_steps": 11784, "loss": 0.0878, "lr": 3.160361710347508e-07, "epoch": 0.7658689748811949, "percentage": 76.59, "elapsed_time": "0:22:46", "remaining_time": "0:06:57", "throughput": 6337.05, "total_tokens": 8661120} +{"current_steps": 9030, "total_steps": 11784, "loss": 0.0299, "lr": 3.14956373916991e-07, "epoch": 0.7662932790224033, "percentage": 76.63, "elapsed_time": "0:22:47", "remaining_time": "0:06:56", "throughput": 6338.86, "total_tokens": 8665664} +{"current_steps": 9035, "total_steps": 11784, "loss": 0.0576, "lr": 3.138780797216356e-07, "epoch": 0.7667175831636117, "percentage": 76.67, "elapsed_time": "0:22:47", "remaining_time": "0:06:56", "throughput": 6340.92, "total_tokens": 8670592} +{"current_steps": 9040, "total_steps": 11784, "loss": 0.0568, "lr": 3.128012908143636e-07, "epoch": 0.7671418873048201, "percentage": 76.71, "elapsed_time": "0:22:47", "remaining_time": "0:06:55", "throughput": 6343.94, "total_tokens": 8677120} +{"current_steps": 9045, "total_steps": 11784, "loss": 0.0727, "lr": 3.1172600955754935e-07, "epoch": 0.7675661914460286, "percentage": 76.76, "elapsed_time": "0:22:48", "remaining_time": "0:06:54", "throughput": 6346.07, "total_tokens": 8682176} +{"current_steps": 9050, "total_steps": 11784, "loss": 0.0849, "lr": 3.1065223831026066e-07, "epoch": 0.767990495587237, "percentage": 76.8, "elapsed_time": "0:22:48", "remaining_time": "0:06:53", "throughput": 6348.04, "total_tokens": 8686976} +{"current_steps": 9055, "total_steps": 11784, "loss": 0.0669, "lr": 3.095799794282533e-07, "epoch": 0.7684147997284454, "percentage": 76.84, "elapsed_time": "0:22:48", "remaining_time": "0:06:52", "throughput": 6350.07, "total_tokens": 8691904} +{"current_steps": 9060, "total_steps": 11784, "loss": 0.0465, "lr": 3.0850923526396334e-07, "epoch": 0.7688391038696538, "percentage": 76.88, "elapsed_time": "0:22:49", "remaining_time": "0:06:51", "throughput": 6354.6, "total_tokens": 8700928} +{"current_steps": 9065, "total_steps": 11784, "loss": 0.0227, "lr": 3.0744000816650464e-07, "epoch": 0.7692634080108622, "percentage": 76.93, "elapsed_time": "0:22:49", "remaining_time": "0:06:50", "throughput": 6356.31, "total_tokens": 8705344} +{"current_steps": 9070, "total_steps": 11784, "loss": 0.0562, "lr": 3.0637230048166263e-07, "epoch": 0.7696877121520707, "percentage": 76.97, "elapsed_time": "0:22:49", "remaining_time": "0:06:49", "throughput": 6358.69, "total_tokens": 8710784} +{"current_steps": 9075, "total_steps": 11784, "loss": 0.0205, "lr": 3.0530611455188946e-07, "epoch": 0.770112016293279, "percentage": 77.01, "elapsed_time": "0:22:50", "remaining_time": "0:06:49", "throughput": 6360.93, "total_tokens": 8716032} +{"current_steps": 9080, "total_steps": 11784, "loss": 0.0332, "lr": 3.0424145271629844e-07, "epoch": 0.7705363204344874, "percentage": 77.05, "elapsed_time": "0:22:50", "remaining_time": "0:06:48", "throughput": 6363.05, "total_tokens": 8721088} +{"current_steps": 9085, "total_steps": 11784, "loss": 0.0212, "lr": 3.031783173106596e-07, "epoch": 0.7709606245756958, "percentage": 77.1, "elapsed_time": "0:22:50", "remaining_time": "0:06:47", "throughput": 6364.83, "total_tokens": 8725632} +{"current_steps": 9090, "total_steps": 11784, "loss": 0.0318, "lr": 3.0211671066739276e-07, "epoch": 0.7713849287169042, "percentage": 77.14, "elapsed_time": "0:22:51", "remaining_time": "0:06:46", "throughput": 6366.88, "total_tokens": 8730560} +{"current_steps": 9095, "total_steps": 11784, "loss": 0.0695, "lr": 3.01056635115566e-07, "epoch": 0.7718092328581126, "percentage": 77.18, "elapsed_time": "0:22:51", "remaining_time": "0:06:45", "throughput": 6369.62, "total_tokens": 8736640} +{"current_steps": 9100, "total_steps": 11784, "loss": 0.0152, "lr": 2.999980929808863e-07, "epoch": 0.7722335369993211, "percentage": 77.22, "elapsed_time": "0:22:51", "remaining_time": "0:06:44", "throughput": 6371.44, "total_tokens": 8741248} +{"current_steps": 9105, "total_steps": 11784, "loss": 0.0392, "lr": 2.989410865856975e-07, "epoch": 0.7726578411405295, "percentage": 77.27, "elapsed_time": "0:22:52", "remaining_time": "0:06:43", "throughput": 6373.28, "total_tokens": 8745856} +{"current_steps": 9110, "total_steps": 11784, "loss": 0.1469, "lr": 2.9788561824897397e-07, "epoch": 0.7730821452817379, "percentage": 77.31, "elapsed_time": "0:22:52", "remaining_time": "0:06:42", "throughput": 6374.83, "total_tokens": 8750016} +{"current_steps": 9115, "total_steps": 11784, "loss": 0.0212, "lr": 2.968316902863157e-07, "epoch": 0.7735064494229463, "percentage": 77.35, "elapsed_time": "0:22:52", "remaining_time": "0:06:42", "throughput": 6376.85, "total_tokens": 8754944} +{"current_steps": 9120, "total_steps": 11784, "loss": 0.0967, "lr": 2.957793050099433e-07, "epoch": 0.7739307535641547, "percentage": 77.39, "elapsed_time": "0:22:53", "remaining_time": "0:06:41", "throughput": 6378.64, "total_tokens": 8759488} +{"current_steps": 9125, "total_steps": 11784, "loss": 0.0815, "lr": 2.9472846472869295e-07, "epoch": 0.7743550577053632, "percentage": 77.44, "elapsed_time": "0:22:53", "remaining_time": "0:06:40", "throughput": 6380.31, "total_tokens": 8763840} +{"current_steps": 9130, "total_steps": 11784, "loss": 0.0108, "lr": 2.936791717480104e-07, "epoch": 0.7747793618465716, "percentage": 77.48, "elapsed_time": "0:22:53", "remaining_time": "0:06:39", "throughput": 6382.06, "total_tokens": 8768320} +{"current_steps": 9135, "total_steps": 11784, "loss": 0.0252, "lr": 2.9263142836994845e-07, "epoch": 0.77520366598778, "percentage": 77.52, "elapsed_time": "0:22:54", "remaining_time": "0:06:38", "throughput": 6384.29, "total_tokens": 8773568} +{"current_steps": 9140, "total_steps": 11784, "loss": 0.0947, "lr": 2.915852368931585e-07, "epoch": 0.7756279701289884, "percentage": 77.56, "elapsed_time": "0:22:54", "remaining_time": "0:06:37", "throughput": 6386.12, "total_tokens": 8778176} +{"current_steps": 9145, "total_steps": 11784, "loss": 0.111, "lr": 2.905405996128882e-07, "epoch": 0.7760522742701969, "percentage": 77.61, "elapsed_time": "0:22:54", "remaining_time": "0:06:36", "throughput": 6387.94, "total_tokens": 8782784} +{"current_steps": 9150, "total_steps": 11784, "loss": 0.0308, "lr": 2.894975188209754e-07, "epoch": 0.7764765784114053, "percentage": 77.65, "elapsed_time": "0:22:55", "remaining_time": "0:06:35", "throughput": 6389.83, "total_tokens": 8787456} +{"current_steps": 9155, "total_steps": 11784, "loss": 0.0682, "lr": 2.8845599680584265e-07, "epoch": 0.7769008825526137, "percentage": 77.69, "elapsed_time": "0:22:55", "remaining_time": "0:06:35", "throughput": 6391.77, "total_tokens": 8792256} +{"current_steps": 9160, "total_steps": 11784, "loss": 0.1251, "lr": 2.8741603585249306e-07, "epoch": 0.7773251866938221, "percentage": 77.73, "elapsed_time": "0:22:55", "remaining_time": "0:06:34", "throughput": 6394.03, "total_tokens": 8797568} +{"current_steps": 9165, "total_steps": 11784, "loss": 0.0504, "lr": 2.8637763824250507e-07, "epoch": 0.7777494908350305, "percentage": 77.77, "elapsed_time": "0:22:56", "remaining_time": "0:06:33", "throughput": 6395.8, "total_tokens": 8802112} +{"current_steps": 9170, "total_steps": 11784, "loss": 0.0153, "lr": 2.8534080625402677e-07, "epoch": 0.778173794976239, "percentage": 77.82, "elapsed_time": "0:22:56", "remaining_time": "0:06:32", "throughput": 6397.66, "total_tokens": 8806784} +{"current_steps": 9175, "total_steps": 11784, "loss": 0.0108, "lr": 2.8430554216177203e-07, "epoch": 0.7785980991174474, "percentage": 77.86, "elapsed_time": "0:22:56", "remaining_time": "0:06:31", "throughput": 6399.94, "total_tokens": 8812096} +{"current_steps": 9180, "total_steps": 11784, "loss": 0.11, "lr": 2.8327184823701464e-07, "epoch": 0.7790224032586558, "percentage": 77.9, "elapsed_time": "0:22:57", "remaining_time": "0:06:30", "throughput": 6401.96, "total_tokens": 8817024} +{"current_steps": 9185, "total_steps": 11784, "loss": 0.1664, "lr": 2.822397267475827e-07, "epoch": 0.7794467073998642, "percentage": 77.94, "elapsed_time": "0:22:57", "remaining_time": "0:06:29", "throughput": 6403.8, "total_tokens": 8821696} +{"current_steps": 9190, "total_steps": 11784, "loss": 0.1155, "lr": 2.812091799578566e-07, "epoch": 0.7798710115410726, "percentage": 77.99, "elapsed_time": "0:22:57", "remaining_time": "0:06:28", "throughput": 6405.78, "total_tokens": 8826560} +{"current_steps": 9195, "total_steps": 11784, "loss": 0.0301, "lr": 2.8018021012875995e-07, "epoch": 0.780295315682281, "percentage": 78.03, "elapsed_time": "0:22:58", "remaining_time": "0:06:28", "throughput": 6407.84, "total_tokens": 8831552} +{"current_steps": 9200, "total_steps": 11784, "loss": 0.0673, "lr": 2.791528195177576e-07, "epoch": 0.7807196198234895, "percentage": 78.07, "elapsed_time": "0:22:58", "remaining_time": "0:06:27", "throughput": 6409.87, "total_tokens": 8836480} +{"current_steps": 9205, "total_steps": 11784, "loss": 0.0681, "lr": 2.7812701037884964e-07, "epoch": 0.7811439239646979, "percentage": 78.11, "elapsed_time": "0:22:58", "remaining_time": "0:06:26", "throughput": 6411.52, "total_tokens": 8840832} +{"current_steps": 9210, "total_steps": 11784, "loss": 0.0717, "lr": 2.7710278496256665e-07, "epoch": 0.7815682281059063, "percentage": 78.16, "elapsed_time": "0:22:59", "remaining_time": "0:06:25", "throughput": 6413.3, "total_tokens": 8845376} +{"current_steps": 9215, "total_steps": 11784, "loss": 0.0424, "lr": 2.7608014551596437e-07, "epoch": 0.7819925322471147, "percentage": 78.2, "elapsed_time": "0:22:59", "remaining_time": "0:06:24", "throughput": 6415.24, "total_tokens": 8850176} +{"current_steps": 9220, "total_steps": 11784, "loss": 0.0697, "lr": 2.7505909428261956e-07, "epoch": 0.7824168363883232, "percentage": 78.24, "elapsed_time": "0:22:59", "remaining_time": "0:06:23", "throughput": 6417.16, "total_tokens": 8854976} +{"current_steps": 9225, "total_steps": 11784, "loss": 0.0415, "lr": 2.740396335026234e-07, "epoch": 0.7828411405295316, "percentage": 78.28, "elapsed_time": "0:23:00", "remaining_time": "0:06:22", "throughput": 6418.85, "total_tokens": 8859392} +{"current_steps": 9230, "total_steps": 11784, "loss": 0.0155, "lr": 2.7302176541257984e-07, "epoch": 0.78326544467074, "percentage": 78.33, "elapsed_time": "0:23:00", "remaining_time": "0:06:22", "throughput": 6420.63, "total_tokens": 8863936} +{"current_steps": 9235, "total_steps": 11784, "loss": 0.1092, "lr": 2.720054922455964e-07, "epoch": 0.7836897488119484, "percentage": 78.37, "elapsed_time": "0:23:00", "remaining_time": "0:06:21", "throughput": 6422.42, "total_tokens": 8868480} +{"current_steps": 9240, "total_steps": 11784, "loss": 0.0854, "lr": 2.7099081623128294e-07, "epoch": 0.7841140529531568, "percentage": 78.41, "elapsed_time": "0:23:01", "remaining_time": "0:06:20", "throughput": 6424.4, "total_tokens": 8873344} +{"current_steps": 9245, "total_steps": 11784, "loss": 0.0661, "lr": 2.699777395957449e-07, "epoch": 0.7845383570943653, "percentage": 78.45, "elapsed_time": "0:23:01", "remaining_time": "0:06:19", "throughput": 6426.24, "total_tokens": 8878016} +{"current_steps": 9250, "total_steps": 11784, "loss": 0.1429, "lr": 2.6896626456157846e-07, "epoch": 0.7849626612355737, "percentage": 78.5, "elapsed_time": "0:23:01", "remaining_time": "0:06:18", "throughput": 6428.17, "total_tokens": 8882816} +{"current_steps": 9255, "total_steps": 11784, "loss": 0.0025, "lr": 2.679563933478667e-07, "epoch": 0.7853869653767821, "percentage": 78.54, "elapsed_time": "0:23:02", "remaining_time": "0:06:17", "throughput": 6430.3, "total_tokens": 8887936} +{"current_steps": 9260, "total_steps": 11784, "loss": 0.0219, "lr": 2.6694812817017387e-07, "epoch": 0.7858112695179905, "percentage": 78.58, "elapsed_time": "0:23:02", "remaining_time": "0:06:16", "throughput": 6432.18, "total_tokens": 8892672} +{"current_steps": 9265, "total_steps": 11784, "loss": 0.053, "lr": 2.659414712405398e-07, "epoch": 0.7862355736591989, "percentage": 78.62, "elapsed_time": "0:23:02", "remaining_time": "0:06:15", "throughput": 6433.9, "total_tokens": 8897152} +{"current_steps": 9270, "total_steps": 11784, "loss": 0.0312, "lr": 2.649364247674779e-07, "epoch": 0.7866598778004074, "percentage": 78.67, "elapsed_time": "0:23:03", "remaining_time": "0:06:15", "throughput": 6435.54, "total_tokens": 8901504} +{"current_steps": 9275, "total_steps": 11784, "loss": 0.073, "lr": 2.639329909559662e-07, "epoch": 0.7870841819416158, "percentage": 78.71, "elapsed_time": "0:23:03", "remaining_time": "0:06:14", "throughput": 6437.38, "total_tokens": 8906176} +{"current_steps": 9280, "total_steps": 11784, "loss": 0.0315, "lr": 2.6293117200744643e-07, "epoch": 0.7875084860828242, "percentage": 78.75, "elapsed_time": "0:23:03", "remaining_time": "0:06:13", "throughput": 6439.13, "total_tokens": 8910720} +{"current_steps": 9285, "total_steps": 11784, "loss": 0.0515, "lr": 2.6193097011981635e-07, "epoch": 0.7879327902240326, "percentage": 78.79, "elapsed_time": "0:23:04", "remaining_time": "0:06:12", "throughput": 6440.86, "total_tokens": 8915200} +{"current_steps": 9290, "total_steps": 11784, "loss": 0.0686, "lr": 2.609323874874266e-07, "epoch": 0.788357094365241, "percentage": 78.84, "elapsed_time": "0:23:04", "remaining_time": "0:06:11", "throughput": 6442.93, "total_tokens": 8920256} +{"current_steps": 9295, "total_steps": 11784, "loss": 0.0547, "lr": 2.5993542630107533e-07, "epoch": 0.7887813985064495, "percentage": 78.88, "elapsed_time": "0:23:04", "remaining_time": "0:06:10", "throughput": 6444.71, "total_tokens": 8924864} +{"current_steps": 9300, "total_steps": 11784, "loss": 0.0615, "lr": 2.589400887480032e-07, "epoch": 0.7892057026476579, "percentage": 78.92, "elapsed_time": "0:23:05", "remaining_time": "0:06:09", "throughput": 6446.73, "total_tokens": 8929856} +{"current_steps": 9305, "total_steps": 11784, "loss": 0.0191, "lr": 2.579463770118887e-07, "epoch": 0.7896300067888663, "percentage": 78.96, "elapsed_time": "0:23:05", "remaining_time": "0:06:09", "throughput": 6449.84, "total_tokens": 8936640} +{"current_steps": 9310, "total_steps": 11784, "loss": 0.0344, "lr": 2.569542932728436e-07, "epoch": 0.7900543109300747, "percentage": 79.01, "elapsed_time": "0:23:05", "remaining_time": "0:06:08", "throughput": 6451.45, "total_tokens": 8940992} +{"current_steps": 9315, "total_steps": 11784, "loss": 0.0789, "lr": 2.5596383970740833e-07, "epoch": 0.7904786150712831, "percentage": 79.05, "elapsed_time": "0:23:06", "remaining_time": "0:06:07", "throughput": 6453.27, "total_tokens": 8945664} +{"current_steps": 9320, "total_steps": 11784, "loss": 0.1219, "lr": 2.549750184885454e-07, "epoch": 0.7909029192124916, "percentage": 79.09, "elapsed_time": "0:23:06", "remaining_time": "0:06:06", "throughput": 6454.89, "total_tokens": 8950016} +{"current_steps": 9325, "total_steps": 11784, "loss": 0.0785, "lr": 2.5398783178563844e-07, "epoch": 0.7913272233537, "percentage": 79.13, "elapsed_time": "0:23:06", "remaining_time": "0:06:05", "throughput": 6456.83, "total_tokens": 8954880} +{"current_steps": 9330, "total_steps": 11784, "loss": 0.058, "lr": 2.5300228176448304e-07, "epoch": 0.7917515274949084, "percentage": 79.18, "elapsed_time": "0:23:07", "remaining_time": "0:06:04", "throughput": 6459.01, "total_tokens": 8960128} +{"current_steps": 9335, "total_steps": 11784, "loss": 0.0552, "lr": 2.52018370587285e-07, "epoch": 0.7921758316361168, "percentage": 79.22, "elapsed_time": "0:23:07", "remaining_time": "0:06:04", "throughput": 6460.87, "total_tokens": 8964864} +{"current_steps": 9340, "total_steps": 11784, "loss": 0.0933, "lr": 2.5103610041265475e-07, "epoch": 0.7926001357773251, "percentage": 79.26, "elapsed_time": "0:23:07", "remaining_time": "0:06:03", "throughput": 6463.04, "total_tokens": 8970112} +{"current_steps": 9345, "total_steps": 11784, "loss": 0.0475, "lr": 2.5005547339560207e-07, "epoch": 0.7930244399185336, "percentage": 79.3, "elapsed_time": "0:23:08", "remaining_time": "0:06:02", "throughput": 6465.05, "total_tokens": 8975104} +{"current_steps": 9350, "total_steps": 11784, "loss": 0.0478, "lr": 2.4907649168753197e-07, "epoch": 0.793448744059742, "percentage": 79.34, "elapsed_time": "0:23:08", "remaining_time": "0:06:01", "throughput": 6467.11, "total_tokens": 8980160} +{"current_steps": 9355, "total_steps": 11784, "loss": 0.1039, "lr": 2.480991574362403e-07, "epoch": 0.7938730482009504, "percentage": 79.39, "elapsed_time": "0:23:08", "remaining_time": "0:06:00", "throughput": 6468.6, "total_tokens": 8984320} +{"current_steps": 9360, "total_steps": 11784, "loss": 0.0495, "lr": 2.471234727859072e-07, "epoch": 0.7942973523421588, "percentage": 79.43, "elapsed_time": "0:23:09", "remaining_time": "0:05:59", "throughput": 6470.66, "total_tokens": 8989376} +{"current_steps": 9365, "total_steps": 11784, "loss": 0.026, "lr": 2.461494398770957e-07, "epoch": 0.7947216564833672, "percentage": 79.47, "elapsed_time": "0:23:09", "remaining_time": "0:05:58", "throughput": 6472.39, "total_tokens": 8993920} +{"current_steps": 9370, "total_steps": 11784, "loss": 0.0662, "lr": 2.4517706084674316e-07, "epoch": 0.7951459606245757, "percentage": 79.51, "elapsed_time": "0:23:09", "remaining_time": "0:05:58", "throughput": 6474.35, "total_tokens": 8998848} +{"current_steps": 9375, "total_steps": 11784, "loss": 0.0314, "lr": 2.4420633782815945e-07, "epoch": 0.7955702647657841, "percentage": 79.56, "elapsed_time": "0:23:10", "remaining_time": "0:05:57", "throughput": 6476.3, "total_tokens": 9003712} +{"current_steps": 9380, "total_steps": 11784, "loss": 0.0552, "lr": 2.432372729510214e-07, "epoch": 0.7959945689069925, "percentage": 79.6, "elapsed_time": "0:23:10", "remaining_time": "0:05:56", "throughput": 6478.72, "total_tokens": 9009472} +{"current_steps": 9385, "total_steps": 11784, "loss": 0.0265, "lr": 2.4226986834136763e-07, "epoch": 0.7964188730482009, "percentage": 79.64, "elapsed_time": "0:23:10", "remaining_time": "0:05:55", "throughput": 6480.53, "total_tokens": 9014144} +{"current_steps": 9390, "total_steps": 11784, "loss": 0.0193, "lr": 2.4130412612159445e-07, "epoch": 0.7968431771894093, "percentage": 79.68, "elapsed_time": "0:23:11", "remaining_time": "0:05:54", "throughput": 6482.24, "total_tokens": 9018688} +{"current_steps": 9395, "total_steps": 11784, "loss": 0.0334, "lr": 2.403400484104514e-07, "epoch": 0.7972674813306178, "percentage": 79.73, "elapsed_time": "0:23:11", "remaining_time": "0:05:53", "throughput": 6483.83, "total_tokens": 9023040} +{"current_steps": 9400, "total_steps": 11784, "loss": 0.0262, "lr": 2.3937763732303504e-07, "epoch": 0.7976917854718262, "percentage": 79.77, "elapsed_time": "0:23:11", "remaining_time": "0:05:53", "throughput": 6485.39, "total_tokens": 9027328} +{"current_steps": 9405, "total_steps": 11784, "loss": 0.0333, "lr": 2.3841689497078742e-07, "epoch": 0.7981160896130346, "percentage": 79.81, "elapsed_time": "0:23:12", "remaining_time": "0:05:52", "throughput": 6486.89, "total_tokens": 9031552} +{"current_steps": 9410, "total_steps": 11784, "loss": 0.0224, "lr": 2.3745782346148756e-07, "epoch": 0.798540393754243, "percentage": 79.85, "elapsed_time": "0:23:12", "remaining_time": "0:05:51", "throughput": 6488.96, "total_tokens": 9036672} +{"current_steps": 9415, "total_steps": 11784, "loss": 0.0491, "lr": 2.3650042489924992e-07, "epoch": 0.7989646978954514, "percentage": 79.9, "elapsed_time": "0:23:12", "remaining_time": "0:05:50", "throughput": 6490.82, "total_tokens": 9041472} +{"current_steps": 9420, "total_steps": 11784, "loss": 0.0686, "lr": 2.3554470138451909e-07, "epoch": 0.7993890020366599, "percentage": 79.94, "elapsed_time": "0:23:13", "remaining_time": "0:05:49", "throughput": 6493.1, "total_tokens": 9046912} +{"current_steps": 9425, "total_steps": 11784, "loss": 0.0481, "lr": 2.345906550140634e-07, "epoch": 0.7998133061778683, "percentage": 79.98, "elapsed_time": "0:23:13", "remaining_time": "0:05:48", "throughput": 6494.98, "total_tokens": 9051712} +{"current_steps": 9430, "total_steps": 11784, "loss": 0.0909, "lr": 2.3363828788097274e-07, "epoch": 0.8002376103190767, "percentage": 80.02, "elapsed_time": "0:23:13", "remaining_time": "0:05:47", "throughput": 6496.73, "total_tokens": 9056256} +{"current_steps": 9435, "total_steps": 11784, "loss": 0.0345, "lr": 2.3268760207465244e-07, "epoch": 0.8006619144602851, "percentage": 80.07, "elapsed_time": "0:23:14", "remaining_time": "0:05:47", "throughput": 6498.81, "total_tokens": 9061376} +{"current_steps": 9440, "total_steps": 11784, "loss": 0.1023, "lr": 2.3173859968081944e-07, "epoch": 0.8010862186014935, "percentage": 80.11, "elapsed_time": "0:23:14", "remaining_time": "0:05:46", "throughput": 6500.61, "total_tokens": 9066048} +{"current_steps": 9440, "total_steps": 11784, "eval_loss": 0.05470386520028114, "epoch": 0.8010862186014935, "percentage": 80.11, "elapsed_time": "0:23:30", "remaining_time": "0:05:50", "throughput": 6427.11, "total_tokens": 9066048} +{"current_steps": 9445, "total_steps": 11784, "loss": 0.0286, "lr": 2.3079128278149717e-07, "epoch": 0.801510522742702, "percentage": 80.15, "elapsed_time": "0:24:00", "remaining_time": "0:05:56", "throughput": 6297.92, "total_tokens": 9071232} +{"current_steps": 9450, "total_steps": 11784, "loss": 0.0474, "lr": 2.2984565345501172e-07, "epoch": 0.8019348268839104, "percentage": 80.19, "elapsed_time": "0:24:00", "remaining_time": "0:05:55", "throughput": 6299.49, "total_tokens": 9075520} +{"current_steps": 9455, "total_steps": 11784, "loss": 0.0605, "lr": 2.2890171377598556e-07, "epoch": 0.8023591310251188, "percentage": 80.24, "elapsed_time": "0:24:01", "remaining_time": "0:05:54", "throughput": 6301.29, "total_tokens": 9080192} +{"current_steps": 9460, "total_steps": 11784, "loss": 0.0041, "lr": 2.2795946581533632e-07, "epoch": 0.8027834351663272, "percentage": 80.28, "elapsed_time": "0:24:01", "remaining_time": "0:05:54", "throughput": 6303.6, "total_tokens": 9085696} +{"current_steps": 9465, "total_steps": 11784, "loss": 0.0716, "lr": 2.27018911640268e-07, "epoch": 0.8032077393075356, "percentage": 80.32, "elapsed_time": "0:24:01", "remaining_time": "0:05:53", "throughput": 6305.45, "total_tokens": 9090432} +{"current_steps": 9470, "total_steps": 11784, "loss": 0.0832, "lr": 2.2608005331426982e-07, "epoch": 0.8036320434487441, "percentage": 80.36, "elapsed_time": "0:24:02", "remaining_time": "0:05:52", "throughput": 6307.17, "total_tokens": 9094976} +{"current_steps": 9475, "total_steps": 11784, "loss": 0.0863, "lr": 2.251428928971102e-07, "epoch": 0.8040563475899525, "percentage": 80.41, "elapsed_time": "0:24:02", "remaining_time": "0:05:51", "throughput": 6309.25, "total_tokens": 9100096} +{"current_steps": 9480, "total_steps": 11784, "loss": 0.0752, "lr": 2.2420743244483253e-07, "epoch": 0.8044806517311609, "percentage": 80.45, "elapsed_time": "0:24:02", "remaining_time": "0:05:50", "throughput": 6311.42, "total_tokens": 9105408} +{"current_steps": 9485, "total_steps": 11784, "loss": 0.0346, "lr": 2.2327367400975051e-07, "epoch": 0.8049049558723693, "percentage": 80.49, "elapsed_time": "0:24:03", "remaining_time": "0:05:49", "throughput": 6313.25, "total_tokens": 9110144} +{"current_steps": 9490, "total_steps": 11784, "loss": 0.0509, "lr": 2.2234161964044417e-07, "epoch": 0.8053292600135777, "percentage": 80.53, "elapsed_time": "0:24:03", "remaining_time": "0:05:48", "throughput": 6315.46, "total_tokens": 9115520} +{"current_steps": 9495, "total_steps": 11784, "loss": 0.0249, "lr": 2.2141127138175386e-07, "epoch": 0.8057535641547862, "percentage": 80.58, "elapsed_time": "0:24:03", "remaining_time": "0:05:48", "throughput": 6317.03, "total_tokens": 9119808} +{"current_steps": 9500, "total_steps": 11784, "loss": 0.0707, "lr": 2.2048263127477861e-07, "epoch": 0.8061778682959946, "percentage": 80.62, "elapsed_time": "0:24:04", "remaining_time": "0:05:47", "throughput": 6318.94, "total_tokens": 9124672} +{"current_steps": 9505, "total_steps": 11784, "loss": 0.0903, "lr": 2.195557013568684e-07, "epoch": 0.806602172437203, "percentage": 80.66, "elapsed_time": "0:24:04", "remaining_time": "0:05:46", "throughput": 6320.65, "total_tokens": 9129216} +{"current_steps": 9510, "total_steps": 11784, "loss": 0.0704, "lr": 2.1863048366162207e-07, "epoch": 0.8070264765784114, "percentage": 80.7, "elapsed_time": "0:24:04", "remaining_time": "0:05:45", "throughput": 6322.49, "total_tokens": 9133952} +{"current_steps": 9515, "total_steps": 11784, "loss": 0.0464, "lr": 2.1770698021888145e-07, "epoch": 0.8074507807196198, "percentage": 80.75, "elapsed_time": "0:24:04", "remaining_time": "0:05:44", "throughput": 6324.05, "total_tokens": 9138240} +{"current_steps": 9520, "total_steps": 11784, "loss": 0.1019, "lr": 2.167851930547283e-07, "epoch": 0.8078750848608283, "percentage": 80.79, "elapsed_time": "0:24:05", "remaining_time": "0:05:43", "throughput": 6325.69, "total_tokens": 9142656} +{"current_steps": 9525, "total_steps": 11784, "loss": 0.0595, "lr": 2.1586512419147763e-07, "epoch": 0.8082993890020367, "percentage": 80.83, "elapsed_time": "0:24:05", "remaining_time": "0:05:42", "throughput": 6327.55, "total_tokens": 9147456} +{"current_steps": 9530, "total_steps": 11784, "loss": 0.0314, "lr": 2.149467756476765e-07, "epoch": 0.8087236931432451, "percentage": 80.87, "elapsed_time": "0:24:05", "remaining_time": "0:05:41", "throughput": 6329.31, "total_tokens": 9152064} +{"current_steps": 9535, "total_steps": 11784, "loss": 0.0694, "lr": 2.140301494380956e-07, "epoch": 0.8091479972844535, "percentage": 80.91, "elapsed_time": "0:24:06", "remaining_time": "0:05:41", "throughput": 6330.97, "total_tokens": 9156544} +{"current_steps": 9540, "total_steps": 11784, "loss": 0.0385, "lr": 2.1311524757372901e-07, "epoch": 0.8095723014256619, "percentage": 80.96, "elapsed_time": "0:24:06", "remaining_time": "0:05:40", "throughput": 6332.69, "total_tokens": 9161088} +{"current_steps": 9545, "total_steps": 11784, "loss": 0.05, "lr": 2.1220207206178685e-07, "epoch": 0.8099966055668704, "percentage": 81.0, "elapsed_time": "0:24:06", "remaining_time": "0:05:39", "throughput": 6334.29, "total_tokens": 9165440} +{"current_steps": 9550, "total_steps": 11784, "loss": 0.0143, "lr": 2.1129062490569106e-07, "epoch": 0.8104209097080788, "percentage": 81.04, "elapsed_time": "0:24:07", "remaining_time": "0:05:38", "throughput": 6336.3, "total_tokens": 9170496} +{"current_steps": 9555, "total_steps": 11784, "loss": 0.034, "lr": 2.1038090810507348e-07, "epoch": 0.8108452138492872, "percentage": 81.08, "elapsed_time": "0:24:07", "remaining_time": "0:05:37", "throughput": 6338.2, "total_tokens": 9175360} +{"current_steps": 9560, "total_steps": 11784, "loss": 0.057, "lr": 2.0947292365576785e-07, "epoch": 0.8112695179904956, "percentage": 81.13, "elapsed_time": "0:24:07", "remaining_time": "0:05:36", "throughput": 6339.84, "total_tokens": 9179776} +{"current_steps": 9565, "total_steps": 11784, "loss": 0.0305, "lr": 2.085666735498085e-07, "epoch": 0.811693822131704, "percentage": 81.17, "elapsed_time": "0:24:08", "remaining_time": "0:05:35", "throughput": 6342.27, "total_tokens": 9185536} +{"current_steps": 9570, "total_steps": 11784, "loss": 0.0328, "lr": 2.0766215977542435e-07, "epoch": 0.8121181262729125, "percentage": 81.21, "elapsed_time": "0:24:08", "remaining_time": "0:05:35", "throughput": 6344.25, "total_tokens": 9190528} +{"current_steps": 9575, "total_steps": 11784, "loss": 0.0937, "lr": 2.0675938431703532e-07, "epoch": 0.8125424304141209, "percentage": 81.25, "elapsed_time": "0:24:08", "remaining_time": "0:05:34", "throughput": 6346.06, "total_tokens": 9195264} +{"current_steps": 9580, "total_steps": 11784, "loss": 0.065, "lr": 2.0585834915524646e-07, "epoch": 0.8129667345553293, "percentage": 81.3, "elapsed_time": "0:24:09", "remaining_time": "0:05:33", "throughput": 6347.99, "total_tokens": 9200192} +{"current_steps": 9585, "total_steps": 11784, "loss": 0.0184, "lr": 2.0495905626684674e-07, "epoch": 0.8133910386965377, "percentage": 81.34, "elapsed_time": "0:24:09", "remaining_time": "0:05:32", "throughput": 6349.88, "total_tokens": 9205056} +{"current_steps": 9590, "total_steps": 11784, "loss": 0.0328, "lr": 2.0406150762480089e-07, "epoch": 0.8138153428377461, "percentage": 81.38, "elapsed_time": "0:24:09", "remaining_time": "0:05:31", "throughput": 6351.75, "total_tokens": 9209856} +{"current_steps": 9595, "total_steps": 11784, "loss": 0.033, "lr": 2.0316570519824806e-07, "epoch": 0.8142396469789546, "percentage": 81.42, "elapsed_time": "0:24:10", "remaining_time": "0:05:30", "throughput": 6353.5, "total_tokens": 9214464} +{"current_steps": 9600, "total_steps": 11784, "loss": 0.0432, "lr": 2.0227165095249564e-07, "epoch": 0.814663951120163, "percentage": 81.47, "elapsed_time": "0:24:10", "remaining_time": "0:05:30", "throughput": 6355.24, "total_tokens": 9219072} +{"current_steps": 9605, "total_steps": 11784, "loss": 0.0675, "lr": 2.0137934684901636e-07, "epoch": 0.8150882552613713, "percentage": 81.51, "elapsed_time": "0:24:10", "remaining_time": "0:05:29", "throughput": 6357.64, "total_tokens": 9224768} +{"current_steps": 9610, "total_steps": 11784, "loss": 0.0464, "lr": 2.0048879484544279e-07, "epoch": 0.8155125594025797, "percentage": 81.55, "elapsed_time": "0:24:11", "remaining_time": "0:05:28", "throughput": 6359.56, "total_tokens": 9229696} +{"current_steps": 9615, "total_steps": 11784, "loss": 0.0971, "lr": 1.9959999689556407e-07, "epoch": 0.8159368635437881, "percentage": 81.59, "elapsed_time": "0:24:11", "remaining_time": "0:05:27", "throughput": 6361.75, "total_tokens": 9235072} +{"current_steps": 9620, "total_steps": 11784, "loss": 0.0947, "lr": 1.9871295494931994e-07, "epoch": 0.8163611676849966, "percentage": 81.64, "elapsed_time": "0:24:11", "remaining_time": "0:05:26", "throughput": 6363.87, "total_tokens": 9240320} +{"current_steps": 9625, "total_steps": 11784, "loss": 0.0287, "lr": 1.978276709527994e-07, "epoch": 0.816785471826205, "percentage": 81.68, "elapsed_time": "0:24:12", "remaining_time": "0:05:25", "throughput": 6365.6, "total_tokens": 9244928} +{"current_steps": 9630, "total_steps": 11784, "loss": 0.1166, "lr": 1.9694414684823313e-07, "epoch": 0.8172097759674134, "percentage": 81.72, "elapsed_time": "0:24:12", "remaining_time": "0:05:24", "throughput": 6367.49, "total_tokens": 9249792} +{"current_steps": 9635, "total_steps": 11784, "loss": 0.0569, "lr": 1.960623845739914e-07, "epoch": 0.8176340801086218, "percentage": 81.76, "elapsed_time": "0:24:13", "remaining_time": "0:05:24", "throughput": 6369.48, "total_tokens": 9254848} +{"current_steps": 9640, "total_steps": 11784, "loss": 0.0463, "lr": 1.9518238606457925e-07, "epoch": 0.8180583842498302, "percentage": 81.81, "elapsed_time": "0:24:13", "remaining_time": "0:05:23", "throughput": 6371.17, "total_tokens": 9259392} +{"current_steps": 9645, "total_steps": 11784, "loss": 0.066, "lr": 1.943041532506322e-07, "epoch": 0.8184826883910387, "percentage": 81.85, "elapsed_time": "0:24:13", "remaining_time": "0:05:22", "throughput": 6372.83, "total_tokens": 9263872} +{"current_steps": 9650, "total_steps": 11784, "loss": 0.0389, "lr": 1.9342768805891173e-07, "epoch": 0.8189069925322471, "percentage": 81.89, "elapsed_time": "0:24:13", "remaining_time": "0:05:21", "throughput": 6374.74, "total_tokens": 9268800} +{"current_steps": 9655, "total_steps": 11784, "loss": 0.1183, "lr": 1.9255299241230182e-07, "epoch": 0.8193312966734555, "percentage": 81.93, "elapsed_time": "0:24:14", "remaining_time": "0:05:20", "throughput": 6376.46, "total_tokens": 9273408} +{"current_steps": 9660, "total_steps": 11784, "loss": 0.0679, "lr": 1.91680068229803e-07, "epoch": 0.8197556008146639, "percentage": 81.98, "elapsed_time": "0:24:14", "remaining_time": "0:05:19", "throughput": 6378.3, "total_tokens": 9278208} +{"current_steps": 9665, "total_steps": 11784, "loss": 0.0643, "lr": 1.9080891742653105e-07, "epoch": 0.8201799049558723, "percentage": 82.02, "elapsed_time": "0:24:14", "remaining_time": "0:05:18", "throughput": 6380.09, "total_tokens": 9282944} +{"current_steps": 9670, "total_steps": 11784, "loss": 0.0074, "lr": 1.8993954191371042e-07, "epoch": 0.8206042090970808, "percentage": 82.06, "elapsed_time": "0:24:15", "remaining_time": "0:05:18", "throughput": 6382.12, "total_tokens": 9288064} +{"current_steps": 9675, "total_steps": 11784, "loss": 0.0289, "lr": 1.8907194359866986e-07, "epoch": 0.8210285132382892, "percentage": 82.1, "elapsed_time": "0:24:15", "remaining_time": "0:05:17", "throughput": 6384.1, "total_tokens": 9293120} +{"current_steps": 9680, "total_steps": 11784, "loss": 0.0717, "lr": 1.8820612438484075e-07, "epoch": 0.8214528173794976, "percentage": 82.15, "elapsed_time": "0:24:15", "remaining_time": "0:05:16", "throughput": 6385.67, "total_tokens": 9297472} +{"current_steps": 9685, "total_steps": 11784, "loss": 0.0743, "lr": 1.8734208617174986e-07, "epoch": 0.821877121520706, "percentage": 82.19, "elapsed_time": "0:24:16", "remaining_time": "0:05:15", "throughput": 6387.44, "total_tokens": 9302144} +{"current_steps": 9690, "total_steps": 11784, "loss": 0.0671, "lr": 1.864798308550173e-07, "epoch": 0.8223014256619144, "percentage": 82.23, "elapsed_time": "0:24:16", "remaining_time": "0:05:14", "throughput": 6389.41, "total_tokens": 9307200} +{"current_steps": 9695, "total_steps": 11784, "loss": 0.0212, "lr": 1.856193603263515e-07, "epoch": 0.8227257298031229, "percentage": 82.27, "elapsed_time": "0:24:17", "remaining_time": "0:05:13", "throughput": 6391.47, "total_tokens": 9312384} +{"current_steps": 9700, "total_steps": 11784, "loss": 0.0079, "lr": 1.8476067647354553e-07, "epoch": 0.8231500339443313, "percentage": 82.32, "elapsed_time": "0:24:17", "remaining_time": "0:05:13", "throughput": 6393.28, "total_tokens": 9317120} +{"current_steps": 9705, "total_steps": 11784, "loss": 0.0715, "lr": 1.8390378118047213e-07, "epoch": 0.8235743380855397, "percentage": 82.36, "elapsed_time": "0:24:17", "remaining_time": "0:05:12", "throughput": 6394.95, "total_tokens": 9321664} +{"current_steps": 9710, "total_steps": 11784, "loss": 0.0367, "lr": 1.8304867632708077e-07, "epoch": 0.8239986422267481, "percentage": 82.4, "elapsed_time": "0:24:17", "remaining_time": "0:05:11", "throughput": 6396.64, "total_tokens": 9326208} +{"current_steps": 9715, "total_steps": 11784, "loss": 0.0819, "lr": 1.821953637893917e-07, "epoch": 0.8244229463679565, "percentage": 82.44, "elapsed_time": "0:24:18", "remaining_time": "0:05:10", "throughput": 6398.61, "total_tokens": 9331264} +{"current_steps": 9720, "total_steps": 11784, "loss": 0.0681, "lr": 1.8134384543949478e-07, "epoch": 0.824847250509165, "percentage": 82.48, "elapsed_time": "0:24:18", "remaining_time": "0:05:09", "throughput": 6400.82, "total_tokens": 9336704} +{"current_steps": 9725, "total_steps": 11784, "loss": 0.0809, "lr": 1.804941231455417e-07, "epoch": 0.8252715546503734, "percentage": 82.53, "elapsed_time": "0:24:19", "remaining_time": "0:05:08", "throughput": 6402.94, "total_tokens": 9342016} +{"current_steps": 9730, "total_steps": 11784, "loss": 0.0543, "lr": 1.7964619877174513e-07, "epoch": 0.8256958587915818, "percentage": 82.57, "elapsed_time": "0:24:19", "remaining_time": "0:05:08", "throughput": 6404.74, "total_tokens": 9346752} +{"current_steps": 9735, "total_steps": 11784, "loss": 0.045, "lr": 1.788000741783725e-07, "epoch": 0.8261201629327902, "percentage": 82.61, "elapsed_time": "0:24:19", "remaining_time": "0:05:07", "throughput": 6406.42, "total_tokens": 9351296} +{"current_steps": 9740, "total_steps": 11784, "loss": 0.078, "lr": 1.7795575122174323e-07, "epoch": 0.8265444670739986, "percentage": 82.65, "elapsed_time": "0:24:20", "remaining_time": "0:05:06", "throughput": 6408.01, "total_tokens": 9355712} +{"current_steps": 9745, "total_steps": 11784, "loss": 0.0929, "lr": 1.7711323175422376e-07, "epoch": 0.8269687712152071, "percentage": 82.7, "elapsed_time": "0:24:20", "remaining_time": "0:05:05", "throughput": 6409.75, "total_tokens": 9360384} +{"current_steps": 9750, "total_steps": 11784, "loss": 0.0472, "lr": 1.7627251762422413e-07, "epoch": 0.8273930753564155, "percentage": 82.74, "elapsed_time": "0:24:20", "remaining_time": "0:05:04", "throughput": 6411.24, "total_tokens": 9364608} +{"current_steps": 9755, "total_steps": 11784, "loss": 0.0203, "lr": 1.7543361067619267e-07, "epoch": 0.8278173794976239, "percentage": 82.78, "elapsed_time": "0:24:20", "remaining_time": "0:05:03", "throughput": 6413.24, "total_tokens": 9369728} +{"current_steps": 9760, "total_steps": 11784, "loss": 0.0865, "lr": 1.7459651275061483e-07, "epoch": 0.8282416836388323, "percentage": 82.82, "elapsed_time": "0:24:21", "remaining_time": "0:05:03", "throughput": 6415.07, "total_tokens": 9374592} +{"current_steps": 9765, "total_steps": 11784, "loss": 0.0433, "lr": 1.737612256840053e-07, "epoch": 0.8286659877800407, "percentage": 82.87, "elapsed_time": "0:24:21", "remaining_time": "0:05:02", "throughput": 6417.34, "total_tokens": 9380160} +{"current_steps": 9770, "total_steps": 11784, "loss": 0.0528, "lr": 1.729277513089068e-07, "epoch": 0.8290902919212492, "percentage": 82.91, "elapsed_time": "0:24:22", "remaining_time": "0:05:01", "throughput": 6419.31, "total_tokens": 9385216} +{"current_steps": 9775, "total_steps": 11784, "loss": 0.0437, "lr": 1.7209609145388538e-07, "epoch": 0.8295145960624576, "percentage": 82.95, "elapsed_time": "0:24:22", "remaining_time": "0:05:00", "throughput": 6421.17, "total_tokens": 9390080} +{"current_steps": 9780, "total_steps": 11784, "loss": 0.0108, "lr": 1.7126624794352563e-07, "epoch": 0.829938900203666, "percentage": 82.99, "elapsed_time": "0:24:22", "remaining_time": "0:04:59", "throughput": 6422.65, "total_tokens": 9394304} +{"current_steps": 9785, "total_steps": 11784, "loss": 0.0646, "lr": 1.7043822259842766e-07, "epoch": 0.8303632043448744, "percentage": 83.04, "elapsed_time": "0:24:23", "remaining_time": "0:04:58", "throughput": 6424.62, "total_tokens": 9399360} +{"current_steps": 9790, "total_steps": 11784, "loss": 0.0338, "lr": 1.6961201723520247e-07, "epoch": 0.8307875084860828, "percentage": 83.08, "elapsed_time": "0:24:23", "remaining_time": "0:04:58", "throughput": 6426.56, "total_tokens": 9404352} +{"current_steps": 9795, "total_steps": 11784, "loss": 0.0434, "lr": 1.6878763366646832e-07, "epoch": 0.8312118126272913, "percentage": 83.12, "elapsed_time": "0:24:23", "remaining_time": "0:04:57", "throughput": 6428.36, "total_tokens": 9409152} +{"current_steps": 9800, "total_steps": 11784, "loss": 0.0449, "lr": 1.6796507370084656e-07, "epoch": 0.8316361167684997, "percentage": 83.16, "elapsed_time": "0:24:24", "remaining_time": "0:04:56", "throughput": 6430.34, "total_tokens": 9414208} +{"current_steps": 9805, "total_steps": 11784, "loss": 0.0646, "lr": 1.671443391429581e-07, "epoch": 0.8320604209097081, "percentage": 83.21, "elapsed_time": "0:24:24", "remaining_time": "0:04:55", "throughput": 6432.15, "total_tokens": 9419008} +{"current_steps": 9810, "total_steps": 11784, "loss": 0.0368, "lr": 1.6632543179341772e-07, "epoch": 0.8324847250509165, "percentage": 83.25, "elapsed_time": "0:24:24", "remaining_time": "0:04:54", "throughput": 6434.18, "total_tokens": 9424192} +{"current_steps": 9815, "total_steps": 11784, "loss": 0.0235, "lr": 1.6550835344883364e-07, "epoch": 0.832909029192125, "percentage": 83.29, "elapsed_time": "0:24:25", "remaining_time": "0:04:53", "throughput": 6436.01, "total_tokens": 9429056} +{"current_steps": 9820, "total_steps": 11784, "loss": 0.053, "lr": 1.646931059017994e-07, "epoch": 0.8333333333333334, "percentage": 83.33, "elapsed_time": "0:24:25", "remaining_time": "0:04:53", "throughput": 6437.32, "total_tokens": 9433024} +{"current_steps": 9825, "total_steps": 11784, "loss": 0.0435, "lr": 1.6387969094089317e-07, "epoch": 0.8337576374745418, "percentage": 83.38, "elapsed_time": "0:24:25", "remaining_time": "0:04:52", "throughput": 6439.05, "total_tokens": 9437696} +{"current_steps": 9830, "total_steps": 11784, "loss": 0.0472, "lr": 1.6306811035067203e-07, "epoch": 0.8341819416157502, "percentage": 83.42, "elapsed_time": "0:24:26", "remaining_time": "0:04:51", "throughput": 6441.11, "total_tokens": 9442944} +{"current_steps": 9835, "total_steps": 11784, "loss": 0.0394, "lr": 1.6225836591166886e-07, "epoch": 0.8346062457569586, "percentage": 83.46, "elapsed_time": "0:24:26", "remaining_time": "0:04:50", "throughput": 6442.9, "total_tokens": 9447680} +{"current_steps": 9840, "total_steps": 11784, "loss": 0.0774, "lr": 1.6145045940038803e-07, "epoch": 0.835030549898167, "percentage": 83.5, "elapsed_time": "0:24:26", "remaining_time": "0:04:49", "throughput": 6444.6, "total_tokens": 9452288} +{"current_steps": 9845, "total_steps": 11784, "loss": 0.063, "lr": 1.6064439258930217e-07, "epoch": 0.8354548540393755, "percentage": 83.55, "elapsed_time": "0:24:27", "remaining_time": "0:04:48", "throughput": 6446.28, "total_tokens": 9456896} +{"current_steps": 9850, "total_steps": 11784, "loss": 0.0847, "lr": 1.5984016724684658e-07, "epoch": 0.8358791581805839, "percentage": 83.59, "elapsed_time": "0:24:27", "remaining_time": "0:04:48", "throughput": 6448.07, "total_tokens": 9461632} +{"current_steps": 9855, "total_steps": 11784, "loss": 0.077, "lr": 1.5903778513741816e-07, "epoch": 0.8363034623217923, "percentage": 83.63, "elapsed_time": "0:24:27", "remaining_time": "0:04:47", "throughput": 6449.92, "total_tokens": 9466560} +{"current_steps": 9860, "total_steps": 11784, "loss": 0.0369, "lr": 1.5823724802136862e-07, "epoch": 0.8367277664630007, "percentage": 83.67, "elapsed_time": "0:24:28", "remaining_time": "0:04:46", "throughput": 6451.62, "total_tokens": 9471168} +{"current_steps": 9865, "total_steps": 11784, "loss": 0.105, "lr": 1.5743855765500258e-07, "epoch": 0.837152070604209, "percentage": 83.72, "elapsed_time": "0:24:28", "remaining_time": "0:04:45", "throughput": 6453.4, "total_tokens": 9475968} +{"current_steps": 9870, "total_steps": 11784, "loss": 0.0345, "lr": 1.5664171579057273e-07, "epoch": 0.8375763747454175, "percentage": 83.76, "elapsed_time": "0:24:28", "remaining_time": "0:04:44", "throughput": 6455.5, "total_tokens": 9481280} +{"current_steps": 9875, "total_steps": 11784, "loss": 0.0235, "lr": 1.5584672417627665e-07, "epoch": 0.8380006788866259, "percentage": 83.8, "elapsed_time": "0:24:29", "remaining_time": "0:04:43", "throughput": 6457.22, "total_tokens": 9485952} +{"current_steps": 9880, "total_steps": 11784, "loss": 0.0437, "lr": 1.5505358455625229e-07, "epoch": 0.8384249830278343, "percentage": 83.84, "elapsed_time": "0:24:29", "remaining_time": "0:04:43", "throughput": 6458.61, "total_tokens": 9490048} +{"current_steps": 9885, "total_steps": 11784, "loss": 0.0855, "lr": 1.5426229867057516e-07, "epoch": 0.8388492871690427, "percentage": 83.88, "elapsed_time": "0:24:29", "remaining_time": "0:04:42", "throughput": 6460.7, "total_tokens": 9495360} +{"current_steps": 9890, "total_steps": 11784, "loss": 0.0929, "lr": 1.5347286825525252e-07, "epoch": 0.8392735913102511, "percentage": 83.93, "elapsed_time": "0:24:30", "remaining_time": "0:04:41", "throughput": 6462.39, "total_tokens": 9499968} +{"current_steps": 9895, "total_steps": 11784, "loss": 0.0571, "lr": 1.526852950422226e-07, "epoch": 0.8396978954514596, "percentage": 83.97, "elapsed_time": "0:24:30", "remaining_time": "0:04:40", "throughput": 6464.17, "total_tokens": 9504704} +{"current_steps": 9900, "total_steps": 11784, "loss": 0.0243, "lr": 1.5189958075934771e-07, "epoch": 0.840122199592668, "percentage": 84.01, "elapsed_time": "0:24:30", "remaining_time": "0:04:39", "throughput": 6465.78, "total_tokens": 9509184} +{"current_steps": 9905, "total_steps": 11784, "loss": 0.047, "lr": 1.5111572713041253e-07, "epoch": 0.8405465037338764, "percentage": 84.05, "elapsed_time": "0:24:31", "remaining_time": "0:04:39", "throughput": 6467.61, "total_tokens": 9514048} +{"current_steps": 9910, "total_steps": 11784, "loss": 0.0214, "lr": 1.5033373587511944e-07, "epoch": 0.8409708078750848, "percentage": 84.1, "elapsed_time": "0:24:31", "remaining_time": "0:04:38", "throughput": 6469.56, "total_tokens": 9519104} +{"current_steps": 9915, "total_steps": 11784, "loss": 0.0717, "lr": 1.4955360870908505e-07, "epoch": 0.8413951120162932, "percentage": 84.14, "elapsed_time": "0:24:31", "remaining_time": "0:04:37", "throughput": 6471.3, "total_tokens": 9523840} +{"current_steps": 9920, "total_steps": 11784, "loss": 0.0594, "lr": 1.4877534734383624e-07, "epoch": 0.8418194161575017, "percentage": 84.18, "elapsed_time": "0:24:32", "remaining_time": "0:04:36", "throughput": 6472.96, "total_tokens": 9528384} +{"current_steps": 9925, "total_steps": 11784, "loss": 0.0608, "lr": 1.4799895348680647e-07, "epoch": 0.8422437202987101, "percentage": 84.22, "elapsed_time": "0:24:32", "remaining_time": "0:04:35", "throughput": 6474.77, "total_tokens": 9533184} +{"current_steps": 9930, "total_steps": 11784, "loss": 0.0261, "lr": 1.4722442884133214e-07, "epoch": 0.8426680244399185, "percentage": 84.27, "elapsed_time": "0:24:32", "remaining_time": "0:04:34", "throughput": 6477.12, "total_tokens": 9538944} +{"current_steps": 9935, "total_steps": 11784, "loss": 0.0125, "lr": 1.4645177510664886e-07, "epoch": 0.8430923285811269, "percentage": 84.31, "elapsed_time": "0:24:33", "remaining_time": "0:04:34", "throughput": 6478.66, "total_tokens": 9543296} +{"current_steps": 9940, "total_steps": 11784, "loss": 0.0706, "lr": 1.4568099397788746e-07, "epoch": 0.8435166327223353, "percentage": 84.35, "elapsed_time": "0:24:33", "remaining_time": "0:04:33", "throughput": 6480.3, "total_tokens": 9547840} +{"current_steps": 9945, "total_steps": 11784, "loss": 0.0964, "lr": 1.4491208714607016e-07, "epoch": 0.8439409368635438, "percentage": 84.39, "elapsed_time": "0:24:33", "remaining_time": "0:04:32", "throughput": 6482.12, "total_tokens": 9552704} +{"current_steps": 9950, "total_steps": 11784, "loss": 0.0629, "lr": 1.4414505629810813e-07, "epoch": 0.8443652410047522, "percentage": 84.44, "elapsed_time": "0:24:34", "remaining_time": "0:04:31", "throughput": 6484.35, "total_tokens": 9558272} +{"current_steps": 9955, "total_steps": 11784, "loss": 0.0944, "lr": 1.433799031167957e-07, "epoch": 0.8447895451459606, "percentage": 84.48, "elapsed_time": "0:24:34", "remaining_time": "0:04:30", "throughput": 6486.26, "total_tokens": 9563264} +{"current_steps": 9960, "total_steps": 11784, "loss": 0.0298, "lr": 1.426166292808083e-07, "epoch": 0.845213849287169, "percentage": 84.52, "elapsed_time": "0:24:34", "remaining_time": "0:04:30", "throughput": 6487.8, "total_tokens": 9567680} +{"current_steps": 9965, "total_steps": 11784, "loss": 0.0373, "lr": 1.4185523646469821e-07, "epoch": 0.8456381534283774, "percentage": 84.56, "elapsed_time": "0:24:35", "remaining_time": "0:04:29", "throughput": 6489.66, "total_tokens": 9572608} +{"current_steps": 9970, "total_steps": 11784, "loss": 0.0676, "lr": 1.410957263388909e-07, "epoch": 0.8460624575695859, "percentage": 84.61, "elapsed_time": "0:24:35", "remaining_time": "0:04:28", "throughput": 6491.26, "total_tokens": 9577088} +{"current_steps": 9975, "total_steps": 11784, "loss": 0.0421, "lr": 1.4033810056968155e-07, "epoch": 0.8464867617107943, "percentage": 84.65, "elapsed_time": "0:24:35", "remaining_time": "0:04:27", "throughput": 6493.07, "total_tokens": 9581952} +{"current_steps": 9980, "total_steps": 11784, "loss": 0.096, "lr": 1.3958236081923102e-07, "epoch": 0.8469110658520027, "percentage": 84.69, "elapsed_time": "0:24:36", "remaining_time": "0:04:26", "throughput": 6494.69, "total_tokens": 9586496} +{"current_steps": 9985, "total_steps": 11784, "loss": 0.0315, "lr": 1.3882850874556207e-07, "epoch": 0.8473353699932111, "percentage": 84.73, "elapsed_time": "0:24:36", "remaining_time": "0:04:26", "throughput": 6496.48, "total_tokens": 9591296} +{"current_steps": 9990, "total_steps": 11784, "loss": 0.0639, "lr": 1.3807654600255713e-07, "epoch": 0.8477596741344195, "percentage": 84.78, "elapsed_time": "0:24:36", "remaining_time": "0:04:25", "throughput": 6498.15, "total_tokens": 9595904} +{"current_steps": 9995, "total_steps": 11784, "loss": 0.1177, "lr": 1.373264742399526e-07, "epoch": 0.848183978275628, "percentage": 84.82, "elapsed_time": "0:24:37", "remaining_time": "0:04:24", "throughput": 6499.89, "total_tokens": 9600640} +{"current_steps": 10000, "total_steps": 11784, "loss": 0.0569, "lr": 1.3657829510333652e-07, "epoch": 0.8486082824168364, "percentage": 84.86, "elapsed_time": "0:24:37", "remaining_time": "0:04:23", "throughput": 6501.43, "total_tokens": 9605056} +{"current_steps": 10005, "total_steps": 11784, "loss": 0.0102, "lr": 1.3583201023414493e-07, "epoch": 0.8490325865580448, "percentage": 84.9, "elapsed_time": "0:24:37", "remaining_time": "0:04:22", "throughput": 6503.33, "total_tokens": 9610112} +{"current_steps": 10010, "total_steps": 11784, "loss": 0.0276, "lr": 1.350876212696579e-07, "epoch": 0.8494568906992532, "percentage": 84.95, "elapsed_time": "0:24:38", "remaining_time": "0:04:21", "throughput": 6505.54, "total_tokens": 9615744} +{"current_steps": 10015, "total_steps": 11784, "loss": 0.0089, "lr": 1.3434512984299596e-07, "epoch": 0.8498811948404617, "percentage": 84.99, "elapsed_time": "0:24:38", "remaining_time": "0:04:21", "throughput": 6507.15, "total_tokens": 9620288} +{"current_steps": 10020, "total_steps": 11784, "loss": 0.0775, "lr": 1.3360453758311686e-07, "epoch": 0.8503054989816701, "percentage": 85.03, "elapsed_time": "0:24:38", "remaining_time": "0:04:20", "throughput": 6508.86, "total_tokens": 9625024} +{"current_steps": 10025, "total_steps": 11784, "loss": 0.0471, "lr": 1.32865846114811e-07, "epoch": 0.8507298031228785, "percentage": 85.07, "elapsed_time": "0:24:39", "remaining_time": "0:04:19", "throughput": 6510.79, "total_tokens": 9630144} +{"current_steps": 10030, "total_steps": 11784, "loss": 0.0665, "lr": 1.321290570586999e-07, "epoch": 0.8511541072640869, "percentage": 85.12, "elapsed_time": "0:24:39", "remaining_time": "0:04:18", "throughput": 6512.37, "total_tokens": 9634624} +{"current_steps": 10030, "total_steps": 11784, "eval_loss": 0.05264058709144592, "epoch": 0.8511541072640869, "percentage": 85.12, "elapsed_time": "0:24:55", "remaining_time": "0:04:21", "throughput": 6442.69, "total_tokens": 9634624} +{"current_steps": 10035, "total_steps": 11784, "loss": 0.0181, "lr": 1.3139417203123027e-07, "epoch": 0.8515784114052953, "percentage": 85.16, "elapsed_time": "0:25:11", "remaining_time": "0:04:23", "throughput": 6378.51, "total_tokens": 9639744} +{"current_steps": 10040, "total_steps": 11784, "loss": 0.009, "lr": 1.306611926446718e-07, "epoch": 0.8520027155465038, "percentage": 85.2, "elapsed_time": "0:25:11", "remaining_time": "0:04:22", "throughput": 6380.23, "total_tokens": 9644480} +{"current_steps": 10045, "total_steps": 11784, "loss": 0.0478, "lr": 1.2993012050711406e-07, "epoch": 0.8524270196877122, "percentage": 85.24, "elapsed_time": "0:25:11", "remaining_time": "0:04:21", "throughput": 6382.07, "total_tokens": 9649408} +{"current_steps": 10050, "total_steps": 11784, "loss": 0.0887, "lr": 1.292009572224614e-07, "epoch": 0.8528513238289206, "percentage": 85.29, "elapsed_time": "0:25:12", "remaining_time": "0:04:20", "throughput": 6383.39, "total_tokens": 9653440} +{"current_steps": 10055, "total_steps": 11784, "loss": 0.0318, "lr": 1.284737043904306e-07, "epoch": 0.853275627970129, "percentage": 85.33, "elapsed_time": "0:25:12", "remaining_time": "0:04:20", "throughput": 6385.11, "total_tokens": 9658176} +{"current_steps": 10060, "total_steps": 11784, "loss": 0.0806, "lr": 1.2774836360654717e-07, "epoch": 0.8536999321113374, "percentage": 85.37, "elapsed_time": "0:25:12", "remaining_time": "0:04:19", "throughput": 6386.81, "total_tokens": 9662848} +{"current_steps": 10065, "total_steps": 11784, "loss": 0.0707, "lr": 1.2702493646214207e-07, "epoch": 0.8541242362525459, "percentage": 85.41, "elapsed_time": "0:25:13", "remaining_time": "0:04:18", "throughput": 6388.45, "total_tokens": 9667392} +{"current_steps": 10070, "total_steps": 11784, "loss": 0.1539, "lr": 1.2630342454434728e-07, "epoch": 0.8545485403937543, "percentage": 85.45, "elapsed_time": "0:25:13", "remaining_time": "0:04:17", "throughput": 6390.32, "total_tokens": 9672384} +{"current_steps": 10075, "total_steps": 11784, "loss": 0.0521, "lr": 1.2558382943609357e-07, "epoch": 0.8549728445349627, "percentage": 85.5, "elapsed_time": "0:25:13", "remaining_time": "0:04:16", "throughput": 6392.13, "total_tokens": 9677248} +{"current_steps": 10080, "total_steps": 11784, "loss": 0.0558, "lr": 1.2486615271610558e-07, "epoch": 0.8553971486761711, "percentage": 85.54, "elapsed_time": "0:25:14", "remaining_time": "0:04:15", "throughput": 6393.59, "total_tokens": 9681536} +{"current_steps": 10085, "total_steps": 11784, "loss": 0.081, "lr": 1.241503959589003e-07, "epoch": 0.8558214528173795, "percentage": 85.58, "elapsed_time": "0:25:14", "remaining_time": "0:04:15", "throughput": 6395.51, "total_tokens": 9686592} +{"current_steps": 10090, "total_steps": 11784, "loss": 0.0423, "lr": 1.234365607347816e-07, "epoch": 0.856245756958588, "percentage": 85.62, "elapsed_time": "0:25:14", "remaining_time": "0:04:14", "throughput": 6397.43, "total_tokens": 9691648} +{"current_steps": 10095, "total_steps": 11784, "loss": 0.0433, "lr": 1.22724648609838e-07, "epoch": 0.8566700610997964, "percentage": 85.67, "elapsed_time": "0:25:15", "remaining_time": "0:04:13", "throughput": 6399.38, "total_tokens": 9696768} +{"current_steps": 10100, "total_steps": 11784, "loss": 0.0689, "lr": 1.2201466114593884e-07, "epoch": 0.8570943652410048, "percentage": 85.71, "elapsed_time": "0:25:15", "remaining_time": "0:04:12", "throughput": 6401.04, "total_tokens": 9701376} +{"current_steps": 10105, "total_steps": 11784, "loss": 0.0659, "lr": 1.2130659990073144e-07, "epoch": 0.8575186693822132, "percentage": 85.75, "elapsed_time": "0:25:15", "remaining_time": "0:04:11", "throughput": 6402.37, "total_tokens": 9705408} +{"current_steps": 10110, "total_steps": 11784, "loss": 0.064, "lr": 1.206004664276359e-07, "epoch": 0.8579429735234216, "percentage": 85.79, "elapsed_time": "0:25:16", "remaining_time": "0:04:11", "throughput": 6403.91, "total_tokens": 9709824} +{"current_steps": 10115, "total_steps": 11784, "loss": 0.0826, "lr": 1.198962622758447e-07, "epoch": 0.8583672776646301, "percentage": 85.84, "elapsed_time": "0:25:16", "remaining_time": "0:04:10", "throughput": 6405.92, "total_tokens": 9715072} +{"current_steps": 10120, "total_steps": 11784, "loss": 0.0706, "lr": 1.1919398899031585e-07, "epoch": 0.8587915818058385, "percentage": 85.88, "elapsed_time": "0:25:16", "remaining_time": "0:04:09", "throughput": 6407.77, "total_tokens": 9720000} +{"current_steps": 10125, "total_steps": 11784, "loss": 0.0041, "lr": 1.1849364811177288e-07, "epoch": 0.8592158859470469, "percentage": 85.92, "elapsed_time": "0:25:17", "remaining_time": "0:04:08", "throughput": 6409.25, "total_tokens": 9724288} +{"current_steps": 10130, "total_steps": 11784, "loss": 0.0681, "lr": 1.1779524117669837e-07, "epoch": 0.8596401900882552, "percentage": 85.96, "elapsed_time": "0:25:17", "remaining_time": "0:04:07", "throughput": 6411.1, "total_tokens": 9729280} +{"current_steps": 10135, "total_steps": 11784, "loss": 0.0764, "lr": 1.1709876971733269e-07, "epoch": 0.8600644942294636, "percentage": 86.01, "elapsed_time": "0:25:17", "remaining_time": "0:04:06", "throughput": 6412.63, "total_tokens": 9733696} +{"current_steps": 10140, "total_steps": 11784, "loss": 0.016, "lr": 1.1640423526166987e-07, "epoch": 0.860488798370672, "percentage": 86.05, "elapsed_time": "0:25:18", "remaining_time": "0:04:06", "throughput": 6414.44, "total_tokens": 9738624} +{"current_steps": 10145, "total_steps": 11784, "loss": 0.0557, "lr": 1.1571163933345462e-07, "epoch": 0.8609131025118805, "percentage": 86.09, "elapsed_time": "0:25:18", "remaining_time": "0:04:05", "throughput": 6416.23, "total_tokens": 9743488} +{"current_steps": 10150, "total_steps": 11784, "loss": 0.0133, "lr": 1.150209834521777e-07, "epoch": 0.8613374066530889, "percentage": 86.13, "elapsed_time": "0:25:18", "remaining_time": "0:04:04", "throughput": 6418.72, "total_tokens": 9749632} +{"current_steps": 10155, "total_steps": 11784, "loss": 0.0291, "lr": 1.1433226913307514e-07, "epoch": 0.8617617107942973, "percentage": 86.18, "elapsed_time": "0:25:19", "remaining_time": "0:04:03", "throughput": 6420.48, "total_tokens": 9754432} +{"current_steps": 10160, "total_steps": 11784, "loss": 0.0293, "lr": 1.1364549788712185e-07, "epoch": 0.8621860149355057, "percentage": 86.22, "elapsed_time": "0:25:19", "remaining_time": "0:04:02", "throughput": 6422.21, "total_tokens": 9759168} +{"current_steps": 10165, "total_steps": 11784, "loss": 0.012, "lr": 1.1296067122103059e-07, "epoch": 0.8626103190767141, "percentage": 86.26, "elapsed_time": "0:25:19", "remaining_time": "0:04:02", "throughput": 6424.04, "total_tokens": 9764096} +{"current_steps": 10170, "total_steps": 11784, "loss": 0.0581, "lr": 1.1227779063724818e-07, "epoch": 0.8630346232179226, "percentage": 86.3, "elapsed_time": "0:25:20", "remaining_time": "0:04:01", "throughput": 6425.72, "total_tokens": 9768768} +{"current_steps": 10175, "total_steps": 11784, "loss": 0.0188, "lr": 1.115968576339511e-07, "epoch": 0.863458927359131, "percentage": 86.35, "elapsed_time": "0:25:20", "remaining_time": "0:04:00", "throughput": 6427.25, "total_tokens": 9773184} +{"current_steps": 10180, "total_steps": 11784, "loss": 0.0713, "lr": 1.1091787370504347e-07, "epoch": 0.8638832315003394, "percentage": 86.39, "elapsed_time": "0:25:20", "remaining_time": "0:03:59", "throughput": 6429.41, "total_tokens": 9778688} +{"current_steps": 10185, "total_steps": 11784, "loss": 0.0388, "lr": 1.1024084034015347e-07, "epoch": 0.8643075356415478, "percentage": 86.43, "elapsed_time": "0:25:21", "remaining_time": "0:03:58", "throughput": 6430.99, "total_tokens": 9783168} +{"current_steps": 10190, "total_steps": 11784, "loss": 0.0622, "lr": 1.095657590246295e-07, "epoch": 0.8647318397827563, "percentage": 86.47, "elapsed_time": "0:25:21", "remaining_time": "0:03:58", "throughput": 6432.61, "total_tokens": 9787712} +{"current_steps": 10195, "total_steps": 11784, "loss": 0.0461, "lr": 1.0889263123953773e-07, "epoch": 0.8651561439239647, "percentage": 86.52, "elapsed_time": "0:25:21", "remaining_time": "0:03:57", "throughput": 6434.3, "total_tokens": 9792384} +{"current_steps": 10200, "total_steps": 11784, "loss": 0.1011, "lr": 1.0822145846165853e-07, "epoch": 0.8655804480651731, "percentage": 86.56, "elapsed_time": "0:25:22", "remaining_time": "0:03:56", "throughput": 6436.41, "total_tokens": 9797824} +{"current_steps": 10205, "total_steps": 11784, "loss": 0.1421, "lr": 1.0755224216348235e-07, "epoch": 0.8660047522063815, "percentage": 86.6, "elapsed_time": "0:25:22", "remaining_time": "0:03:55", "throughput": 6438.3, "total_tokens": 9802880} +{"current_steps": 10210, "total_steps": 11784, "loss": 0.088, "lr": 1.0688498381320854e-07, "epoch": 0.8664290563475899, "percentage": 86.64, "elapsed_time": "0:25:22", "remaining_time": "0:03:54", "throughput": 6439.88, "total_tokens": 9807424} +{"current_steps": 10215, "total_steps": 11784, "loss": 0.0545, "lr": 1.0621968487473975e-07, "epoch": 0.8668533604887984, "percentage": 86.69, "elapsed_time": "0:25:23", "remaining_time": "0:03:53", "throughput": 6441.78, "total_tokens": 9812480} +{"current_steps": 10220, "total_steps": 11784, "loss": 0.0356, "lr": 1.0555634680768066e-07, "epoch": 0.8672776646300068, "percentage": 86.73, "elapsed_time": "0:25:23", "remaining_time": "0:03:53", "throughput": 6443.35, "total_tokens": 9816960} +{"current_steps": 10225, "total_steps": 11784, "loss": 0.0259, "lr": 1.0489497106733347e-07, "epoch": 0.8677019687712152, "percentage": 86.77, "elapsed_time": "0:25:23", "remaining_time": "0:03:52", "throughput": 6444.98, "total_tokens": 9821568} +{"current_steps": 10230, "total_steps": 11784, "loss": 0.0805, "lr": 1.0423555910469561e-07, "epoch": 0.8681262729124236, "percentage": 86.81, "elapsed_time": "0:25:24", "remaining_time": "0:03:51", "throughput": 6446.54, "total_tokens": 9826048} +{"current_steps": 10235, "total_steps": 11784, "loss": 0.0412, "lr": 1.0357811236645597e-07, "epoch": 0.868550577053632, "percentage": 86.86, "elapsed_time": "0:25:24", "remaining_time": "0:03:50", "throughput": 6448.23, "total_tokens": 9830720} +{"current_steps": 10240, "total_steps": 11784, "loss": 0.0653, "lr": 1.0292263229499209e-07, "epoch": 0.8689748811948405, "percentage": 86.9, "elapsed_time": "0:25:24", "remaining_time": "0:03:49", "throughput": 6450.05, "total_tokens": 9835648} +{"current_steps": 10245, "total_steps": 11784, "loss": 0.1175, "lr": 1.022691203283661e-07, "epoch": 0.8693991853360489, "percentage": 86.94, "elapsed_time": "0:25:25", "remaining_time": "0:03:49", "throughput": 6451.5, "total_tokens": 9839936} +{"current_steps": 10250, "total_steps": 11784, "loss": 0.0568, "lr": 1.0161757790032355e-07, "epoch": 0.8698234894772573, "percentage": 86.98, "elapsed_time": "0:25:25", "remaining_time": "0:03:48", "throughput": 6453.18, "total_tokens": 9844608} +{"current_steps": 10255, "total_steps": 11784, "loss": 0.0191, "lr": 1.0096800644028791e-07, "epoch": 0.8702477936184657, "percentage": 87.02, "elapsed_time": "0:25:25", "remaining_time": "0:03:47", "throughput": 6454.62, "total_tokens": 9848896} +{"current_steps": 10260, "total_steps": 11784, "loss": 0.0822, "lr": 1.003204073733589e-07, "epoch": 0.8706720977596741, "percentage": 87.07, "elapsed_time": "0:25:26", "remaining_time": "0:03:46", "throughput": 6456.08, "total_tokens": 9853184} +{"current_steps": 10265, "total_steps": 11784, "loss": 0.0627, "lr": 9.967478212030923e-08, "epoch": 0.8710964019008826, "percentage": 87.11, "elapsed_time": "0:25:26", "remaining_time": "0:03:45", "throughput": 6457.75, "total_tokens": 9857856} +{"current_steps": 10270, "total_steps": 11784, "loss": 0.0556, "lr": 9.903113209758096e-08, "epoch": 0.871520706042091, "percentage": 87.15, "elapsed_time": "0:25:26", "remaining_time": "0:03:45", "throughput": 6459.47, "total_tokens": 9862592} +{"current_steps": 10275, "total_steps": 11784, "loss": 0.0582, "lr": 9.838945871728266e-08, "epoch": 0.8719450101832994, "percentage": 87.19, "elapsed_time": "0:25:27", "remaining_time": "0:03:44", "throughput": 6461.3, "total_tokens": 9867584} +{"current_steps": 10280, "total_steps": 11784, "loss": 0.0089, "lr": 9.774976338718677e-08, "epoch": 0.8723693143245078, "percentage": 87.24, "elapsed_time": "0:25:27", "remaining_time": "0:03:43", "throughput": 6463.04, "total_tokens": 9872384} +{"current_steps": 10285, "total_steps": 11784, "loss": 0.0969, "lr": 9.711204751072499e-08, "epoch": 0.8727936184657162, "percentage": 87.28, "elapsed_time": "0:25:27", "remaining_time": "0:03:42", "throughput": 6464.5, "total_tokens": 9876672} +{"current_steps": 10290, "total_steps": 11784, "loss": 0.0259, "lr": 9.647631248698773e-08, "epoch": 0.8732179226069247, "percentage": 87.32, "elapsed_time": "0:25:28", "remaining_time": "0:03:41", "throughput": 6466.42, "total_tokens": 9881792} +{"current_steps": 10295, "total_steps": 11784, "loss": 0.1201, "lr": 9.584255971071886e-08, "epoch": 0.8736422267481331, "percentage": 87.36, "elapsed_time": "0:25:28", "remaining_time": "0:03:41", "throughput": 6468.08, "total_tokens": 9886464} +{"current_steps": 10300, "total_steps": 11784, "loss": 0.0121, "lr": 9.521079057231274e-08, "epoch": 0.8740665308893415, "percentage": 87.41, "elapsed_time": "0:25:28", "remaining_time": "0:03:40", "throughput": 6469.81, "total_tokens": 9891264} +{"current_steps": 10305, "total_steps": 11784, "loss": 0.0298, "lr": 9.45810064578133e-08, "epoch": 0.8744908350305499, "percentage": 87.45, "elapsed_time": "0:25:29", "remaining_time": "0:03:39", "throughput": 6471.69, "total_tokens": 9896320} +{"current_steps": 10310, "total_steps": 11784, "loss": 0.0864, "lr": 9.39532087489081e-08, "epoch": 0.8749151391717583, "percentage": 87.49, "elapsed_time": "0:25:29", "remaining_time": "0:03:38", "throughput": 6473.65, "total_tokens": 9901504} +{"current_steps": 10315, "total_steps": 11784, "loss": 0.0504, "lr": 9.33273988229275e-08, "epoch": 0.8753394433129668, "percentage": 87.53, "elapsed_time": "0:25:29", "remaining_time": "0:03:37", "throughput": 6475.77, "total_tokens": 9907008} +{"current_steps": 10320, "total_steps": 11784, "loss": 0.0989, "lr": 9.270357805284057e-08, "epoch": 0.8757637474541752, "percentage": 87.58, "elapsed_time": "0:25:30", "remaining_time": "0:03:37", "throughput": 6477.47, "total_tokens": 9911744} +{"current_steps": 10325, "total_steps": 11784, "loss": 0.1047, "lr": 9.208174780725253e-08, "epoch": 0.8761880515953836, "percentage": 87.62, "elapsed_time": "0:25:30", "remaining_time": "0:03:36", "throughput": 6478.95, "total_tokens": 9916096} +{"current_steps": 10330, "total_steps": 11784, "loss": 0.0295, "lr": 9.146190945040145e-08, "epoch": 0.876612355736592, "percentage": 87.66, "elapsed_time": "0:25:30", "remaining_time": "0:03:35", "throughput": 6480.41, "total_tokens": 9920448} +{"current_steps": 10335, "total_steps": 11784, "loss": 0.0886, "lr": 9.084406434215553e-08, "epoch": 0.8770366598778004, "percentage": 87.7, "elapsed_time": "0:25:31", "remaining_time": "0:03:34", "throughput": 6482.17, "total_tokens": 9925312} +{"current_steps": 10340, "total_steps": 11784, "loss": 0.0846, "lr": 9.022821383800926e-08, "epoch": 0.8774609640190089, "percentage": 87.75, "elapsed_time": "0:25:31", "remaining_time": "0:03:33", "throughput": 6483.78, "total_tokens": 9929920} +{"current_steps": 10345, "total_steps": 11784, "loss": 0.0035, "lr": 8.961435928908267e-08, "epoch": 0.8778852681602173, "percentage": 87.79, "elapsed_time": "0:25:31", "remaining_time": "0:03:33", "throughput": 6485.61, "total_tokens": 9934912} +{"current_steps": 10350, "total_steps": 11784, "loss": 0.0809, "lr": 8.900250204211513e-08, "epoch": 0.8783095723014257, "percentage": 87.83, "elapsed_time": "0:25:32", "remaining_time": "0:03:32", "throughput": 6487.25, "total_tokens": 9939520} +{"current_steps": 10355, "total_steps": 11784, "loss": 0.0441, "lr": 8.839264343946506e-08, "epoch": 0.8787338764426341, "percentage": 87.87, "elapsed_time": "0:25:32", "remaining_time": "0:03:31", "throughput": 6489.01, "total_tokens": 9944384} +{"current_steps": 10360, "total_steps": 11784, "loss": 0.0557, "lr": 8.778478481910611e-08, "epoch": 0.8791581805838425, "percentage": 87.92, "elapsed_time": "0:25:32", "remaining_time": "0:03:30", "throughput": 6490.67, "total_tokens": 9949056} +{"current_steps": 10365, "total_steps": 11784, "loss": 0.0435, "lr": 8.717892751462363e-08, "epoch": 0.879582484725051, "percentage": 87.96, "elapsed_time": "0:25:33", "remaining_time": "0:03:29", "throughput": 6492.59, "total_tokens": 9954176} +{"current_steps": 10370, "total_steps": 11784, "loss": 0.0449, "lr": 8.657507285521281e-08, "epoch": 0.8800067888662594, "percentage": 88.0, "elapsed_time": "0:25:33", "remaining_time": "0:03:29", "throughput": 6494.29, "total_tokens": 9958912} +{"current_steps": 10375, "total_steps": 11784, "loss": 0.0793, "lr": 8.597322216567493e-08, "epoch": 0.8804310930074678, "percentage": 88.04, "elapsed_time": "0:25:33", "remaining_time": "0:03:28", "throughput": 6495.97, "total_tokens": 9963648} +{"current_steps": 10380, "total_steps": 11784, "loss": 0.0145, "lr": 8.537337676641442e-08, "epoch": 0.8808553971486762, "percentage": 88.09, "elapsed_time": "0:25:34", "remaining_time": "0:03:27", "throughput": 6497.6, "total_tokens": 9968256} +{"current_steps": 10385, "total_steps": 11784, "loss": 0.0439, "lr": 8.477553797343728e-08, "epoch": 0.8812797012898846, "percentage": 88.13, "elapsed_time": "0:25:34", "remaining_time": "0:03:26", "throughput": 6499.5, "total_tokens": 9973376} +{"current_steps": 10390, "total_steps": 11784, "loss": 0.0804, "lr": 8.41797070983461e-08, "epoch": 0.881704005431093, "percentage": 88.17, "elapsed_time": "0:25:34", "remaining_time": "0:03:25", "throughput": 6501.27, "total_tokens": 9978240} +{"current_steps": 10395, "total_steps": 11784, "loss": 0.003, "lr": 8.358588544833877e-08, "epoch": 0.8821283095723014, "percentage": 88.21, "elapsed_time": "0:25:35", "remaining_time": "0:03:25", "throughput": 6502.86, "total_tokens": 9982784} +{"current_steps": 10400, "total_steps": 11784, "loss": 0.0894, "lr": 8.29940743262052e-08, "epoch": 0.8825526137135098, "percentage": 88.26, "elapsed_time": "0:25:35", "remaining_time": "0:03:24", "throughput": 6504.26, "total_tokens": 9987008} +{"current_steps": 10405, "total_steps": 11784, "loss": 0.0803, "lr": 8.240427503032443e-08, "epoch": 0.8829769178547182, "percentage": 88.3, "elapsed_time": "0:25:35", "remaining_time": "0:03:23", "throughput": 6506.43, "total_tokens": 9992640} +{"current_steps": 10410, "total_steps": 11784, "loss": 0.0592, "lr": 8.181648885466141e-08, "epoch": 0.8834012219959266, "percentage": 88.34, "elapsed_time": "0:25:36", "remaining_time": "0:03:22", "throughput": 6507.71, "total_tokens": 9996672} +{"current_steps": 10415, "total_steps": 11784, "loss": 0.07, "lr": 8.123071708876473e-08, "epoch": 0.883825526137135, "percentage": 88.38, "elapsed_time": "0:25:36", "remaining_time": "0:03:21", "throughput": 6509.29, "total_tokens": 10001216} +{"current_steps": 10420, "total_steps": 11784, "loss": 0.0299, "lr": 8.064696101776358e-08, "epoch": 0.8842498302783435, "percentage": 88.42, "elapsed_time": "0:25:36", "remaining_time": "0:03:21", "throughput": 6511.09, "total_tokens": 10006144} +{"current_steps": 10425, "total_steps": 11784, "loss": 0.0395, "lr": 8.006522192236487e-08, "epoch": 0.8846741344195519, "percentage": 88.47, "elapsed_time": "0:25:37", "remaining_time": "0:03:20", "throughput": 6513.11, "total_tokens": 10011520} +{"current_steps": 10430, "total_steps": 11784, "loss": 0.0071, "lr": 7.948550107885043e-08, "epoch": 0.8850984385607603, "percentage": 88.51, "elapsed_time": "0:25:37", "remaining_time": "0:03:19", "throughput": 6514.93, "total_tokens": 10016512} +{"current_steps": 10435, "total_steps": 11784, "loss": 0.0866, "lr": 7.89077997590738e-08, "epoch": 0.8855227427019687, "percentage": 88.55, "elapsed_time": "0:25:37", "remaining_time": "0:03:18", "throughput": 6516.44, "total_tokens": 10020928} +{"current_steps": 10440, "total_steps": 11784, "loss": 0.0255, "lr": 7.833211923045891e-08, "epoch": 0.8859470468431772, "percentage": 88.59, "elapsed_time": "0:25:38", "remaining_time": "0:03:18", "throughput": 6518.26, "total_tokens": 10025920} +{"current_steps": 10445, "total_steps": 11784, "loss": 0.0434, "lr": 7.775846075599524e-08, "epoch": 0.8863713509843856, "percentage": 88.64, "elapsed_time": "0:25:38", "remaining_time": "0:03:17", "throughput": 6519.83, "total_tokens": 10030464} +{"current_steps": 10450, "total_steps": 11784, "loss": 0.0498, "lr": 7.718682559423651e-08, "epoch": 0.886795655125594, "percentage": 88.68, "elapsed_time": "0:25:38", "remaining_time": "0:03:16", "throughput": 6521.57, "total_tokens": 10035328} +{"current_steps": 10455, "total_steps": 11784, "loss": 0.0692, "lr": 7.661721499929752e-08, "epoch": 0.8872199592668024, "percentage": 88.72, "elapsed_time": "0:25:39", "remaining_time": "0:03:15", "throughput": 6523.43, "total_tokens": 10040384} +{"current_steps": 10460, "total_steps": 11784, "loss": 0.0334, "lr": 7.60496302208512e-08, "epoch": 0.8876442634080108, "percentage": 88.76, "elapsed_time": "0:25:39", "remaining_time": "0:03:14", "throughput": 6525.29, "total_tokens": 10045440} +{"current_steps": 10465, "total_steps": 11784, "loss": 0.0272, "lr": 7.548407250412614e-08, "epoch": 0.8880685675492193, "percentage": 88.81, "elapsed_time": "0:25:39", "remaining_time": "0:03:14", "throughput": 6527.11, "total_tokens": 10050432} +{"current_steps": 10470, "total_steps": 11784, "loss": 0.0403, "lr": 7.492054308990381e-08, "epoch": 0.8884928716904277, "percentage": 88.85, "elapsed_time": "0:25:40", "remaining_time": "0:03:13", "throughput": 6528.84, "total_tokens": 10055296} +{"current_steps": 10475, "total_steps": 11784, "loss": 0.0513, "lr": 7.435904321451524e-08, "epoch": 0.8889171758316361, "percentage": 88.89, "elapsed_time": "0:25:40", "remaining_time": "0:03:12", "throughput": 6530.73, "total_tokens": 10060416} +{"current_steps": 10480, "total_steps": 11784, "loss": 0.0397, "lr": 7.379957410983995e-08, "epoch": 0.8893414799728445, "percentage": 88.93, "elapsed_time": "0:25:40", "remaining_time": "0:03:11", "throughput": 6532.58, "total_tokens": 10065472} +{"current_steps": 10485, "total_steps": 11784, "loss": 0.0267, "lr": 7.324213700330095e-08, "epoch": 0.8897657841140529, "percentage": 88.98, "elapsed_time": "0:25:41", "remaining_time": "0:03:10", "throughput": 6534.57, "total_tokens": 10070784} +{"current_steps": 10490, "total_steps": 11784, "loss": 0.0202, "lr": 7.268673311786378e-08, "epoch": 0.8901900882552614, "percentage": 89.02, "elapsed_time": "0:25:41", "remaining_time": "0:03:10", "throughput": 6536.46, "total_tokens": 10075904} +{"current_steps": 10495, "total_steps": 11784, "loss": 0.0229, "lr": 7.213336367203338e-08, "epoch": 0.8906143923964698, "percentage": 89.06, "elapsed_time": "0:25:41", "remaining_time": "0:03:09", "throughput": 6538.19, "total_tokens": 10080768} +{"current_steps": 10500, "total_steps": 11784, "loss": 0.0579, "lr": 7.158202987985106e-08, "epoch": 0.8910386965376782, "percentage": 89.1, "elapsed_time": "0:25:42", "remaining_time": "0:03:08", "throughput": 6539.74, "total_tokens": 10085312} +{"current_steps": 10505, "total_steps": 11784, "loss": 0.1098, "lr": 7.10327329508923e-08, "epoch": 0.8914630006788866, "percentage": 89.15, "elapsed_time": "0:25:42", "remaining_time": "0:03:07", "throughput": 6541.27, "total_tokens": 10089792} +{"current_steps": 10510, "total_steps": 11784, "loss": 0.0712, "lr": 7.048547409026384e-08, "epoch": 0.891887304820095, "percentage": 89.19, "elapsed_time": "0:25:42", "remaining_time": "0:03:07", "throughput": 6543.19, "total_tokens": 10094976} +{"current_steps": 10515, "total_steps": 11784, "loss": 0.0331, "lr": 6.994025449860064e-08, "epoch": 0.8923116089613035, "percentage": 89.23, "elapsed_time": "0:25:43", "remaining_time": "0:03:06", "throughput": 6544.55, "total_tokens": 10099200} +{"current_steps": 10520, "total_steps": 11784, "loss": 0.075, "lr": 6.939707537206485e-08, "epoch": 0.8927359131025119, "percentage": 89.27, "elapsed_time": "0:25:43", "remaining_time": "0:03:05", "throughput": 6546.43, "total_tokens": 10104320} +{"current_steps": 10525, "total_steps": 11784, "loss": 0.0464, "lr": 6.885593790234056e-08, "epoch": 0.8931602172437203, "percentage": 89.32, "elapsed_time": "0:25:43", "remaining_time": "0:03:04", "throughput": 6548.24, "total_tokens": 10109312} +{"current_steps": 10530, "total_steps": 11784, "loss": 0.0293, "lr": 6.831684327663367e-08, "epoch": 0.8935845213849287, "percentage": 89.36, "elapsed_time": "0:25:44", "remaining_time": "0:03:03", "throughput": 6549.65, "total_tokens": 10113600} +{"current_steps": 10535, "total_steps": 11784, "loss": 0.0048, "lr": 6.777979267766786e-08, "epoch": 0.8940088255261371, "percentage": 89.4, "elapsed_time": "0:25:44", "remaining_time": "0:03:03", "throughput": 6551.27, "total_tokens": 10118272} +{"current_steps": 10540, "total_steps": 11784, "loss": 0.059, "lr": 6.724478728368277e-08, "epoch": 0.8944331296673456, "percentage": 89.44, "elapsed_time": "0:25:44", "remaining_time": "0:03:02", "throughput": 6552.75, "total_tokens": 10122688} +{"current_steps": 10545, "total_steps": 11784, "loss": 0.1179, "lr": 6.671182826843047e-08, "epoch": 0.894857433808554, "percentage": 89.49, "elapsed_time": "0:25:45", "remaining_time": "0:03:01", "throughput": 6554.72, "total_tokens": 10128000} +{"current_steps": 10550, "total_steps": 11784, "loss": 0.0508, "lr": 6.618091680117399e-08, "epoch": 0.8952817379497624, "percentage": 89.53, "elapsed_time": "0:25:45", "remaining_time": "0:03:00", "throughput": 6556.28, "total_tokens": 10132544} +{"current_steps": 10555, "total_steps": 11784, "loss": 0.0739, "lr": 6.565205404668395e-08, "epoch": 0.8957060420909708, "percentage": 89.57, "elapsed_time": "0:25:45", "remaining_time": "0:02:59", "throughput": 6558.61, "total_tokens": 10138496} +{"current_steps": 10560, "total_steps": 11784, "loss": 0.0383, "lr": 6.512524116523633e-08, "epoch": 0.8961303462321792, "percentage": 89.61, "elapsed_time": "0:25:46", "remaining_time": "0:02:59", "throughput": 6560.37, "total_tokens": 10143424} +{"current_steps": 10565, "total_steps": 11784, "loss": 0.0861, "lr": 6.460047931261003e-08, "epoch": 0.8965546503733877, "percentage": 89.66, "elapsed_time": "0:25:46", "remaining_time": "0:02:58", "throughput": 6562.3, "total_tokens": 10148672} +{"current_steps": 10570, "total_steps": 11784, "loss": 0.0124, "lr": 6.407776964008383e-08, "epoch": 0.8969789545145961, "percentage": 89.7, "elapsed_time": "0:25:46", "remaining_time": "0:02:57", "throughput": 6563.95, "total_tokens": 10153408} +{"current_steps": 10575, "total_steps": 11784, "loss": 0.055, "lr": 6.355711329443481e-08, "epoch": 0.8974032586558045, "percentage": 89.74, "elapsed_time": "0:25:47", "remaining_time": "0:02:56", "throughput": 6565.49, "total_tokens": 10157952} +{"current_steps": 10580, "total_steps": 11784, "loss": 0.0355, "lr": 6.303851141793437e-08, "epoch": 0.8978275627970129, "percentage": 89.78, "elapsed_time": "0:25:47", "remaining_time": "0:02:56", "throughput": 6567.71, "total_tokens": 10163712} +{"current_steps": 10585, "total_steps": 11784, "loss": 0.0312, "lr": 6.252196514834751e-08, "epoch": 0.8982518669382213, "percentage": 89.83, "elapsed_time": "0:25:47", "remaining_time": "0:02:55", "throughput": 6569.41, "total_tokens": 10168512} +{"current_steps": 10590, "total_steps": 11784, "loss": 0.0404, "lr": 6.200747561892882e-08, "epoch": 0.8986761710794298, "percentage": 89.87, "elapsed_time": "0:25:48", "remaining_time": "0:02:54", "throughput": 6571.0, "total_tokens": 10173120} +{"current_steps": 10595, "total_steps": 11784, "loss": 0.0617, "lr": 6.149504395842087e-08, "epoch": 0.8991004752206382, "percentage": 89.91, "elapsed_time": "0:25:48", "remaining_time": "0:02:53", "throughput": 6572.65, "total_tokens": 10177856} +{"current_steps": 10600, "total_steps": 11784, "loss": 0.0328, "lr": 6.098467129105123e-08, "epoch": 0.8995247793618466, "percentage": 89.95, "elapsed_time": "0:25:48", "remaining_time": "0:02:53", "throughput": 6574.01, "total_tokens": 10182080} +{"current_steps": 10605, "total_steps": 11784, "loss": 0.0675, "lr": 6.047635873653068e-08, "epoch": 0.899949083503055, "percentage": 89.99, "elapsed_time": "0:25:49", "remaining_time": "0:02:52", "throughput": 6575.49, "total_tokens": 10186496} +{"current_steps": 10610, "total_steps": 11784, "loss": 0.0368, "lr": 5.997010741004949e-08, "epoch": 0.9003733876442634, "percentage": 90.04, "elapsed_time": "0:25:49", "remaining_time": "0:02:51", "throughput": 6576.97, "total_tokens": 10190912} +{"current_steps": 10615, "total_steps": 11784, "loss": 0.0525, "lr": 5.946591842227677e-08, "epoch": 0.9007976917854719, "percentage": 90.08, "elapsed_time": "0:25:49", "remaining_time": "0:02:50", "throughput": 6578.25, "total_tokens": 10195008} +{"current_steps": 10620, "total_steps": 11784, "loss": 0.0855, "lr": 5.8963792879356265e-08, "epoch": 0.9012219959266803, "percentage": 90.12, "elapsed_time": "0:25:50", "remaining_time": "0:02:49", "throughput": 6579.72, "total_tokens": 10199424} +{"current_steps": 10620, "total_steps": 11784, "eval_loss": 0.05229973420500755, "epoch": 0.9012219959266803, "percentage": 90.12, "elapsed_time": "0:26:05", "remaining_time": "0:02:51", "throughput": 6513.62, "total_tokens": 10199424} +{"current_steps": 10625, "total_steps": 11784, "loss": 0.1033, "lr": 5.84637318829051e-08, "epoch": 0.9016463000678887, "percentage": 90.16, "elapsed_time": "0:26:27", "remaining_time": "0:02:53", "throughput": 6426.67, "total_tokens": 10203968} +{"current_steps": 10630, "total_steps": 11784, "loss": 0.0844, "lr": 5.796573653001091e-08, "epoch": 0.9020706042090971, "percentage": 90.21, "elapsed_time": "0:26:28", "remaining_time": "0:02:52", "throughput": 6428.28, "total_tokens": 10208640} +{"current_steps": 10635, "total_steps": 11784, "loss": 0.0136, "lr": 5.746980791322942e-08, "epoch": 0.9024949083503055, "percentage": 90.25, "elapsed_time": "0:26:28", "remaining_time": "0:02:51", "throughput": 6429.99, "total_tokens": 10213504} +{"current_steps": 10640, "total_steps": 11784, "loss": 0.0535, "lr": 5.697594712058218e-08, "epoch": 0.902919212491514, "percentage": 90.29, "elapsed_time": "0:26:28", "remaining_time": "0:02:50", "throughput": 6431.74, "total_tokens": 10218432} +{"current_steps": 10645, "total_steps": 11784, "loss": 0.09, "lr": 5.6484155235554275e-08, "epoch": 0.9033435166327224, "percentage": 90.33, "elapsed_time": "0:26:29", "remaining_time": "0:02:50", "throughput": 6433.44, "total_tokens": 10223296} +{"current_steps": 10650, "total_steps": 11784, "loss": 0.1227, "lr": 5.599443333709131e-08, "epoch": 0.9037678207739308, "percentage": 90.38, "elapsed_time": "0:26:29", "remaining_time": "0:02:49", "throughput": 6435.03, "total_tokens": 10227904} +{"current_steps": 10655, "total_steps": 11784, "loss": 0.0481, "lr": 5.5506782499598394e-08, "epoch": 0.9041921249151391, "percentage": 90.42, "elapsed_time": "0:26:29", "remaining_time": "0:02:48", "throughput": 6436.66, "total_tokens": 10232640} +{"current_steps": 10660, "total_steps": 11784, "loss": 0.0891, "lr": 5.502120379293585e-08, "epoch": 0.9046164290563475, "percentage": 90.46, "elapsed_time": "0:26:30", "remaining_time": "0:02:47", "throughput": 6438.02, "total_tokens": 10236864} +{"current_steps": 10665, "total_steps": 11784, "loss": 0.0525, "lr": 5.453769828241872e-08, "epoch": 0.905040733197556, "percentage": 90.5, "elapsed_time": "0:26:30", "remaining_time": "0:02:46", "throughput": 6439.45, "total_tokens": 10241216} +{"current_steps": 10670, "total_steps": 11784, "loss": 0.0544, "lr": 5.4056267028813606e-08, "epoch": 0.9054650373387644, "percentage": 90.55, "elapsed_time": "0:26:30", "remaining_time": "0:02:46", "throughput": 6441.1, "total_tokens": 10245952} +{"current_steps": 10675, "total_steps": 11784, "loss": 0.0321, "lr": 5.357691108833584e-08, "epoch": 0.9058893414799728, "percentage": 90.59, "elapsed_time": "0:26:31", "remaining_time": "0:02:45", "throughput": 6443.12, "total_tokens": 10251392} +{"current_steps": 10680, "total_steps": 11784, "loss": 0.1047, "lr": 5.309963151264829e-08, "epoch": 0.9063136456211812, "percentage": 90.63, "elapsed_time": "0:26:31", "remaining_time": "0:02:44", "throughput": 6444.75, "total_tokens": 10256128} +{"current_steps": 10685, "total_steps": 11784, "loss": 0.043, "lr": 5.262442934885813e-08, "epoch": 0.9067379497623896, "percentage": 90.67, "elapsed_time": "0:26:31", "remaining_time": "0:02:43", "throughput": 6446.1, "total_tokens": 10260352} +{"current_steps": 10690, "total_steps": 11784, "loss": 0.0157, "lr": 5.21513056395152e-08, "epoch": 0.9071622539035981, "percentage": 90.72, "elapsed_time": "0:26:32", "remaining_time": "0:02:42", "throughput": 6447.88, "total_tokens": 10265344} +{"current_steps": 10695, "total_steps": 11784, "loss": 0.0575, "lr": 5.168026142260862e-08, "epoch": 0.9075865580448065, "percentage": 90.76, "elapsed_time": "0:26:32", "remaining_time": "0:02:42", "throughput": 6449.24, "total_tokens": 10269632} +{"current_steps": 10700, "total_steps": 11784, "loss": 0.0206, "lr": 5.121129773156663e-08, "epoch": 0.9080108621860149, "percentage": 90.8, "elapsed_time": "0:26:32", "remaining_time": "0:02:41", "throughput": 6450.97, "total_tokens": 10274560} +{"current_steps": 10705, "total_steps": 11784, "loss": 0.0327, "lr": 5.074441559525167e-08, "epoch": 0.9084351663272233, "percentage": 90.84, "elapsed_time": "0:26:33", "remaining_time": "0:02:40", "throughput": 6452.73, "total_tokens": 10279552} +{"current_steps": 10710, "total_steps": 11784, "loss": 0.0499, "lr": 5.027961603796027e-08, "epoch": 0.9088594704684317, "percentage": 90.89, "elapsed_time": "0:26:33", "remaining_time": "0:02:39", "throughput": 6454.35, "total_tokens": 10284288} +{"current_steps": 10715, "total_steps": 11784, "loss": 0.0414, "lr": 4.981690007941952e-08, "epoch": 0.9092837746096402, "percentage": 90.93, "elapsed_time": "0:26:33", "remaining_time": "0:02:39", "throughput": 6456.29, "total_tokens": 10289600} +{"current_steps": 10720, "total_steps": 11784, "loss": 0.0168, "lr": 4.93562687347856e-08, "epoch": 0.9097080787508486, "percentage": 90.97, "elapsed_time": "0:26:34", "remaining_time": "0:02:38", "throughput": 6457.81, "total_tokens": 10294144} +{"current_steps": 10725, "total_steps": 11784, "loss": 0.0495, "lr": 4.889772301464112e-08, "epoch": 0.910132382892057, "percentage": 91.01, "elapsed_time": "0:26:34", "remaining_time": "0:02:37", "throughput": 6459.38, "total_tokens": 10298752} +{"current_steps": 10730, "total_steps": 11784, "loss": 0.0437, "lr": 4.844126392499304e-08, "epoch": 0.9105566870332654, "percentage": 91.06, "elapsed_time": "0:26:34", "remaining_time": "0:02:36", "throughput": 6460.98, "total_tokens": 10303424} +{"current_steps": 10735, "total_steps": 11784, "loss": 0.0589, "lr": 4.7986892467270057e-08, "epoch": 0.9109809911744738, "percentage": 91.1, "elapsed_time": "0:26:35", "remaining_time": "0:02:35", "throughput": 6462.58, "total_tokens": 10308096} +{"current_steps": 10740, "total_steps": 11784, "loss": 0.0508, "lr": 4.7534609638321785e-08, "epoch": 0.9114052953156823, "percentage": 91.14, "elapsed_time": "0:26:35", "remaining_time": "0:02:35", "throughput": 6464.39, "total_tokens": 10313152} +{"current_steps": 10745, "total_steps": 11784, "loss": 0.0348, "lr": 4.70844164304145e-08, "epoch": 0.9118295994568907, "percentage": 91.18, "elapsed_time": "0:26:35", "remaining_time": "0:02:34", "throughput": 6466.09, "total_tokens": 10318016} +{"current_steps": 10750, "total_steps": 11784, "loss": 0.0304, "lr": 4.663631383123057e-08, "epoch": 0.9122539035980991, "percentage": 91.23, "elapsed_time": "0:26:36", "remaining_time": "0:02:33", "throughput": 6467.55, "total_tokens": 10322432} +{"current_steps": 10755, "total_steps": 11784, "loss": 0.0115, "lr": 4.61903028238656e-08, "epoch": 0.9126782077393075, "percentage": 91.27, "elapsed_time": "0:26:36", "remaining_time": "0:02:32", "throughput": 6469.17, "total_tokens": 10327168} +{"current_steps": 10760, "total_steps": 11784, "loss": 0.0781, "lr": 4.5746384386826767e-08, "epoch": 0.9131025118805159, "percentage": 91.31, "elapsed_time": "0:26:36", "remaining_time": "0:02:31", "throughput": 6470.64, "total_tokens": 10331584} +{"current_steps": 10765, "total_steps": 11784, "loss": 0.0269, "lr": 4.5304559494030004e-08, "epoch": 0.9135268160217244, "percentage": 91.35, "elapsed_time": "0:26:37", "remaining_time": "0:02:31", "throughput": 6472.45, "total_tokens": 10336640} +{"current_steps": 10770, "total_steps": 11784, "loss": 0.0617, "lr": 4.486482911479839e-08, "epoch": 0.9139511201629328, "percentage": 91.4, "elapsed_time": "0:26:37", "remaining_time": "0:02:30", "throughput": 6474.12, "total_tokens": 10341440} +{"current_steps": 10775, "total_steps": 11784, "loss": 0.0104, "lr": 4.442719421385921e-08, "epoch": 0.9143754243041412, "percentage": 91.44, "elapsed_time": "0:26:37", "remaining_time": "0:02:29", "throughput": 6475.97, "total_tokens": 10346624} +{"current_steps": 10780, "total_steps": 11784, "loss": 0.0092, "lr": 4.399165575134378e-08, "epoch": 0.9147997284453496, "percentage": 91.48, "elapsed_time": "0:26:38", "remaining_time": "0:02:28", "throughput": 6477.69, "total_tokens": 10351552} +{"current_steps": 10785, "total_steps": 11784, "loss": 0.0601, "lr": 4.3558214682782645e-08, "epoch": 0.915224032586558, "percentage": 91.52, "elapsed_time": "0:26:38", "remaining_time": "0:02:28", "throughput": 6479.33, "total_tokens": 10356352} +{"current_steps": 10790, "total_steps": 11784, "loss": 0.0717, "lr": 4.312687195910558e-08, "epoch": 0.9156483367277665, "percentage": 91.56, "elapsed_time": "0:26:38", "remaining_time": "0:02:27", "throughput": 6481.39, "total_tokens": 10361920} +{"current_steps": 10795, "total_steps": 11784, "loss": 0.0724, "lr": 4.269762852663894e-08, "epoch": 0.9160726408689749, "percentage": 91.61, "elapsed_time": "0:26:39", "remaining_time": "0:02:26", "throughput": 6482.79, "total_tokens": 10366272} +{"current_steps": 10800, "total_steps": 11784, "loss": 0.0167, "lr": 4.227048532710287e-08, "epoch": 0.9164969450101833, "percentage": 91.65, "elapsed_time": "0:26:39", "remaining_time": "0:02:25", "throughput": 6484.59, "total_tokens": 10371328} +{"current_steps": 10805, "total_steps": 11784, "loss": 0.0604, "lr": 4.184544329761008e-08, "epoch": 0.9169212491513917, "percentage": 91.69, "elapsed_time": "0:26:39", "remaining_time": "0:02:24", "throughput": 6486.36, "total_tokens": 10376384} +{"current_steps": 10810, "total_steps": 11784, "loss": 0.0687, "lr": 4.1422503370663553e-08, "epoch": 0.9173455532926001, "percentage": 91.73, "elapsed_time": "0:26:40", "remaining_time": "0:02:24", "throughput": 6487.97, "total_tokens": 10381120} +{"current_steps": 10815, "total_steps": 11784, "loss": 0.0225, "lr": 4.100166647415437e-08, "epoch": 0.9177698574338086, "percentage": 91.78, "elapsed_time": "0:26:40", "remaining_time": "0:02:23", "throughput": 6489.39, "total_tokens": 10385536} +{"current_steps": 10820, "total_steps": 11784, "loss": 0.0188, "lr": 4.058293353135988e-08, "epoch": 0.918194161575017, "percentage": 91.82, "elapsed_time": "0:26:40", "remaining_time": "0:02:22", "throughput": 6490.97, "total_tokens": 10390208} +{"current_steps": 10825, "total_steps": 11784, "loss": 0.051, "lr": 4.016630546094158e-08, "epoch": 0.9186184657162254, "percentage": 91.86, "elapsed_time": "0:26:41", "remaining_time": "0:02:21", "throughput": 6492.38, "total_tokens": 10394560} +{"current_steps": 10830, "total_steps": 11784, "loss": 0.0707, "lr": 3.975178317694239e-08, "epoch": 0.9190427698574338, "percentage": 91.9, "elapsed_time": "0:26:41", "remaining_time": "0:02:21", "throughput": 6493.74, "total_tokens": 10398848} +{"current_steps": 10835, "total_steps": 11784, "loss": 0.036, "lr": 3.9339367588786644e-08, "epoch": 0.9194670739986422, "percentage": 91.95, "elapsed_time": "0:26:41", "remaining_time": "0:02:20", "throughput": 6495.67, "total_tokens": 10404160} +{"current_steps": 10840, "total_steps": 11784, "loss": 0.051, "lr": 3.892905960127546e-08, "epoch": 0.9198913781398507, "percentage": 91.99, "elapsed_time": "0:26:42", "remaining_time": "0:02:19", "throughput": 6497.16, "total_tokens": 10408704} +{"current_steps": 10845, "total_steps": 11784, "loss": 0.0547, "lr": 3.852086011458688e-08, "epoch": 0.9203156822810591, "percentage": 92.03, "elapsed_time": "0:26:42", "remaining_time": "0:02:18", "throughput": 6498.69, "total_tokens": 10413312} +{"current_steps": 10850, "total_steps": 11784, "loss": 0.048, "lr": 3.811477002427288e-08, "epoch": 0.9207399864222675, "percentage": 92.07, "elapsed_time": "0:26:42", "remaining_time": "0:02:17", "throughput": 6500.3, "total_tokens": 10418048} +{"current_steps": 10855, "total_steps": 11784, "loss": 0.0239, "lr": 3.771079022125745e-08, "epoch": 0.9211642905634759, "percentage": 92.12, "elapsed_time": "0:26:43", "remaining_time": "0:02:17", "throughput": 6501.73, "total_tokens": 10422464} +{"current_steps": 10860, "total_steps": 11784, "loss": 0.1148, "lr": 3.7308921591835074e-08, "epoch": 0.9215885947046843, "percentage": 92.16, "elapsed_time": "0:26:43", "remaining_time": "0:02:16", "throughput": 6503.16, "total_tokens": 10426880} +{"current_steps": 10865, "total_steps": 11784, "loss": 0.1021, "lr": 3.6909165017668385e-08, "epoch": 0.9220128988458928, "percentage": 92.2, "elapsed_time": "0:26:43", "remaining_time": "0:02:15", "throughput": 6504.55, "total_tokens": 10431232} +{"current_steps": 10870, "total_steps": 11784, "loss": 0.1128, "lr": 3.651152137578617e-08, "epoch": 0.9224372029871012, "percentage": 92.24, "elapsed_time": "0:26:44", "remaining_time": "0:02:14", "throughput": 6506.77, "total_tokens": 10437120} +{"current_steps": 10875, "total_steps": 11784, "loss": 0.1289, "lr": 3.611599153858214e-08, "epoch": 0.9228615071283096, "percentage": 92.29, "elapsed_time": "0:26:44", "remaining_time": "0:02:14", "throughput": 6509.22, "total_tokens": 10443456} +{"current_steps": 10880, "total_steps": 11784, "loss": 0.0611, "lr": 3.572257637381182e-08, "epoch": 0.923285811269518, "percentage": 92.33, "elapsed_time": "0:26:44", "remaining_time": "0:02:13", "throughput": 6511.01, "total_tokens": 10448576} +{"current_steps": 10885, "total_steps": 11784, "loss": 0.0389, "lr": 3.533127674459202e-08, "epoch": 0.9237101154107265, "percentage": 92.37, "elapsed_time": "0:26:45", "remaining_time": "0:02:12", "throughput": 6512.68, "total_tokens": 10453440} +{"current_steps": 10890, "total_steps": 11784, "loss": 0.0268, "lr": 3.494209350939792e-08, "epoch": 0.9241344195519349, "percentage": 92.41, "elapsed_time": "0:26:45", "remaining_time": "0:02:11", "throughput": 6514.27, "total_tokens": 10458176} +{"current_steps": 10895, "total_steps": 11784, "loss": 0.0594, "lr": 3.455502752206152e-08, "epoch": 0.9245587236931433, "percentage": 92.46, "elapsed_time": "0:26:45", "remaining_time": "0:02:11", "throughput": 6516.02, "total_tokens": 10463232} +{"current_steps": 10900, "total_steps": 11784, "loss": 0.0892, "lr": 3.4170079631769764e-08, "epoch": 0.9249830278343517, "percentage": 92.5, "elapsed_time": "0:26:46", "remaining_time": "0:02:10", "throughput": 6517.65, "total_tokens": 10468032} +{"current_steps": 10905, "total_steps": 11784, "loss": 0.0135, "lr": 3.378725068306298e-08, "epoch": 0.9254073319755601, "percentage": 92.54, "elapsed_time": "0:26:46", "remaining_time": "0:02:09", "throughput": 6519.27, "total_tokens": 10472832} +{"current_steps": 10910, "total_steps": 11784, "loss": 0.0199, "lr": 3.3406541515832e-08, "epoch": 0.9258316361167686, "percentage": 92.58, "elapsed_time": "0:26:46", "remaining_time": "0:02:08", "throughput": 6520.69, "total_tokens": 10477248} +{"current_steps": 10915, "total_steps": 11784, "loss": 0.057, "lr": 3.302795296531813e-08, "epoch": 0.926255940257977, "percentage": 92.63, "elapsed_time": "0:26:47", "remaining_time": "0:02:07", "throughput": 6522.24, "total_tokens": 10481920} +{"current_steps": 10920, "total_steps": 11784, "loss": 0.0741, "lr": 3.265148586210942e-08, "epoch": 0.9266802443991853, "percentage": 92.67, "elapsed_time": "0:26:47", "remaining_time": "0:02:07", "throughput": 6523.97, "total_tokens": 10486976} +{"current_steps": 10925, "total_steps": 11784, "loss": 0.0575, "lr": 3.2277141032139746e-08, "epoch": 0.9271045485403937, "percentage": 92.71, "elapsed_time": "0:26:47", "remaining_time": "0:02:06", "throughput": 6525.55, "total_tokens": 10491712} +{"current_steps": 10930, "total_steps": 11784, "loss": 0.0889, "lr": 3.190491929668748e-08, "epoch": 0.9275288526816021, "percentage": 92.75, "elapsed_time": "0:26:48", "remaining_time": "0:02:05", "throughput": 6527.4, "total_tokens": 10496960} +{"current_steps": 10935, "total_steps": 11784, "loss": 0.0602, "lr": 3.15348214723723e-08, "epoch": 0.9279531568228105, "percentage": 92.8, "elapsed_time": "0:26:48", "remaining_time": "0:02:04", "throughput": 6528.68, "total_tokens": 10501120} +{"current_steps": 10940, "total_steps": 11784, "loss": 0.0228, "lr": 3.11668483711548e-08, "epoch": 0.928377460964019, "percentage": 92.84, "elapsed_time": "0:26:48", "remaining_time": "0:02:04", "throughput": 6530.88, "total_tokens": 10507008} +{"current_steps": 10945, "total_steps": 11784, "loss": 0.0284, "lr": 3.0801000800333876e-08, "epoch": 0.9288017651052274, "percentage": 92.88, "elapsed_time": "0:26:49", "remaining_time": "0:02:03", "throughput": 6532.28, "total_tokens": 10511424} +{"current_steps": 10950, "total_steps": 11784, "loss": 0.0394, "lr": 3.043727956254538e-08, "epoch": 0.9292260692464358, "percentage": 92.92, "elapsed_time": "0:26:49", "remaining_time": "0:02:02", "throughput": 6533.75, "total_tokens": 10515968} +{"current_steps": 10955, "total_steps": 11784, "loss": 0.0882, "lr": 3.007568545576011e-08, "epoch": 0.9296503733876442, "percentage": 92.97, "elapsed_time": "0:26:49", "remaining_time": "0:02:01", "throughput": 6535.27, "total_tokens": 10520576} +{"current_steps": 10960, "total_steps": 11784, "loss": 0.0491, "lr": 2.971621927328216e-08, "epoch": 0.9300746775288526, "percentage": 93.01, "elapsed_time": "0:26:50", "remaining_time": "0:02:01", "throughput": 6536.96, "total_tokens": 10525504} +{"current_steps": 10965, "total_steps": 11784, "loss": 0.1112, "lr": 2.9358881803746794e-08, "epoch": 0.9304989816700611, "percentage": 93.05, "elapsed_time": "0:26:50", "remaining_time": "0:02:00", "throughput": 6538.46, "total_tokens": 10530112} +{"current_steps": 10970, "total_steps": 11784, "loss": 0.0864, "lr": 2.900367383111979e-08, "epoch": 0.9309232858112695, "percentage": 93.09, "elapsed_time": "0:26:50", "remaining_time": "0:01:59", "throughput": 6540.24, "total_tokens": 10535232} +{"current_steps": 10975, "total_steps": 11784, "loss": 0.0258, "lr": 2.865059613469434e-08, "epoch": 0.9313475899524779, "percentage": 93.13, "elapsed_time": "0:26:51", "remaining_time": "0:01:58", "throughput": 6541.68, "total_tokens": 10539712} +{"current_steps": 10980, "total_steps": 11784, "loss": 0.057, "lr": 2.829964948909047e-08, "epoch": 0.9317718940936863, "percentage": 93.18, "elapsed_time": "0:26:51", "remaining_time": "0:01:58", "throughput": 6543.08, "total_tokens": 10544128} +{"current_steps": 10985, "total_steps": 11784, "loss": 0.0174, "lr": 2.795083466425252e-08, "epoch": 0.9321961982348947, "percentage": 93.22, "elapsed_time": "0:26:51", "remaining_time": "0:01:57", "throughput": 6544.33, "total_tokens": 10548288} +{"current_steps": 10990, "total_steps": 11784, "loss": 0.0176, "lr": 2.760415242544811e-08, "epoch": 0.9326205023761032, "percentage": 93.26, "elapsed_time": "0:26:52", "remaining_time": "0:01:56", "throughput": 6545.62, "total_tokens": 10552512} +{"current_steps": 10995, "total_steps": 11784, "loss": 0.0617, "lr": 2.7259603533266063e-08, "epoch": 0.9330448065173116, "percentage": 93.3, "elapsed_time": "0:26:52", "remaining_time": "0:01:55", "throughput": 6547.58, "total_tokens": 10557952} +{"current_steps": 11000, "total_steps": 11784, "loss": 0.0529, "lr": 2.6917188743614704e-08, "epoch": 0.93346911065852, "percentage": 93.35, "elapsed_time": "0:26:52", "remaining_time": "0:01:54", "throughput": 6548.89, "total_tokens": 10562240} +{"current_steps": 11005, "total_steps": 11784, "loss": 0.0832, "lr": 2.6576908807720233e-08, "epoch": 0.9338934147997284, "percentage": 93.39, "elapsed_time": "0:26:53", "remaining_time": "0:01:54", "throughput": 6550.45, "total_tokens": 10566976} +{"current_steps": 11010, "total_steps": 11784, "loss": 0.0393, "lr": 2.623876447212592e-08, "epoch": 0.9343177189409368, "percentage": 93.43, "elapsed_time": "0:26:53", "remaining_time": "0:01:53", "throughput": 6551.94, "total_tokens": 10571584} +{"current_steps": 11015, "total_steps": 11784, "loss": 0.082, "lr": 2.590275647868867e-08, "epoch": 0.9347420230821453, "percentage": 93.47, "elapsed_time": "0:26:53", "remaining_time": "0:01:52", "throughput": 6553.78, "total_tokens": 10576832} +{"current_steps": 11020, "total_steps": 11784, "loss": 0.0305, "lr": 2.5568885564579258e-08, "epoch": 0.9351663272233537, "percentage": 93.52, "elapsed_time": "0:26:54", "remaining_time": "0:01:51", "throughput": 6555.14, "total_tokens": 10581184} +{"current_steps": 11025, "total_steps": 11784, "loss": 0.0419, "lr": 2.5237152462279532e-08, "epoch": 0.9355906313645621, "percentage": 93.56, "elapsed_time": "0:26:54", "remaining_time": "0:01:51", "throughput": 6556.64, "total_tokens": 10585792} +{"current_steps": 11030, "total_steps": 11784, "loss": 0.0216, "lr": 2.4907557899581212e-08, "epoch": 0.9360149355057705, "percentage": 93.6, "elapsed_time": "0:26:54", "remaining_time": "0:01:50", "throughput": 6557.93, "total_tokens": 10590016} +{"current_steps": 11035, "total_steps": 11784, "loss": 0.0183, "lr": 2.4580102599584317e-08, "epoch": 0.936439239646979, "percentage": 93.64, "elapsed_time": "0:26:55", "remaining_time": "0:01:49", "throughput": 6559.42, "total_tokens": 10594624} +{"current_steps": 11040, "total_steps": 11784, "loss": 0.1089, "lr": 2.425478728069552e-08, "epoch": 0.9368635437881874, "percentage": 93.69, "elapsed_time": "0:26:55", "remaining_time": "0:01:48", "throughput": 6560.75, "total_tokens": 10598912} +{"current_steps": 11045, "total_steps": 11784, "loss": 0.0255, "lr": 2.3931612656626688e-08, "epoch": 0.9372878479293958, "percentage": 93.73, "elapsed_time": "0:26:55", "remaining_time": "0:01:48", "throughput": 6562.34, "total_tokens": 10603648} +{"current_steps": 11050, "total_steps": 11784, "loss": 0.0221, "lr": 2.3610579436392996e-08, "epoch": 0.9377121520706042, "percentage": 93.77, "elapsed_time": "0:26:56", "remaining_time": "0:01:47", "throughput": 6564.05, "total_tokens": 10608640} +{"current_steps": 11055, "total_steps": 11784, "loss": 0.1013, "lr": 2.329168832431161e-08, "epoch": 0.9381364562118126, "percentage": 93.81, "elapsed_time": "0:26:56", "remaining_time": "0:01:46", "throughput": 6565.58, "total_tokens": 10613312} +{"current_steps": 11060, "total_steps": 11784, "loss": 0.0113, "lr": 2.2974940020000112e-08, "epoch": 0.938560760353021, "percentage": 93.86, "elapsed_time": "0:26:56", "remaining_time": "0:01:45", "throughput": 6566.92, "total_tokens": 10617600} +{"current_steps": 11065, "total_steps": 11784, "loss": 0.109, "lr": 2.266033521837529e-08, "epoch": 0.9389850644942295, "percentage": 93.9, "elapsed_time": "0:26:57", "remaining_time": "0:01:45", "throughput": 6568.39, "total_tokens": 10622144} +{"current_steps": 11070, "total_steps": 11784, "loss": 0.0026, "lr": 2.2347874609650596e-08, "epoch": 0.9394093686354379, "percentage": 93.94, "elapsed_time": "0:26:57", "remaining_time": "0:01:44", "throughput": 6569.95, "total_tokens": 10626880} +{"current_steps": 11075, "total_steps": 11784, "loss": 0.0656, "lr": 2.2037558879336004e-08, "epoch": 0.9398336727766463, "percentage": 93.98, "elapsed_time": "0:26:57", "remaining_time": "0:01:43", "throughput": 6571.8, "total_tokens": 10632128} +{"current_steps": 11080, "total_steps": 11784, "loss": 0.0192, "lr": 2.1729388708235485e-08, "epoch": 0.9402579769178547, "percentage": 94.03, "elapsed_time": "0:26:58", "remaining_time": "0:01:42", "throughput": 6573.36, "total_tokens": 10636800} +{"current_steps": 11085, "total_steps": 11784, "loss": 0.0352, "lr": 2.1423364772445886e-08, "epoch": 0.9406822810590632, "percentage": 94.07, "elapsed_time": "0:26:58", "remaining_time": "0:01:42", "throughput": 6574.85, "total_tokens": 10641408} +{"current_steps": 11090, "total_steps": 11784, "loss": 0.0058, "lr": 2.111948774335548e-08, "epoch": 0.9411065852002716, "percentage": 94.11, "elapsed_time": "0:26:58", "remaining_time": "0:01:41", "throughput": 6576.55, "total_tokens": 10646400} +{"current_steps": 11095, "total_steps": 11784, "loss": 0.1124, "lr": 2.081775828764254e-08, "epoch": 0.94153088934148, "percentage": 94.15, "elapsed_time": "0:26:59", "remaining_time": "0:01:40", "throughput": 6577.94, "total_tokens": 10650816} +{"current_steps": 11100, "total_steps": 11784, "loss": 0.0133, "lr": 2.0518177067273103e-08, "epoch": 0.9419551934826884, "percentage": 94.2, "elapsed_time": "0:26:59", "remaining_time": "0:01:39", "throughput": 6579.43, "total_tokens": 10655424} +{"current_steps": 11105, "total_steps": 11784, "loss": 0.0908, "lr": 2.0220744739501305e-08, "epoch": 0.9423794976238968, "percentage": 94.24, "elapsed_time": "0:26:59", "remaining_time": "0:01:39", "throughput": 6581.14, "total_tokens": 10660416} +{"current_steps": 11110, "total_steps": 11784, "loss": 0.0695, "lr": 1.992546195686573e-08, "epoch": 0.9428038017651053, "percentage": 94.28, "elapsed_time": "0:27:00", "remaining_time": "0:01:38", "throughput": 6582.68, "total_tokens": 10665088} +{"current_steps": 11115, "total_steps": 11784, "loss": 0.0392, "lr": 1.9632329367189725e-08, "epoch": 0.9432281059063137, "percentage": 94.32, "elapsed_time": "0:27:00", "remaining_time": "0:01:37", "throughput": 6584.12, "total_tokens": 10669632} +{"current_steps": 11120, "total_steps": 11784, "loss": 0.0275, "lr": 1.9341347613579086e-08, "epoch": 0.9436524100475221, "percentage": 94.37, "elapsed_time": "0:27:00", "remaining_time": "0:01:36", "throughput": 6585.89, "total_tokens": 10674752} +{"current_steps": 11125, "total_steps": 11784, "loss": 0.0512, "lr": 1.9052517334420704e-08, "epoch": 0.9440767141887305, "percentage": 94.41, "elapsed_time": "0:27:01", "remaining_time": "0:01:36", "throughput": 6587.34, "total_tokens": 10679296} +{"current_steps": 11130, "total_steps": 11784, "loss": 0.0959, "lr": 1.8765839163381815e-08, "epoch": 0.9445010183299389, "percentage": 94.45, "elapsed_time": "0:27:01", "remaining_time": "0:01:35", "throughput": 6588.87, "total_tokens": 10683968} +{"current_steps": 11135, "total_steps": 11784, "loss": 0.0589, "lr": 1.8481313729407645e-08, "epoch": 0.9449253224711474, "percentage": 94.49, "elapsed_time": "0:27:01", "remaining_time": "0:01:34", "throughput": 6590.32, "total_tokens": 10688512} +{"current_steps": 11140, "total_steps": 11784, "loss": 0.1141, "lr": 1.8198941656720646e-08, "epoch": 0.9453496266123558, "percentage": 94.53, "elapsed_time": "0:27:02", "remaining_time": "0:01:33", "throughput": 6591.91, "total_tokens": 10693312} +{"current_steps": 11145, "total_steps": 11784, "loss": 0.105, "lr": 1.7918723564819272e-08, "epoch": 0.9457739307535642, "percentage": 94.58, "elapsed_time": "0:27:02", "remaining_time": "0:01:33", "throughput": 6593.81, "total_tokens": 10698688} +{"current_steps": 11150, "total_steps": 11784, "loss": 0.1199, "lr": 1.7640660068475976e-08, "epoch": 0.9461982348947726, "percentage": 94.62, "elapsed_time": "0:27:02", "remaining_time": "0:01:32", "throughput": 6595.91, "total_tokens": 10704448} +{"current_steps": 11155, "total_steps": 11784, "loss": 0.0696, "lr": 1.7364751777736332e-08, "epoch": 0.946622539035981, "percentage": 94.66, "elapsed_time": "0:27:03", "remaining_time": "0:01:31", "throughput": 6597.54, "total_tokens": 10709312} +{"current_steps": 11160, "total_steps": 11784, "loss": 0.0331, "lr": 1.7090999297917684e-08, "epoch": 0.9470468431771895, "percentage": 94.7, "elapsed_time": "0:27:03", "remaining_time": "0:01:30", "throughput": 6599.28, "total_tokens": 10714368} +{"current_steps": 11165, "total_steps": 11784, "loss": 0.0659, "lr": 1.6819403229607732e-08, "epoch": 0.9474711473183979, "percentage": 94.75, "elapsed_time": "0:27:03", "remaining_time": "0:01:30", "throughput": 6600.7, "total_tokens": 10718848} +{"current_steps": 11170, "total_steps": 11784, "loss": 0.0355, "lr": 1.6549964168663054e-08, "epoch": 0.9478954514596063, "percentage": 94.79, "elapsed_time": "0:27:04", "remaining_time": "0:01:29", "throughput": 6602.32, "total_tokens": 10723712} +{"current_steps": 11175, "total_steps": 11784, "loss": 0.0551, "lr": 1.6282682706208028e-08, "epoch": 0.9483197556008147, "percentage": 94.83, "elapsed_time": "0:27:04", "remaining_time": "0:01:28", "throughput": 6603.83, "total_tokens": 10728384} +{"current_steps": 11180, "total_steps": 11784, "loss": 0.0233, "lr": 1.6017559428633588e-08, "epoch": 0.948744059742023, "percentage": 94.87, "elapsed_time": "0:27:04", "remaining_time": "0:01:27", "throughput": 6605.64, "total_tokens": 10733632} +{"current_steps": 11185, "total_steps": 11784, "loss": 0.0556, "lr": 1.5754594917595564e-08, "epoch": 0.9491683638832314, "percentage": 94.92, "elapsed_time": "0:27:05", "remaining_time": "0:01:27", "throughput": 6607.47, "total_tokens": 10738880} +{"current_steps": 11190, "total_steps": 11784, "loss": 0.0574, "lr": 1.549378975001403e-08, "epoch": 0.9495926680244399, "percentage": 94.96, "elapsed_time": "0:27:05", "remaining_time": "0:01:26", "throughput": 6609.43, "total_tokens": 10744384} +{"current_steps": 11195, "total_steps": 11784, "loss": 0.0319, "lr": 1.5235144498071172e-08, "epoch": 0.9500169721656483, "percentage": 95.0, "elapsed_time": "0:27:05", "remaining_time": "0:01:25", "throughput": 6611.29, "total_tokens": 10749632} +{"current_steps": 11200, "total_steps": 11784, "loss": 0.0301, "lr": 1.4978659729210974e-08, "epoch": 0.9504412763068567, "percentage": 95.04, "elapsed_time": "0:27:06", "remaining_time": "0:01:24", "throughput": 6613.0, "total_tokens": 10754624} +{"current_steps": 11205, "total_steps": 11784, "loss": 0.0761, "lr": 1.4724336006137094e-08, "epoch": 0.9508655804480651, "percentage": 95.09, "elapsed_time": "0:27:06", "remaining_time": "0:01:24", "throughput": 6614.64, "total_tokens": 10759488} +{"current_steps": 11210, "total_steps": 11784, "loss": 0.004, "lr": 1.4472173886812433e-08, "epoch": 0.9512898845892735, "percentage": 95.13, "elapsed_time": "0:27:06", "remaining_time": "0:01:23", "throughput": 6616.13, "total_tokens": 10764096} +{"current_steps": 11210, "total_steps": 11784, "eval_loss": 0.05228454992175102, "epoch": 0.9512898845892735, "percentage": 95.13, "elapsed_time": "0:27:22", "remaining_time": "0:01:24", "throughput": 6552.32, "total_tokens": 10764096} +{"current_steps": 11215, "total_steps": 11784, "loss": 0.0577, "lr": 1.4222173924457348e-08, "epoch": 0.951714188730482, "percentage": 95.17, "elapsed_time": "0:28:08", "remaining_time": "0:01:25", "throughput": 6378.93, "total_tokens": 10768640} +{"current_steps": 11220, "total_steps": 11784, "loss": 0.0544, "lr": 1.3974336667548659e-08, "epoch": 0.9521384928716904, "percentage": 95.21, "elapsed_time": "0:28:08", "remaining_time": "0:01:24", "throughput": 6380.81, "total_tokens": 10774016} +{"current_steps": 11225, "total_steps": 11784, "loss": 0.0036, "lr": 1.3728662659818201e-08, "epoch": 0.9525627970128988, "percentage": 95.26, "elapsed_time": "0:28:08", "remaining_time": "0:01:24", "throughput": 6382.52, "total_tokens": 10779072} +{"current_steps": 11230, "total_steps": 11784, "loss": 0.0487, "lr": 1.3485152440252389e-08, "epoch": 0.9529871011541072, "percentage": 95.3, "elapsed_time": "0:28:09", "remaining_time": "0:01:23", "throughput": 6384.18, "total_tokens": 10784000} +{"current_steps": 11235, "total_steps": 11784, "loss": 0.0644, "lr": 1.3243806543089875e-08, "epoch": 0.9534114052953157, "percentage": 95.34, "elapsed_time": "0:28:09", "remaining_time": "0:01:22", "throughput": 6385.82, "total_tokens": 10788928} +{"current_steps": 11240, "total_steps": 11784, "loss": 0.0275, "lr": 1.3004625497821553e-08, "epoch": 0.9538357094365241, "percentage": 95.38, "elapsed_time": "0:28:09", "remaining_time": "0:01:21", "throughput": 6387.7, "total_tokens": 10794304} +{"current_steps": 11245, "total_steps": 11784, "loss": 0.0298, "lr": 1.276760982918812e-08, "epoch": 0.9542600135777325, "percentage": 95.43, "elapsed_time": "0:28:10", "remaining_time": "0:01:21", "throughput": 6388.98, "total_tokens": 10798528} +{"current_steps": 11250, "total_steps": 11784, "loss": 0.0547, "lr": 1.2532760057180291e-08, "epoch": 0.9546843177189409, "percentage": 95.47, "elapsed_time": "0:28:10", "remaining_time": "0:01:20", "throughput": 6390.51, "total_tokens": 10803200} +{"current_steps": 11255, "total_steps": 11784, "loss": 0.0283, "lr": 1.230007669703681e-08, "epoch": 0.9551086218601493, "percentage": 95.51, "elapsed_time": "0:28:10", "remaining_time": "0:01:19", "throughput": 6391.93, "total_tokens": 10807680} +{"current_steps": 11260, "total_steps": 11784, "loss": 0.0351, "lr": 1.2069560259243328e-08, "epoch": 0.9555329260013578, "percentage": 95.55, "elapsed_time": "0:28:11", "remaining_time": "0:01:18", "throughput": 6393.39, "total_tokens": 10812224} +{"current_steps": 11265, "total_steps": 11784, "loss": 0.045, "lr": 1.1841211249531636e-08, "epoch": 0.9559572301425662, "percentage": 95.6, "elapsed_time": "0:28:11", "remaining_time": "0:01:17", "throughput": 6394.78, "total_tokens": 10816640} +{"current_steps": 11270, "total_steps": 11784, "loss": 0.0182, "lr": 1.1615030168878438e-08, "epoch": 0.9563815342837746, "percentage": 95.64, "elapsed_time": "0:28:11", "remaining_time": "0:01:17", "throughput": 6396.37, "total_tokens": 10821440} +{"current_steps": 11275, "total_steps": 11784, "loss": 0.0752, "lr": 1.139101751350402e-08, "epoch": 0.956805838424983, "percentage": 95.68, "elapsed_time": "0:28:12", "remaining_time": "0:01:16", "throughput": 6398.12, "total_tokens": 10826560} +{"current_steps": 11280, "total_steps": 11784, "loss": 0.0729, "lr": 1.1169173774871477e-08, "epoch": 0.9572301425661914, "percentage": 95.72, "elapsed_time": "0:28:12", "remaining_time": "0:01:15", "throughput": 6399.8, "total_tokens": 10831552} +{"current_steps": 11285, "total_steps": 11784, "loss": 0.027, "lr": 1.0949499439685483e-08, "epoch": 0.9576544467073999, "percentage": 95.77, "elapsed_time": "0:28:12", "remaining_time": "0:01:14", "throughput": 6401.19, "total_tokens": 10835968} +{"current_steps": 11290, "total_steps": 11784, "loss": 0.0548, "lr": 1.0731994989891302e-08, "epoch": 0.9580787508486083, "percentage": 95.81, "elapsed_time": "0:28:13", "remaining_time": "0:01:14", "throughput": 6402.39, "total_tokens": 10840064} +{"current_steps": 11295, "total_steps": 11784, "loss": 0.0353, "lr": 1.0516660902673446e-08, "epoch": 0.9585030549898167, "percentage": 95.85, "elapsed_time": "0:28:13", "remaining_time": "0:01:13", "throughput": 6403.86, "total_tokens": 10844672} +{"current_steps": 11300, "total_steps": 11784, "loss": 0.0655, "lr": 1.0303497650455128e-08, "epoch": 0.9589273591310251, "percentage": 95.89, "elapsed_time": "0:28:13", "remaining_time": "0:01:12", "throughput": 6405.87, "total_tokens": 10850304} +{"current_steps": 11305, "total_steps": 11784, "loss": 0.0381, "lr": 1.0092505700896703e-08, "epoch": 0.9593516632722335, "percentage": 95.94, "elapsed_time": "0:28:14", "remaining_time": "0:01:11", "throughput": 6407.35, "total_tokens": 10854912} +{"current_steps": 11310, "total_steps": 11784, "loss": 0.0385, "lr": 9.883685516895113e-09, "epoch": 0.959775967413442, "percentage": 95.98, "elapsed_time": "0:28:14", "remaining_time": "0:01:11", "throughput": 6409.01, "total_tokens": 10859904} +{"current_steps": 11315, "total_steps": 11784, "loss": 0.0542, "lr": 9.677037556582557e-09, "epoch": 0.9602002715546504, "percentage": 96.02, "elapsed_time": "0:28:14", "remaining_time": "0:01:10", "throughput": 6410.58, "total_tokens": 10864704} +{"current_steps": 11320, "total_steps": 11784, "loss": 0.0082, "lr": 9.47256227332538e-09, "epoch": 0.9606245756958588, "percentage": 96.06, "elapsed_time": "0:28:15", "remaining_time": "0:01:09", "throughput": 6412.03, "total_tokens": 10869248} +{"current_steps": 11325, "total_steps": 11784, "loss": 0.0511, "lr": 9.270260115723739e-09, "epoch": 0.9610488798370672, "percentage": 96.1, "elapsed_time": "0:28:15", "remaining_time": "0:01:08", "throughput": 6413.56, "total_tokens": 10873984} +{"current_steps": 11330, "total_steps": 11784, "loss": 0.0736, "lr": 9.070131527609603e-09, "epoch": 0.9614731839782756, "percentage": 96.15, "elapsed_time": "0:28:15", "remaining_time": "0:01:07", "throughput": 6415.04, "total_tokens": 10878592} +{"current_steps": 11335, "total_steps": 11784, "loss": 0.0627, "lr": 8.872176948046761e-09, "epoch": 0.9618974881194841, "percentage": 96.19, "elapsed_time": "0:28:16", "remaining_time": "0:01:07", "throughput": 6416.54, "total_tokens": 10883264} +{"current_steps": 11340, "total_steps": 11784, "loss": 0.0327, "lr": 8.676396811329145e-09, "epoch": 0.9623217922606925, "percentage": 96.23, "elapsed_time": "0:28:16", "remaining_time": "0:01:06", "throughput": 6417.84, "total_tokens": 10887552} +{"current_steps": 11345, "total_steps": 11784, "loss": 0.0727, "lr": 8.482791546980506e-09, "epoch": 0.9627460964019009, "percentage": 96.27, "elapsed_time": "0:28:16", "remaining_time": "0:01:05", "throughput": 6419.64, "total_tokens": 10892800} +{"current_steps": 11350, "total_steps": 11784, "loss": 0.0624, "lr": 8.291361579752631e-09, "epoch": 0.9631704005431093, "percentage": 96.32, "elapsed_time": "0:28:17", "remaining_time": "0:01:04", "throughput": 6421.05, "total_tokens": 10897280} +{"current_steps": 11355, "total_steps": 11784, "loss": 0.0544, "lr": 8.102107329625351e-09, "epoch": 0.9635947046843177, "percentage": 96.36, "elapsed_time": "0:28:17", "remaining_time": "0:01:04", "throughput": 6422.65, "total_tokens": 10902144} +{"current_steps": 11360, "total_steps": 11784, "loss": 0.0805, "lr": 7.91502921180487e-09, "epoch": 0.9640190088255262, "percentage": 96.4, "elapsed_time": "0:28:17", "remaining_time": "0:01:03", "throughput": 6424.37, "total_tokens": 10907264} +{"current_steps": 11365, "total_steps": 11784, "loss": 0.0146, "lr": 7.730127636723538e-09, "epoch": 0.9644433129667346, "percentage": 96.44, "elapsed_time": "0:28:18", "remaining_time": "0:01:02", "throughput": 6425.81, "total_tokens": 10911808} +{"current_steps": 11370, "total_steps": 11784, "loss": 0.0853, "lr": 7.547403010037978e-09, "epoch": 0.964867617107943, "percentage": 96.49, "elapsed_time": "0:28:18", "remaining_time": "0:01:01", "throughput": 6427.35, "total_tokens": 10916544} +{"current_steps": 11375, "total_steps": 11784, "loss": 0.0814, "lr": 7.366855732629407e-09, "epoch": 0.9652919212491514, "percentage": 96.53, "elapsed_time": "0:28:18", "remaining_time": "0:01:01", "throughput": 6428.75, "total_tokens": 10921024} +{"current_steps": 11380, "total_steps": 11784, "loss": 0.0551, "lr": 7.1884862006017514e-09, "epoch": 0.9657162253903598, "percentage": 96.57, "elapsed_time": "0:28:19", "remaining_time": "0:01:00", "throughput": 6430.39, "total_tokens": 10926016} +{"current_steps": 11385, "total_steps": 11784, "loss": 0.0426, "lr": 7.012294805281205e-09, "epoch": 0.9661405295315683, "percentage": 96.61, "elapsed_time": "0:28:19", "remaining_time": "0:00:59", "throughput": 6432.15, "total_tokens": 10931200} +{"current_steps": 11390, "total_steps": 11784, "loss": 0.0681, "lr": 6.838281933215562e-09, "epoch": 0.9665648336727767, "percentage": 96.66, "elapsed_time": "0:28:19", "remaining_time": "0:00:58", "throughput": 6433.9, "total_tokens": 10936384} +{"current_steps": 11395, "total_steps": 11784, "loss": 0.0278, "lr": 6.6664479661729944e-09, "epoch": 0.9669891378139851, "percentage": 96.7, "elapsed_time": "0:28:20", "remaining_time": "0:00:58", "throughput": 6435.46, "total_tokens": 10941184} +{"current_steps": 11400, "total_steps": 11784, "loss": 0.0654, "lr": 6.496793281141055e-09, "epoch": 0.9674134419551935, "percentage": 96.74, "elapsed_time": "0:28:20", "remaining_time": "0:00:57", "throughput": 6436.72, "total_tokens": 10945408} +{"current_steps": 11405, "total_steps": 11784, "loss": 0.0877, "lr": 6.329318250326898e-09, "epoch": 0.9678377460964019, "percentage": 96.78, "elapsed_time": "0:28:20", "remaining_time": "0:00:56", "throughput": 6438.25, "total_tokens": 10950144} +{"current_steps": 11410, "total_steps": 11784, "loss": 0.1094, "lr": 6.164023241154837e-09, "epoch": 0.9682620502376104, "percentage": 96.83, "elapsed_time": "0:28:21", "remaining_time": "0:00:55", "throughput": 6439.97, "total_tokens": 10955264} +{"current_steps": 11415, "total_steps": 11784, "loss": 0.0078, "lr": 6.000908616267009e-09, "epoch": 0.9686863543788188, "percentage": 96.87, "elapsed_time": "0:28:21", "remaining_time": "0:00:55", "throughput": 6441.62, "total_tokens": 10960256} +{"current_steps": 11420, "total_steps": 11784, "loss": 0.0628, "lr": 5.839974733522046e-09, "epoch": 0.9691106585200272, "percentage": 96.91, "elapsed_time": "0:28:21", "remaining_time": "0:00:54", "throughput": 6442.99, "total_tokens": 10964672} +{"current_steps": 11425, "total_steps": 11784, "loss": 0.0369, "lr": 5.68122194599363e-09, "epoch": 0.9695349626612356, "percentage": 96.95, "elapsed_time": "0:28:22", "remaining_time": "0:00:53", "throughput": 6444.68, "total_tokens": 10969728} +{"current_steps": 11430, "total_steps": 11784, "loss": 0.0914, "lr": 5.5246506019709374e-09, "epoch": 0.969959266802444, "percentage": 97.0, "elapsed_time": "0:28:22", "remaining_time": "0:00:52", "throughput": 6446.34, "total_tokens": 10974720} +{"current_steps": 11435, "total_steps": 11784, "loss": 0.0407, "lr": 5.370261044956969e-09, "epoch": 0.9703835709436525, "percentage": 97.04, "elapsed_time": "0:28:22", "remaining_time": "0:00:51", "throughput": 6447.63, "total_tokens": 10979008} +{"current_steps": 11440, "total_steps": 11784, "loss": 0.0368, "lr": 5.218053613668116e-09, "epoch": 0.9708078750848609, "percentage": 97.08, "elapsed_time": "0:28:23", "remaining_time": "0:00:51", "throughput": 6449.34, "total_tokens": 10984128} +{"current_steps": 11445, "total_steps": 11784, "loss": 0.0629, "lr": 5.068028642033595e-09, "epoch": 0.9712321792260692, "percentage": 97.12, "elapsed_time": "0:28:23", "remaining_time": "0:00:50", "throughput": 6450.66, "total_tokens": 10988480} +{"current_steps": 11450, "total_steps": 11784, "loss": 0.0341, "lr": 4.92018645919412e-09, "epoch": 0.9716564833672776, "percentage": 97.17, "elapsed_time": "0:28:23", "remaining_time": "0:00:49", "throughput": 6452.0, "total_tokens": 10992896} +{"current_steps": 11455, "total_steps": 11784, "loss": 0.078, "lr": 4.774527389501681e-09, "epoch": 0.972080787508486, "percentage": 97.21, "elapsed_time": "0:28:24", "remaining_time": "0:00:48", "throughput": 6453.57, "total_tokens": 10997696} +{"current_steps": 11460, "total_steps": 11784, "loss": 0.0699, "lr": 4.63105175251921e-09, "epoch": 0.9725050916496945, "percentage": 97.25, "elapsed_time": "0:28:24", "remaining_time": "0:00:48", "throughput": 6455.35, "total_tokens": 11002944} +{"current_steps": 11465, "total_steps": 11784, "loss": 0.085, "lr": 4.489759863018583e-09, "epoch": 0.9729293957909029, "percentage": 97.29, "elapsed_time": "0:28:24", "remaining_time": "0:00:47", "throughput": 6457.22, "total_tokens": 11008384} +{"current_steps": 11470, "total_steps": 11784, "loss": 0.0997, "lr": 4.350652030981394e-09, "epoch": 0.9733536999321113, "percentage": 97.34, "elapsed_time": "0:28:25", "remaining_time": "0:00:46", "throughput": 6458.59, "total_tokens": 11012800} +{"current_steps": 11475, "total_steps": 11784, "loss": 0.1097, "lr": 4.213728561597296e-09, "epoch": 0.9737780040733197, "percentage": 97.38, "elapsed_time": "0:28:25", "remaining_time": "0:00:45", "throughput": 6459.96, "total_tokens": 11017216} +{"current_steps": 11480, "total_steps": 11784, "loss": 0.0678, "lr": 4.0789897552637735e-09, "epoch": 0.9742023082145281, "percentage": 97.42, "elapsed_time": "0:28:25", "remaining_time": "0:00:45", "throughput": 6461.25, "total_tokens": 11021504} +{"current_steps": 11485, "total_steps": 11784, "loss": 0.0978, "lr": 3.946435907585255e-09, "epoch": 0.9746266123557366, "percentage": 97.46, "elapsed_time": "0:28:26", "remaining_time": "0:00:44", "throughput": 6463.06, "total_tokens": 11026816} +{"current_steps": 11490, "total_steps": 11784, "loss": 0.0679, "lr": 3.816067309372673e-09, "epoch": 0.975050916496945, "percentage": 97.51, "elapsed_time": "0:28:26", "remaining_time": "0:00:43", "throughput": 6465.03, "total_tokens": 11032448} +{"current_steps": 11495, "total_steps": 11784, "loss": 0.1102, "lr": 3.68788424664257e-09, "epoch": 0.9754752206381534, "percentage": 97.55, "elapsed_time": "0:28:26", "remaining_time": "0:00:42", "throughput": 6466.47, "total_tokens": 11037056} +{"current_steps": 11500, "total_steps": 11784, "loss": 0.0901, "lr": 3.561887000616881e-09, "epoch": 0.9758995247793618, "percentage": 97.59, "elapsed_time": "0:28:27", "remaining_time": "0:00:42", "throughput": 6467.66, "total_tokens": 11041152} +{"current_steps": 11505, "total_steps": 11784, "loss": 0.0358, "lr": 3.438075847721933e-09, "epoch": 0.9763238289205702, "percentage": 97.63, "elapsed_time": "0:28:27", "remaining_time": "0:00:41", "throughput": 6468.98, "total_tokens": 11045504} +{"current_steps": 11510, "total_steps": 11784, "loss": 0.0784, "lr": 3.316451059587777e-09, "epoch": 0.9767481330617787, "percentage": 97.67, "elapsed_time": "0:28:27", "remaining_time": "0:00:40", "throughput": 6470.28, "total_tokens": 11049792} +{"current_steps": 11515, "total_steps": 11784, "loss": 0.0353, "lr": 3.1970129030481907e-09, "epoch": 0.9771724372029871, "percentage": 97.72, "elapsed_time": "0:28:28", "remaining_time": "0:00:39", "throughput": 6471.51, "total_tokens": 11053952} +{"current_steps": 11520, "total_steps": 11784, "loss": 0.0642, "lr": 3.0797616401392335e-09, "epoch": 0.9775967413441955, "percentage": 97.76, "elapsed_time": "0:28:28", "remaining_time": "0:00:39", "throughput": 6472.79, "total_tokens": 11058240} +{"current_steps": 11525, "total_steps": 11784, "loss": 0.0538, "lr": 2.964697528099469e-09, "epoch": 0.9780210454854039, "percentage": 97.8, "elapsed_time": "0:28:28", "remaining_time": "0:00:38", "throughput": 6474.56, "total_tokens": 11063488} +{"current_steps": 11530, "total_steps": 11784, "loss": 0.0462, "lr": 2.8518208193689664e-09, "epoch": 0.9784453496266123, "percentage": 97.84, "elapsed_time": "0:28:29", "remaining_time": "0:00:37", "throughput": 6476.01, "total_tokens": 11068096} +{"current_steps": 11535, "total_steps": 11784, "loss": 0.1083, "lr": 2.741131761588522e-09, "epoch": 0.9788696537678208, "percentage": 97.89, "elapsed_time": "0:28:29", "remaining_time": "0:00:36", "throughput": 6477.98, "total_tokens": 11073728} +{"current_steps": 11540, "total_steps": 11784, "loss": 0.0648, "lr": 2.632630597600105e-09, "epoch": 0.9792939579090292, "percentage": 97.93, "elapsed_time": "0:28:29", "remaining_time": "0:00:36", "throughput": 6479.3, "total_tokens": 11078144} +{"current_steps": 11545, "total_steps": 11784, "loss": 0.0129, "lr": 2.526317565444969e-09, "epoch": 0.9797182620502376, "percentage": 97.97, "elapsed_time": "0:28:30", "remaining_time": "0:00:35", "throughput": 6481.05, "total_tokens": 11083328} +{"current_steps": 11550, "total_steps": 11784, "loss": 0.089, "lr": 2.422192898364095e-09, "epoch": 0.980142566191446, "percentage": 98.01, "elapsed_time": "0:28:30", "remaining_time": "0:00:34", "throughput": 6482.57, "total_tokens": 11088064} +{"current_steps": 11555, "total_steps": 11784, "loss": 0.0186, "lr": 2.3202568247974175e-09, "epoch": 0.9805668703326544, "percentage": 98.06, "elapsed_time": "0:28:30", "remaining_time": "0:00:33", "throughput": 6484.05, "total_tokens": 11092736} +{"current_steps": 11560, "total_steps": 11784, "loss": 0.0534, "lr": 2.2205095683833774e-09, "epoch": 0.9809911744738629, "percentage": 98.1, "elapsed_time": "0:28:31", "remaining_time": "0:00:33", "throughput": 6485.91, "total_tokens": 11098176} +{"current_steps": 11565, "total_steps": 11784, "loss": 0.0566, "lr": 2.122951347958035e-09, "epoch": 0.9814154786150713, "percentage": 98.14, "elapsed_time": "0:28:31", "remaining_time": "0:00:32", "throughput": 6487.41, "total_tokens": 11102912} +{"current_steps": 11570, "total_steps": 11784, "loss": 0.0898, "lr": 2.0275823775551817e-09, "epoch": 0.9818397827562797, "percentage": 98.18, "elapsed_time": "0:28:31", "remaining_time": "0:00:31", "throughput": 6488.8, "total_tokens": 11107392} +{"current_steps": 11575, "total_steps": 11784, "loss": 0.0508, "lr": 1.934402866405671e-09, "epoch": 0.9822640868974881, "percentage": 98.23, "elapsed_time": "0:28:32", "remaining_time": "0:00:30", "throughput": 6490.39, "total_tokens": 11112256} +{"current_steps": 11580, "total_steps": 11784, "loss": 0.0302, "lr": 1.843413018936535e-09, "epoch": 0.9826883910386965, "percentage": 98.27, "elapsed_time": "0:28:32", "remaining_time": "0:00:30", "throughput": 6492.08, "total_tokens": 11117376} +{"current_steps": 11585, "total_steps": 11784, "loss": 0.105, "lr": 1.7546130347712018e-09, "epoch": 0.983112695179905, "percentage": 98.31, "elapsed_time": "0:28:32", "remaining_time": "0:00:29", "throughput": 6493.84, "total_tokens": 11122624} +{"current_steps": 11590, "total_steps": 11784, "loss": 0.0155, "lr": 1.6680031087286106e-09, "epoch": 0.9835369993211134, "percentage": 98.35, "elapsed_time": "0:28:33", "remaining_time": "0:00:28", "throughput": 6495.51, "total_tokens": 11127680} +{"current_steps": 11595, "total_steps": 11784, "loss": 0.0984, "lr": 1.5835834308228768e-09, "epoch": 0.9839613034623218, "percentage": 98.4, "elapsed_time": "0:28:33", "remaining_time": "0:00:27", "throughput": 6497.13, "total_tokens": 11132672} +{"current_steps": 11600, "total_steps": 11784, "loss": 0.091, "lr": 1.5013541862630708e-09, "epoch": 0.9843856076035302, "percentage": 98.44, "elapsed_time": "0:28:33", "remaining_time": "0:00:27", "throughput": 6498.64, "total_tokens": 11137408} +{"current_steps": 11605, "total_steps": 11784, "loss": 0.0188, "lr": 1.4213155554525513e-09, "epoch": 0.9848099117447386, "percentage": 98.48, "elapsed_time": "0:28:34", "remaining_time": "0:00:26", "throughput": 6500.08, "total_tokens": 11142016} +{"current_steps": 11610, "total_steps": 11784, "loss": 0.0406, "lr": 1.343467713988522e-09, "epoch": 0.9852342158859471, "percentage": 98.52, "elapsed_time": "0:28:34", "remaining_time": "0:00:25", "throughput": 6501.52, "total_tokens": 11146624} +{"current_steps": 11615, "total_steps": 11784, "loss": 0.135, "lr": 1.2678108326621418e-09, "epoch": 0.9856585200271555, "percentage": 98.57, "elapsed_time": "0:28:34", "remaining_time": "0:00:24", "throughput": 6503.18, "total_tokens": 11151680} +{"current_steps": 11620, "total_steps": 11784, "loss": 0.0567, "lr": 1.1943450774574148e-09, "epoch": 0.9860828241683639, "percentage": 98.61, "elapsed_time": "0:28:35", "remaining_time": "0:00:24", "throughput": 6504.51, "total_tokens": 11156096} +{"current_steps": 11625, "total_steps": 11784, "loss": 0.0577, "lr": 1.1230706095516352e-09, "epoch": 0.9865071283095723, "percentage": 98.65, "elapsed_time": "0:28:35", "remaining_time": "0:00:23", "throughput": 6505.72, "total_tokens": 11160256} +{"current_steps": 11630, "total_steps": 11784, "loss": 0.0315, "lr": 1.0539875853142754e-09, "epoch": 0.9869314324507807, "percentage": 98.69, "elapsed_time": "0:28:35", "remaining_time": "0:00:22", "throughput": 6507.51, "total_tokens": 11165568} +{"current_steps": 11635, "total_steps": 11784, "loss": 0.071, "lr": 9.8709615630721e-10, "epoch": 0.9873557365919892, "percentage": 98.74, "elapsed_time": "0:28:36", "remaining_time": "0:00:21", "throughput": 6509.08, "total_tokens": 11170432} +{"current_steps": 11640, "total_steps": 11784, "loss": 0.0726, "lr": 9.22396469283937e-10, "epoch": 0.9877800407331976, "percentage": 98.78, "elapsed_time": "0:28:36", "remaining_time": "0:00:21", "throughput": 6510.53, "total_tokens": 11175104} +{"current_steps": 11645, "total_steps": 11784, "loss": 0.0355, "lr": 8.598886661895787e-10, "epoch": 0.988204344874406, "percentage": 98.82, "elapsed_time": "0:28:36", "remaining_time": "0:00:20", "throughput": 6511.91, "total_tokens": 11179584} +{"current_steps": 11650, "total_steps": 11784, "loss": 0.0425, "lr": 7.995728841605487e-10, "epoch": 0.9886286490156144, "percentage": 98.86, "elapsed_time": "0:28:37", "remaining_time": "0:00:19", "throughput": 6513.72, "total_tokens": 11184960} +{"current_steps": 11655, "total_steps": 11784, "loss": 0.0954, "lr": 7.41449255524107e-10, "epoch": 0.9890529531568228, "percentage": 98.91, "elapsed_time": "0:28:37", "remaining_time": "0:00:19", "throughput": 6515.59, "total_tokens": 11190464} +{"current_steps": 11660, "total_steps": 11784, "loss": 0.0764, "lr": 6.855179077981388e-10, "epoch": 0.9894772572980313, "percentage": 98.95, "elapsed_time": "0:28:37", "remaining_time": "0:00:18", "throughput": 6517.14, "total_tokens": 11195392} +{"current_steps": 11665, "total_steps": 11784, "loss": 0.0392, "lr": 6.3177896369071e-10, "epoch": 0.9899015614392397, "percentage": 98.99, "elapsed_time": "0:28:38", "remaining_time": "0:00:17", "throughput": 6518.6, "total_tokens": 11200128} +{"current_steps": 11670, "total_steps": 11784, "loss": 0.0484, "lr": 5.802325411001785e-10, "epoch": 0.9903258655804481, "percentage": 99.03, "elapsed_time": "0:28:38", "remaining_time": "0:00:16", "throughput": 6520.05, "total_tokens": 11204800} +{"current_steps": 11675, "total_steps": 11784, "loss": 0.0924, "lr": 5.308787531147496e-10, "epoch": 0.9907501697216565, "percentage": 99.08, "elapsed_time": "0:28:38", "remaining_time": "0:00:16", "throughput": 6521.5, "total_tokens": 11209472} +{"current_steps": 11680, "total_steps": 11784, "loss": 0.0679, "lr": 4.837177080119214e-10, "epoch": 0.991174473862865, "percentage": 99.12, "elapsed_time": "0:28:39", "remaining_time": "0:00:15", "throughput": 6522.89, "total_tokens": 11214016} +{"current_steps": 11685, "total_steps": 11784, "loss": 0.072, "lr": 4.387495092587068e-10, "epoch": 0.9915987780040734, "percentage": 99.16, "elapsed_time": "0:28:39", "remaining_time": "0:00:14", "throughput": 6524.37, "total_tokens": 11218752} +{"current_steps": 11690, "total_steps": 11784, "loss": 0.1148, "lr": 3.959742555111889e-10, "epoch": 0.9920230821452818, "percentage": 99.2, "elapsed_time": "0:28:39", "remaining_time": "0:00:13", "throughput": 6525.88, "total_tokens": 11223552} +{"current_steps": 11695, "total_steps": 11784, "loss": 0.0671, "lr": 3.553920406144106e-10, "epoch": 0.9924473862864902, "percentage": 99.24, "elapsed_time": "0:28:40", "remaining_time": "0:00:13", "throughput": 6527.24, "total_tokens": 11228032} +{"current_steps": 11700, "total_steps": 11784, "loss": 0.0562, "lr": 3.1700295360181927e-10, "epoch": 0.9928716904276986, "percentage": 99.29, "elapsed_time": "0:28:40", "remaining_time": "0:00:12", "throughput": 6528.52, "total_tokens": 11232448} +{"current_steps": 11705, "total_steps": 11784, "loss": 0.0858, "lr": 2.808070786955996e-10, "epoch": 0.993295994568907, "percentage": 99.33, "elapsed_time": "0:28:40", "remaining_time": "0:00:11", "throughput": 6530.13, "total_tokens": 11237440} +{"current_steps": 11710, "total_steps": 11784, "loss": 0.0601, "lr": 2.4680449530622984e-10, "epoch": 0.9937202987101154, "percentage": 99.37, "elapsed_time": "0:28:41", "remaining_time": "0:00:10", "throughput": 6531.54, "total_tokens": 11242048} +{"current_steps": 11715, "total_steps": 11784, "loss": 0.086, "lr": 2.1499527803214846e-10, "epoch": 0.9941446028513238, "percentage": 99.41, "elapsed_time": "0:28:41", "remaining_time": "0:00:10", "throughput": 6532.8, "total_tokens": 11246400} +{"current_steps": 11720, "total_steps": 11784, "loss": 0.0707, "lr": 1.8537949665997642e-10, "epoch": 0.9945689069925322, "percentage": 99.46, "elapsed_time": "0:28:41", "remaining_time": "0:00:09", "throughput": 6534.41, "total_tokens": 11251456} +{"current_steps": 11725, "total_steps": 11784, "loss": 0.0287, "lr": 1.5795721616373992e-10, "epoch": 0.9949932111337406, "percentage": 99.5, "elapsed_time": "0:28:42", "remaining_time": "0:00:08", "throughput": 6535.83, "total_tokens": 11256128} +{"current_steps": 11730, "total_steps": 11784, "loss": 0.0433, "lr": 1.3272849670564746e-10, "epoch": 0.995417515274949, "percentage": 99.54, "elapsed_time": "0:28:42", "remaining_time": "0:00:07", "throughput": 6537.23, "total_tokens": 11260736} +{"current_steps": 11735, "total_steps": 11784, "loss": 0.0669, "lr": 1.0969339363497975e-10, "epoch": 0.9958418194161575, "percentage": 99.58, "elapsed_time": "0:28:42", "remaining_time": "0:00:07", "throughput": 6538.39, "total_tokens": 11264896} +{"current_steps": 11740, "total_steps": 11784, "loss": 0.0291, "lr": 8.885195748875584e-11, "epoch": 0.9962661235573659, "percentage": 99.63, "elapsed_time": "0:28:43", "remaining_time": "0:00:06", "throughput": 6540.22, "total_tokens": 11270400} +{"current_steps": 11745, "total_steps": 11784, "loss": 0.0136, "lr": 7.020423399117791e-11, "epoch": 0.9966904276985743, "percentage": 99.67, "elapsed_time": "0:28:43", "remaining_time": "0:00:05", "throughput": 6541.63, "total_tokens": 11275008} +{"current_steps": 11750, "total_steps": 11784, "loss": 0.0776, "lr": 5.375026405352034e-11, "epoch": 0.9971147318397827, "percentage": 99.71, "elapsed_time": "0:28:43", "remaining_time": "0:00:04", "throughput": 6543.12, "total_tokens": 11279808} +{"current_steps": 11755, "total_steps": 11784, "loss": 0.0341, "lr": 3.949008377424068e-11, "epoch": 0.9975390359809911, "percentage": 99.75, "elapsed_time": "0:28:44", "remaining_time": "0:00:04", "throughput": 6544.26, "total_tokens": 11283904} +{"current_steps": 11760, "total_steps": 11784, "loss": 0.0601, "lr": 2.742372443909069e-11, "epoch": 0.9979633401221996, "percentage": 99.8, "elapsed_time": "0:28:44", "remaining_time": "0:00:03", "throughput": 6545.65, "total_tokens": 11288512} +{"current_steps": 11765, "total_steps": 11784, "loss": 0.1003, "lr": 1.7551212520339197e-11, "epoch": 0.998387644263408, "percentage": 99.84, "elapsed_time": "0:28:44", "remaining_time": "0:00:02", "throughput": 6547.25, "total_tokens": 11293568} +{"current_steps": 11770, "total_steps": 11784, "loss": 0.1294, "lr": 9.872569677438213e-12, "epoch": 0.9988119484046164, "percentage": 99.88, "elapsed_time": "0:28:45", "remaining_time": "0:00:02", "throughput": 6548.93, "total_tokens": 11298752} +{"current_steps": 11775, "total_steps": 11784, "loss": 0.04, "lr": 4.387812756578846e-12, "epoch": 0.9992362525458248, "percentage": 99.92, "elapsed_time": "0:28:45", "remaining_time": "0:00:01", "throughput": 6550.47, "total_tokens": 11303680} +{"current_steps": 11780, "total_steps": 11784, "loss": 0.0059, "lr": 1.0969537908023242e-12, "epoch": 0.9996605566870332, "percentage": 99.97, "elapsed_time": "0:28:45", "remaining_time": "0:00:00", "throughput": 6551.9, "total_tokens": 11308288} +{"current_steps": 11784, "total_steps": 11784, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:29:02", "remaining_time": "0:00:00", "throughput": 6492.68, "total_tokens": 11312256} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..83ef41b --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,19063 @@ +{ + "best_global_step": 11210, + "best_metric": 0.05228454992175102, + "best_model_checkpoint": "saves_bts_preliminary/freeze/llama-3.2-1b-instruct/train_qnli_42_1779286680/checkpoint-11210", + "epoch": 1.0, + "eval_steps": 590, + "global_step": 11784, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0004243041412084182, + "grad_norm": 246.15960693359375, + "learning_rate": 6.785411365564037e-09, + "loss": 0.8772, + "num_input_tokens_seen": 4992, + "step": 5 + }, + { + "epoch": 0.0008486082824168364, + "grad_norm": 284.08428955078125, + "learning_rate": 1.526717557251908e-08, + "loss": 0.958, + "num_input_tokens_seen": 9536, + "step": 10 + }, + { + "epoch": 0.0012729124236252546, + "grad_norm": 248.28021240234375, + "learning_rate": 2.374893977947413e-08, + "loss": 0.8691, + "num_input_tokens_seen": 14016, + "step": 15 + }, + { + "epoch": 0.0016972165648336728, + "grad_norm": 297.8770446777344, + "learning_rate": 3.223070398642917e-08, + "loss": 0.8723, + "num_input_tokens_seen": 19648, + "step": 20 + }, + { + "epoch": 0.002121520706042091, + "grad_norm": 267.6341552734375, + "learning_rate": 4.0712468193384224e-08, + "loss": 0.8359, + "num_input_tokens_seen": 24768, + "step": 25 + }, + { + "epoch": 0.0025458248472505093, + "grad_norm": 284.44561767578125, + "learning_rate": 4.919423240033927e-08, + "loss": 0.8734, + "num_input_tokens_seen": 29952, + "step": 30 + }, + { + "epoch": 0.0029701289884589274, + "grad_norm": 267.7585144042969, + "learning_rate": 5.767599660729432e-08, + "loss": 0.8972, + "num_input_tokens_seen": 34304, + "step": 35 + }, + { + "epoch": 0.0033944331296673455, + "grad_norm": 300.0269470214844, + "learning_rate": 6.615776081424935e-08, + "loss": 0.8139, + "num_input_tokens_seen": 39360, + "step": 40 + }, + { + "epoch": 0.0038187372708757637, + "grad_norm": 247.20384216308594, + "learning_rate": 7.463952502120441e-08, + "loss": 0.8369, + "num_input_tokens_seen": 44480, + "step": 45 + }, + { + "epoch": 0.004243041412084182, + "grad_norm": 194.6239013671875, + "learning_rate": 8.312128922815945e-08, + "loss": 0.6877, + "num_input_tokens_seen": 49152, + "step": 50 + }, + { + "epoch": 0.0046673455532926, + "grad_norm": 150.263427734375, + "learning_rate": 9.16030534351145e-08, + "loss": 0.5185, + "num_input_tokens_seen": 53696, + "step": 55 + }, + { + "epoch": 0.0050916496945010185, + "grad_norm": 151.96859741210938, + "learning_rate": 1.0008481764206955e-07, + "loss": 0.4927, + "num_input_tokens_seen": 58560, + "step": 60 + }, + { + "epoch": 0.005515953835709436, + "grad_norm": 108.88196563720703, + "learning_rate": 1.085665818490246e-07, + "loss": 0.4352, + "num_input_tokens_seen": 63808, + "step": 65 + }, + { + "epoch": 0.005940257976917855, + "grad_norm": 31.13915252685547, + "learning_rate": 1.1704834605597964e-07, + "loss": 0.3313, + "num_input_tokens_seen": 68096, + "step": 70 + }, + { + "epoch": 0.006364562118126273, + "grad_norm": 19.339509963989258, + "learning_rate": 1.2553011026293469e-07, + "loss": 0.2376, + "num_input_tokens_seen": 72448, + "step": 75 + }, + { + "epoch": 0.006788866259334691, + "grad_norm": 21.34341812133789, + "learning_rate": 1.3401187446988974e-07, + "loss": 0.2135, + "num_input_tokens_seen": 78336, + "step": 80 + }, + { + "epoch": 0.00721317040054311, + "grad_norm": 28.049270629882812, + "learning_rate": 1.4249363867684477e-07, + "loss": 0.165, + "num_input_tokens_seen": 83072, + "step": 85 + }, + { + "epoch": 0.007637474541751527, + "grad_norm": 16.34543228149414, + "learning_rate": 1.509754028837998e-07, + "loss": 0.1788, + "num_input_tokens_seen": 88128, + "step": 90 + }, + { + "epoch": 0.008061778682959946, + "grad_norm": 58.355735778808594, + "learning_rate": 1.594571670907549e-07, + "loss": 0.1682, + "num_input_tokens_seen": 92992, + "step": 95 + }, + { + "epoch": 0.008486082824168364, + "grad_norm": 32.8367805480957, + "learning_rate": 1.6793893129770992e-07, + "loss": 0.16, + "num_input_tokens_seen": 98112, + "step": 100 + }, + { + "epoch": 0.008910386965376781, + "grad_norm": 23.30171775817871, + "learning_rate": 1.7642069550466495e-07, + "loss": 0.1406, + "num_input_tokens_seen": 102720, + "step": 105 + }, + { + "epoch": 0.0093346911065852, + "grad_norm": 10.516228675842285, + "learning_rate": 1.8490245971162e-07, + "loss": 0.156, + "num_input_tokens_seen": 107520, + "step": 110 + }, + { + "epoch": 0.009758995247793618, + "grad_norm": 16.174560546875, + "learning_rate": 1.9338422391857507e-07, + "loss": 0.17, + "num_input_tokens_seen": 112064, + "step": 115 + }, + { + "epoch": 0.010183299389002037, + "grad_norm": 11.447249412536621, + "learning_rate": 2.018659881255301e-07, + "loss": 0.1468, + "num_input_tokens_seen": 117184, + "step": 120 + }, + { + "epoch": 0.010607603530210456, + "grad_norm": 21.983373641967773, + "learning_rate": 2.1034775233248513e-07, + "loss": 0.1524, + "num_input_tokens_seen": 121792, + "step": 125 + }, + { + "epoch": 0.011031907671418872, + "grad_norm": 19.451534271240234, + "learning_rate": 2.188295165394402e-07, + "loss": 0.1493, + "num_input_tokens_seen": 126272, + "step": 130 + }, + { + "epoch": 0.011456211812627291, + "grad_norm": 44.81078338623047, + "learning_rate": 2.2731128074639524e-07, + "loss": 0.1492, + "num_input_tokens_seen": 130880, + "step": 135 + }, + { + "epoch": 0.01188051595383571, + "grad_norm": 17.800430297851562, + "learning_rate": 2.3579304495335027e-07, + "loss": 0.152, + "num_input_tokens_seen": 135552, + "step": 140 + }, + { + "epoch": 0.012304820095044128, + "grad_norm": 19.99517250061035, + "learning_rate": 2.442748091603053e-07, + "loss": 0.1422, + "num_input_tokens_seen": 140928, + "step": 145 + }, + { + "epoch": 0.012729124236252547, + "grad_norm": 43.71803665161133, + "learning_rate": 2.5275657336726036e-07, + "loss": 0.1357, + "num_input_tokens_seen": 146176, + "step": 150 + }, + { + "epoch": 0.013153428377460964, + "grad_norm": 21.6224365234375, + "learning_rate": 2.612383375742154e-07, + "loss": 0.1692, + "num_input_tokens_seen": 151680, + "step": 155 + }, + { + "epoch": 0.013577732518669382, + "grad_norm": 54.11295700073242, + "learning_rate": 2.697201017811705e-07, + "loss": 0.1577, + "num_input_tokens_seen": 156480, + "step": 160 + }, + { + "epoch": 0.0140020366598778, + "grad_norm": 92.01202392578125, + "learning_rate": 2.782018659881255e-07, + "loss": 0.1464, + "num_input_tokens_seen": 161024, + "step": 165 + }, + { + "epoch": 0.01442634080108622, + "grad_norm": 38.31109619140625, + "learning_rate": 2.866836301950806e-07, + "loss": 0.1223, + "num_input_tokens_seen": 165760, + "step": 170 + }, + { + "epoch": 0.014850644942294636, + "grad_norm": 16.718843460083008, + "learning_rate": 2.951653944020356e-07, + "loss": 0.1156, + "num_input_tokens_seen": 169984, + "step": 175 + }, + { + "epoch": 0.015274949083503055, + "grad_norm": 15.30700397491455, + "learning_rate": 3.0364715860899065e-07, + "loss": 0.1584, + "num_input_tokens_seen": 174528, + "step": 180 + }, + { + "epoch": 0.01569925322471147, + "grad_norm": 77.4272689819336, + "learning_rate": 3.121289228159457e-07, + "loss": 0.1117, + "num_input_tokens_seen": 179136, + "step": 185 + }, + { + "epoch": 0.016123557365919892, + "grad_norm": 33.88642883300781, + "learning_rate": 3.206106870229007e-07, + "loss": 0.1283, + "num_input_tokens_seen": 183424, + "step": 190 + }, + { + "epoch": 0.01654786150712831, + "grad_norm": 12.441794395446777, + "learning_rate": 3.2909245122985577e-07, + "loss": 0.0957, + "num_input_tokens_seen": 187968, + "step": 195 + }, + { + "epoch": 0.01697216564833673, + "grad_norm": 23.63111686706543, + "learning_rate": 3.375742154368109e-07, + "loss": 0.1221, + "num_input_tokens_seen": 193152, + "step": 200 + }, + { + "epoch": 0.017396469789545146, + "grad_norm": 48.66728210449219, + "learning_rate": 3.460559796437659e-07, + "loss": 0.1305, + "num_input_tokens_seen": 197632, + "step": 205 + }, + { + "epoch": 0.017820773930753563, + "grad_norm": 22.497852325439453, + "learning_rate": 3.5453774385072094e-07, + "loss": 0.0971, + "num_input_tokens_seen": 202304, + "step": 210 + }, + { + "epoch": 0.018245078071961983, + "grad_norm": 47.47602844238281, + "learning_rate": 3.63019508057676e-07, + "loss": 0.1392, + "num_input_tokens_seen": 207040, + "step": 215 + }, + { + "epoch": 0.0186693822131704, + "grad_norm": 27.37762451171875, + "learning_rate": 3.71501272264631e-07, + "loss": 0.1071, + "num_input_tokens_seen": 212480, + "step": 220 + }, + { + "epoch": 0.01909368635437882, + "grad_norm": 73.55374908447266, + "learning_rate": 3.7998303647158606e-07, + "loss": 0.1398, + "num_input_tokens_seen": 217728, + "step": 225 + }, + { + "epoch": 0.019517990495587237, + "grad_norm": 31.551063537597656, + "learning_rate": 3.8846480067854107e-07, + "loss": 0.0853, + "num_input_tokens_seen": 221888, + "step": 230 + }, + { + "epoch": 0.019942294636795654, + "grad_norm": 9.147037506103516, + "learning_rate": 3.969465648854962e-07, + "loss": 0.0764, + "num_input_tokens_seen": 226496, + "step": 235 + }, + { + "epoch": 0.020366598778004074, + "grad_norm": 24.393016815185547, + "learning_rate": 4.0542832909245124e-07, + "loss": 0.1296, + "num_input_tokens_seen": 230720, + "step": 240 + }, + { + "epoch": 0.02079090291921249, + "grad_norm": 126.87380981445312, + "learning_rate": 4.1391009329940624e-07, + "loss": 0.2537, + "num_input_tokens_seen": 235584, + "step": 245 + }, + { + "epoch": 0.02121520706042091, + "grad_norm": 12.414925575256348, + "learning_rate": 4.223918575063613e-07, + "loss": 0.0764, + "num_input_tokens_seen": 241088, + "step": 250 + }, + { + "epoch": 0.021639511201629328, + "grad_norm": 18.61178970336914, + "learning_rate": 4.3087362171331635e-07, + "loss": 0.0627, + "num_input_tokens_seen": 245568, + "step": 255 + }, + { + "epoch": 0.022063815342837745, + "grad_norm": 52.55159378051758, + "learning_rate": 4.3935538592027136e-07, + "loss": 0.0585, + "num_input_tokens_seen": 250304, + "step": 260 + }, + { + "epoch": 0.022488119484046165, + "grad_norm": 39.259368896484375, + "learning_rate": 4.4783715012722647e-07, + "loss": 0.0933, + "num_input_tokens_seen": 255232, + "step": 265 + }, + { + "epoch": 0.022912423625254582, + "grad_norm": 5.972058296203613, + "learning_rate": 4.5631891433418153e-07, + "loss": 0.0946, + "num_input_tokens_seen": 259840, + "step": 270 + }, + { + "epoch": 0.023336727766463002, + "grad_norm": 27.744028091430664, + "learning_rate": 4.6480067854113653e-07, + "loss": 0.081, + "num_input_tokens_seen": 264768, + "step": 275 + }, + { + "epoch": 0.02376103190767142, + "grad_norm": 48.95866394042969, + "learning_rate": 4.732824427480916e-07, + "loss": 0.1051, + "num_input_tokens_seen": 270016, + "step": 280 + }, + { + "epoch": 0.024185336048879836, + "grad_norm": 29.231962203979492, + "learning_rate": 4.817642069550466e-07, + "loss": 0.0954, + "num_input_tokens_seen": 274496, + "step": 285 + }, + { + "epoch": 0.024609640190088256, + "grad_norm": 21.682893753051758, + "learning_rate": 4.902459711620017e-07, + "loss": 0.0791, + "num_input_tokens_seen": 279296, + "step": 290 + }, + { + "epoch": 0.025033944331296673, + "grad_norm": 66.02873229980469, + "learning_rate": 4.987277353689568e-07, + "loss": 0.077, + "num_input_tokens_seen": 284288, + "step": 295 + }, + { + "epoch": 0.025458248472505093, + "grad_norm": 30.446861267089844, + "learning_rate": 5.072094995759117e-07, + "loss": 0.1016, + "num_input_tokens_seen": 289088, + "step": 300 + }, + { + "epoch": 0.02588255261371351, + "grad_norm": 34.33888244628906, + "learning_rate": 5.156912637828668e-07, + "loss": 0.1201, + "num_input_tokens_seen": 293632, + "step": 305 + }, + { + "epoch": 0.026306856754921927, + "grad_norm": 15.951445579528809, + "learning_rate": 5.241730279898219e-07, + "loss": 0.0812, + "num_input_tokens_seen": 298176, + "step": 310 + }, + { + "epoch": 0.026731160896130347, + "grad_norm": 34.837608337402344, + "learning_rate": 5.326547921967769e-07, + "loss": 0.1464, + "num_input_tokens_seen": 302720, + "step": 315 + }, + { + "epoch": 0.027155465037338764, + "grad_norm": 66.30300903320312, + "learning_rate": 5.411365564037319e-07, + "loss": 0.1395, + "num_input_tokens_seen": 307648, + "step": 320 + }, + { + "epoch": 0.02757976917854718, + "grad_norm": 10.806644439697266, + "learning_rate": 5.49618320610687e-07, + "loss": 0.0854, + "num_input_tokens_seen": 312832, + "step": 325 + }, + { + "epoch": 0.0280040733197556, + "grad_norm": 45.027854919433594, + "learning_rate": 5.581000848176421e-07, + "loss": 0.0932, + "num_input_tokens_seen": 317376, + "step": 330 + }, + { + "epoch": 0.028428377460964018, + "grad_norm": 16.889318466186523, + "learning_rate": 5.66581849024597e-07, + "loss": 0.0609, + "num_input_tokens_seen": 322560, + "step": 335 + }, + { + "epoch": 0.02885268160217244, + "grad_norm": 28.992074966430664, + "learning_rate": 5.750636132315522e-07, + "loss": 0.0641, + "num_input_tokens_seen": 327104, + "step": 340 + }, + { + "epoch": 0.029276985743380855, + "grad_norm": 32.16653060913086, + "learning_rate": 5.835453774385072e-07, + "loss": 0.0712, + "num_input_tokens_seen": 332160, + "step": 345 + }, + { + "epoch": 0.029701289884589272, + "grad_norm": 64.0754623413086, + "learning_rate": 5.920271416454622e-07, + "loss": 0.0986, + "num_input_tokens_seen": 336960, + "step": 350 + }, + { + "epoch": 0.030125594025797692, + "grad_norm": 11.283415794372559, + "learning_rate": 6.005089058524173e-07, + "loss": 0.1444, + "num_input_tokens_seen": 341696, + "step": 355 + }, + { + "epoch": 0.03054989816700611, + "grad_norm": 47.3290901184082, + "learning_rate": 6.089906700593723e-07, + "loss": 0.1736, + "num_input_tokens_seen": 347008, + "step": 360 + }, + { + "epoch": 0.03097420230821453, + "grad_norm": 44.4918098449707, + "learning_rate": 6.174724342663274e-07, + "loss": 0.1123, + "num_input_tokens_seen": 352256, + "step": 365 + }, + { + "epoch": 0.03139850644942294, + "grad_norm": 13.7459135055542, + "learning_rate": 6.259541984732824e-07, + "loss": 0.0541, + "num_input_tokens_seen": 357312, + "step": 370 + }, + { + "epoch": 0.03182281059063136, + "grad_norm": 44.1005973815918, + "learning_rate": 6.344359626802375e-07, + "loss": 0.151, + "num_input_tokens_seen": 361728, + "step": 375 + }, + { + "epoch": 0.032247114731839784, + "grad_norm": 34.00252914428711, + "learning_rate": 6.429177268871925e-07, + "loss": 0.1249, + "num_input_tokens_seen": 366592, + "step": 380 + }, + { + "epoch": 0.032671418873048204, + "grad_norm": 41.16194152832031, + "learning_rate": 6.513994910941476e-07, + "loss": 0.2083, + "num_input_tokens_seen": 371392, + "step": 385 + }, + { + "epoch": 0.03309572301425662, + "grad_norm": 16.21058464050293, + "learning_rate": 6.598812553011026e-07, + "loss": 0.0916, + "num_input_tokens_seen": 376640, + "step": 390 + }, + { + "epoch": 0.03352002715546504, + "grad_norm": 33.83322525024414, + "learning_rate": 6.683630195080576e-07, + "loss": 0.1429, + "num_input_tokens_seen": 381504, + "step": 395 + }, + { + "epoch": 0.03394433129667346, + "grad_norm": 39.9620475769043, + "learning_rate": 6.768447837150128e-07, + "loss": 0.1211, + "num_input_tokens_seen": 385920, + "step": 400 + }, + { + "epoch": 0.03436863543788187, + "grad_norm": 47.01149368286133, + "learning_rate": 6.853265479219677e-07, + "loss": 0.1086, + "num_input_tokens_seen": 390272, + "step": 405 + }, + { + "epoch": 0.03479293957909029, + "grad_norm": 10.420038223266602, + "learning_rate": 6.938083121289228e-07, + "loss": 0.1208, + "num_input_tokens_seen": 396160, + "step": 410 + }, + { + "epoch": 0.03521724372029871, + "grad_norm": 10.787165641784668, + "learning_rate": 7.022900763358778e-07, + "loss": 0.0872, + "num_input_tokens_seen": 400768, + "step": 415 + }, + { + "epoch": 0.035641547861507125, + "grad_norm": 12.446135520935059, + "learning_rate": 7.107718405428329e-07, + "loss": 0.0613, + "num_input_tokens_seen": 405504, + "step": 420 + }, + { + "epoch": 0.036065852002715545, + "grad_norm": 24.936521530151367, + "learning_rate": 7.192536047497879e-07, + "loss": 0.0373, + "num_input_tokens_seen": 410176, + "step": 425 + }, + { + "epoch": 0.036490156143923966, + "grad_norm": 6.019582748413086, + "learning_rate": 7.277353689567429e-07, + "loss": 0.0669, + "num_input_tokens_seen": 415040, + "step": 430 + }, + { + "epoch": 0.036914460285132386, + "grad_norm": 63.409305572509766, + "learning_rate": 7.36217133163698e-07, + "loss": 0.1154, + "num_input_tokens_seen": 419968, + "step": 435 + }, + { + "epoch": 0.0373387644263408, + "grad_norm": 75.0130615234375, + "learning_rate": 7.446988973706531e-07, + "loss": 0.2025, + "num_input_tokens_seen": 424832, + "step": 440 + }, + { + "epoch": 0.03776306856754922, + "grad_norm": 48.14998245239258, + "learning_rate": 7.531806615776081e-07, + "loss": 0.3266, + "num_input_tokens_seen": 432064, + "step": 445 + }, + { + "epoch": 0.03818737270875764, + "grad_norm": 28.366230010986328, + "learning_rate": 7.616624257845632e-07, + "loss": 0.0826, + "num_input_tokens_seen": 437184, + "step": 450 + }, + { + "epoch": 0.03861167684996605, + "grad_norm": 21.336973190307617, + "learning_rate": 7.701441899915182e-07, + "loss": 0.102, + "num_input_tokens_seen": 442432, + "step": 455 + }, + { + "epoch": 0.039035980991174474, + "grad_norm": 26.27196502685547, + "learning_rate": 7.786259541984732e-07, + "loss": 0.0454, + "num_input_tokens_seen": 447040, + "step": 460 + }, + { + "epoch": 0.039460285132382894, + "grad_norm": 53.3596305847168, + "learning_rate": 7.871077184054283e-07, + "loss": 0.1319, + "num_input_tokens_seen": 452032, + "step": 465 + }, + { + "epoch": 0.03988458927359131, + "grad_norm": 22.736515045166016, + "learning_rate": 7.955894826123833e-07, + "loss": 0.0636, + "num_input_tokens_seen": 457344, + "step": 470 + }, + { + "epoch": 0.04030889341479973, + "grad_norm": 26.367496490478516, + "learning_rate": 8.040712468193384e-07, + "loss": 0.1349, + "num_input_tokens_seen": 462336, + "step": 475 + }, + { + "epoch": 0.04073319755600815, + "grad_norm": 22.535175323486328, + "learning_rate": 8.125530110262935e-07, + "loss": 0.0904, + "num_input_tokens_seen": 467072, + "step": 480 + }, + { + "epoch": 0.04115750169721656, + "grad_norm": 23.10480308532715, + "learning_rate": 8.210347752332485e-07, + "loss": 0.0983, + "num_input_tokens_seen": 471616, + "step": 485 + }, + { + "epoch": 0.04158180583842498, + "grad_norm": 26.855924606323242, + "learning_rate": 8.295165394402035e-07, + "loss": 0.0815, + "num_input_tokens_seen": 476480, + "step": 490 + }, + { + "epoch": 0.0420061099796334, + "grad_norm": 27.23138999938965, + "learning_rate": 8.379983036471586e-07, + "loss": 0.0929, + "num_input_tokens_seen": 481536, + "step": 495 + }, + { + "epoch": 0.04243041412084182, + "grad_norm": 14.400825500488281, + "learning_rate": 8.464800678541136e-07, + "loss": 0.1216, + "num_input_tokens_seen": 486336, + "step": 500 + }, + { + "epoch": 0.042854718262050236, + "grad_norm": 36.2694091796875, + "learning_rate": 8.549618320610686e-07, + "loss": 0.1143, + "num_input_tokens_seen": 490944, + "step": 505 + }, + { + "epoch": 0.043279022403258656, + "grad_norm": 7.930865287780762, + "learning_rate": 8.634435962680237e-07, + "loss": 0.0585, + "num_input_tokens_seen": 495488, + "step": 510 + }, + { + "epoch": 0.043703326544467076, + "grad_norm": 17.148012161254883, + "learning_rate": 8.719253604749788e-07, + "loss": 0.1, + "num_input_tokens_seen": 499840, + "step": 515 + }, + { + "epoch": 0.04412763068567549, + "grad_norm": 24.2634334564209, + "learning_rate": 8.804071246819338e-07, + "loss": 0.0818, + "num_input_tokens_seen": 504512, + "step": 520 + }, + { + "epoch": 0.04455193482688391, + "grad_norm": 27.504880905151367, + "learning_rate": 8.888888888888888e-07, + "loss": 0.0705, + "num_input_tokens_seen": 509376, + "step": 525 + }, + { + "epoch": 0.04497623896809233, + "grad_norm": 29.78805923461914, + "learning_rate": 8.973706530958439e-07, + "loss": 0.07, + "num_input_tokens_seen": 513856, + "step": 530 + }, + { + "epoch": 0.045400543109300744, + "grad_norm": 31.55817413330078, + "learning_rate": 9.058524173027989e-07, + "loss": 0.107, + "num_input_tokens_seen": 518976, + "step": 535 + }, + { + "epoch": 0.045824847250509164, + "grad_norm": 68.85687255859375, + "learning_rate": 9.143341815097539e-07, + "loss": 0.0958, + "num_input_tokens_seen": 524160, + "step": 540 + }, + { + "epoch": 0.046249151391717584, + "grad_norm": 32.40176773071289, + "learning_rate": 9.228159457167091e-07, + "loss": 0.1709, + "num_input_tokens_seen": 529152, + "step": 545 + }, + { + "epoch": 0.046673455532926005, + "grad_norm": 67.73282623291016, + "learning_rate": 9.312977099236641e-07, + "loss": 0.1825, + "num_input_tokens_seen": 533824, + "step": 550 + }, + { + "epoch": 0.04709775967413442, + "grad_norm": 13.536025047302246, + "learning_rate": 9.397794741306191e-07, + "loss": 0.0982, + "num_input_tokens_seen": 538560, + "step": 555 + }, + { + "epoch": 0.04752206381534284, + "grad_norm": 20.389511108398438, + "learning_rate": 9.482612383375742e-07, + "loss": 0.1049, + "num_input_tokens_seen": 542784, + "step": 560 + }, + { + "epoch": 0.04794636795655126, + "grad_norm": 13.790285110473633, + "learning_rate": 9.567430025445291e-07, + "loss": 0.1076, + "num_input_tokens_seen": 547840, + "step": 565 + }, + { + "epoch": 0.04837067209775967, + "grad_norm": 9.404131889343262, + "learning_rate": 9.652247667514842e-07, + "loss": 0.0785, + "num_input_tokens_seen": 552448, + "step": 570 + }, + { + "epoch": 0.04879497623896809, + "grad_norm": 11.799363136291504, + "learning_rate": 9.737065309584394e-07, + "loss": 0.1165, + "num_input_tokens_seen": 557120, + "step": 575 + }, + { + "epoch": 0.04921928038017651, + "grad_norm": 12.816581726074219, + "learning_rate": 9.821882951653943e-07, + "loss": 0.1119, + "num_input_tokens_seen": 561536, + "step": 580 + }, + { + "epoch": 0.049643584521384926, + "grad_norm": 16.07106590270996, + "learning_rate": 9.906700593723493e-07, + "loss": 0.0704, + "num_input_tokens_seen": 566336, + "step": 585 + }, + { + "epoch": 0.050067888662593346, + "grad_norm": 11.079269409179688, + "learning_rate": 9.991518235793044e-07, + "loss": 0.0929, + "num_input_tokens_seen": 571072, + "step": 590 + }, + { + "epoch": 0.050067888662593346, + "eval_loss": 0.08071617037057877, + "eval_runtime": 15.6888, + "eval_samples_per_second": 667.674, + "eval_steps_per_second": 83.499, + "num_input_tokens_seen": 571072, + "step": 590 + }, + { + "epoch": 0.050492192803801766, + "grad_norm": 21.24359893798828, + "learning_rate": 1.0076335877862595e-06, + "loss": 0.0317, + "num_input_tokens_seen": 576192, + "step": 595 + }, + { + "epoch": 0.05091649694501019, + "grad_norm": 38.84645462036133, + "learning_rate": 1.0161153519932147e-06, + "loss": 0.1199, + "num_input_tokens_seen": 580928, + "step": 600 + }, + { + "epoch": 0.0513408010862186, + "grad_norm": 16.443923950195312, + "learning_rate": 1.0245971162001696e-06, + "loss": 0.1215, + "num_input_tokens_seen": 585728, + "step": 605 + }, + { + "epoch": 0.05176510522742702, + "grad_norm": 51.92510223388672, + "learning_rate": 1.0330788804071246e-06, + "loss": 0.0926, + "num_input_tokens_seen": 591040, + "step": 610 + }, + { + "epoch": 0.05218940936863544, + "grad_norm": 34.422847747802734, + "learning_rate": 1.0415606446140797e-06, + "loss": 0.1686, + "num_input_tokens_seen": 595584, + "step": 615 + }, + { + "epoch": 0.052613713509843854, + "grad_norm": 28.12080192565918, + "learning_rate": 1.0500424088210348e-06, + "loss": 0.1433, + "num_input_tokens_seen": 600384, + "step": 620 + }, + { + "epoch": 0.053038017651052274, + "grad_norm": 17.771556854248047, + "learning_rate": 1.0585241730279896e-06, + "loss": 0.058, + "num_input_tokens_seen": 605248, + "step": 625 + }, + { + "epoch": 0.053462321792260695, + "grad_norm": 7.092148303985596, + "learning_rate": 1.0670059372349449e-06, + "loss": 0.0805, + "num_input_tokens_seen": 609856, + "step": 630 + }, + { + "epoch": 0.05388662593346911, + "grad_norm": 11.3302001953125, + "learning_rate": 1.0754877014419e-06, + "loss": 0.0443, + "num_input_tokens_seen": 614976, + "step": 635 + }, + { + "epoch": 0.05431093007467753, + "grad_norm": 28.04367446899414, + "learning_rate": 1.083969465648855e-06, + "loss": 0.0862, + "num_input_tokens_seen": 619648, + "step": 640 + }, + { + "epoch": 0.05473523421588595, + "grad_norm": 25.472623825073242, + "learning_rate": 1.09245122985581e-06, + "loss": 0.0478, + "num_input_tokens_seen": 624896, + "step": 645 + }, + { + "epoch": 0.05515953835709436, + "grad_norm": 16.80830192565918, + "learning_rate": 1.1009329940627649e-06, + "loss": 0.1452, + "num_input_tokens_seen": 629632, + "step": 650 + }, + { + "epoch": 0.05558384249830278, + "grad_norm": 21.809473037719727, + "learning_rate": 1.10941475826972e-06, + "loss": 0.0974, + "num_input_tokens_seen": 634624, + "step": 655 + }, + { + "epoch": 0.0560081466395112, + "grad_norm": 25.004167556762695, + "learning_rate": 1.1178965224766752e-06, + "loss": 0.0974, + "num_input_tokens_seen": 639360, + "step": 660 + }, + { + "epoch": 0.05643245078071962, + "grad_norm": 22.970441818237305, + "learning_rate": 1.1263782866836303e-06, + "loss": 0.038, + "num_input_tokens_seen": 644032, + "step": 665 + }, + { + "epoch": 0.056856754921928036, + "grad_norm": 17.413970947265625, + "learning_rate": 1.1348600508905853e-06, + "loss": 0.0969, + "num_input_tokens_seen": 648256, + "step": 670 + }, + { + "epoch": 0.05728105906313646, + "grad_norm": 28.037620544433594, + "learning_rate": 1.1433418150975402e-06, + "loss": 0.1091, + "num_input_tokens_seen": 653056, + "step": 675 + }, + { + "epoch": 0.05770536320434488, + "grad_norm": 36.98174285888672, + "learning_rate": 1.1518235793044952e-06, + "loss": 0.0743, + "num_input_tokens_seen": 657664, + "step": 680 + }, + { + "epoch": 0.05812966734555329, + "grad_norm": 27.782451629638672, + "learning_rate": 1.1603053435114503e-06, + "loss": 0.0379, + "num_input_tokens_seen": 662016, + "step": 685 + }, + { + "epoch": 0.05855397148676171, + "grad_norm": 6.267802715301514, + "learning_rate": 1.1687871077184053e-06, + "loss": 0.1067, + "num_input_tokens_seen": 666368, + "step": 690 + }, + { + "epoch": 0.05897827562797013, + "grad_norm": 27.76127815246582, + "learning_rate": 1.1772688719253606e-06, + "loss": 0.1044, + "num_input_tokens_seen": 671616, + "step": 695 + }, + { + "epoch": 0.059402579769178544, + "grad_norm": 1.9939544200897217, + "learning_rate": 1.1857506361323155e-06, + "loss": 0.0405, + "num_input_tokens_seen": 676288, + "step": 700 + }, + { + "epoch": 0.059826883910386965, + "grad_norm": 5.708540916442871, + "learning_rate": 1.1942324003392705e-06, + "loss": 0.0789, + "num_input_tokens_seen": 680960, + "step": 705 + }, + { + "epoch": 0.060251188051595385, + "grad_norm": 18.987085342407227, + "learning_rate": 1.2027141645462256e-06, + "loss": 0.0686, + "num_input_tokens_seen": 685440, + "step": 710 + }, + { + "epoch": 0.060675492192803805, + "grad_norm": 29.056886672973633, + "learning_rate": 1.2111959287531806e-06, + "loss": 0.1027, + "num_input_tokens_seen": 690304, + "step": 715 + }, + { + "epoch": 0.06109979633401222, + "grad_norm": 24.722980499267578, + "learning_rate": 1.2196776929601355e-06, + "loss": 0.0782, + "num_input_tokens_seen": 695040, + "step": 720 + }, + { + "epoch": 0.06152410047522064, + "grad_norm": 12.598089218139648, + "learning_rate": 1.2281594571670907e-06, + "loss": 0.0291, + "num_input_tokens_seen": 699456, + "step": 725 + }, + { + "epoch": 0.06194840461642906, + "grad_norm": 36.17939758300781, + "learning_rate": 1.2366412213740458e-06, + "loss": 0.1563, + "num_input_tokens_seen": 704064, + "step": 730 + }, + { + "epoch": 0.06237270875763747, + "grad_norm": 3.6635186672210693, + "learning_rate": 1.2451229855810009e-06, + "loss": 0.0311, + "num_input_tokens_seen": 708544, + "step": 735 + }, + { + "epoch": 0.06279701289884589, + "grad_norm": 36.30998611450195, + "learning_rate": 1.253604749787956e-06, + "loss": 0.2106, + "num_input_tokens_seen": 713088, + "step": 740 + }, + { + "epoch": 0.0632213170400543, + "grad_norm": 19.025728225708008, + "learning_rate": 1.2620865139949108e-06, + "loss": 0.1061, + "num_input_tokens_seen": 718016, + "step": 745 + }, + { + "epoch": 0.06364562118126273, + "grad_norm": 19.909170150756836, + "learning_rate": 1.2705682782018658e-06, + "loss": 0.042, + "num_input_tokens_seen": 722752, + "step": 750 + }, + { + "epoch": 0.06406992532247115, + "grad_norm": 19.725980758666992, + "learning_rate": 1.279050042408821e-06, + "loss": 0.0744, + "num_input_tokens_seen": 727872, + "step": 755 + }, + { + "epoch": 0.06449422946367957, + "grad_norm": 21.742664337158203, + "learning_rate": 1.2875318066157761e-06, + "loss": 0.1042, + "num_input_tokens_seen": 732480, + "step": 760 + }, + { + "epoch": 0.06491853360488799, + "grad_norm": 4.472809791564941, + "learning_rate": 1.2960135708227312e-06, + "loss": 0.0475, + "num_input_tokens_seen": 736704, + "step": 765 + }, + { + "epoch": 0.06534283774609641, + "grad_norm": 8.560938835144043, + "learning_rate": 1.304495335029686e-06, + "loss": 0.0575, + "num_input_tokens_seen": 742848, + "step": 770 + }, + { + "epoch": 0.06576714188730481, + "grad_norm": 4.725236892700195, + "learning_rate": 1.3129770992366411e-06, + "loss": 0.1154, + "num_input_tokens_seen": 747072, + "step": 775 + }, + { + "epoch": 0.06619144602851323, + "grad_norm": 1.102369785308838, + "learning_rate": 1.3214588634435962e-06, + "loss": 0.0311, + "num_input_tokens_seen": 751808, + "step": 780 + }, + { + "epoch": 0.06661575016972165, + "grad_norm": 34.201229095458984, + "learning_rate": 1.3299406276505512e-06, + "loss": 0.0878, + "num_input_tokens_seen": 756800, + "step": 785 + }, + { + "epoch": 0.06704005431093008, + "grad_norm": 27.980588912963867, + "learning_rate": 1.3384223918575063e-06, + "loss": 0.1431, + "num_input_tokens_seen": 761536, + "step": 790 + }, + { + "epoch": 0.0674643584521385, + "grad_norm": 28.48628807067871, + "learning_rate": 1.3469041560644613e-06, + "loss": 0.1062, + "num_input_tokens_seen": 765824, + "step": 795 + }, + { + "epoch": 0.06788866259334692, + "grad_norm": 2.4105472564697266, + "learning_rate": 1.3553859202714164e-06, + "loss": 0.3519, + "num_input_tokens_seen": 770240, + "step": 800 + }, + { + "epoch": 0.06831296673455534, + "grad_norm": 2.4612503051757812, + "learning_rate": 1.3638676844783715e-06, + "loss": 0.1262, + "num_input_tokens_seen": 775424, + "step": 805 + }, + { + "epoch": 0.06873727087576374, + "grad_norm": 11.974044799804688, + "learning_rate": 1.3723494486853265e-06, + "loss": 0.061, + "num_input_tokens_seen": 779904, + "step": 810 + }, + { + "epoch": 0.06916157501697216, + "grad_norm": 11.464366912841797, + "learning_rate": 1.3808312128922814e-06, + "loss": 0.0747, + "num_input_tokens_seen": 784512, + "step": 815 + }, + { + "epoch": 0.06958587915818058, + "grad_norm": 20.74694061279297, + "learning_rate": 1.3893129770992366e-06, + "loss": 0.078, + "num_input_tokens_seen": 789440, + "step": 820 + }, + { + "epoch": 0.070010183299389, + "grad_norm": 20.158458709716797, + "learning_rate": 1.3977947413061917e-06, + "loss": 0.1098, + "num_input_tokens_seen": 794240, + "step": 825 + }, + { + "epoch": 0.07043448744059742, + "grad_norm": 8.966534614562988, + "learning_rate": 1.4062765055131467e-06, + "loss": 0.139, + "num_input_tokens_seen": 798592, + "step": 830 + }, + { + "epoch": 0.07085879158180584, + "grad_norm": 31.50881004333496, + "learning_rate": 1.4147582697201018e-06, + "loss": 0.1213, + "num_input_tokens_seen": 803776, + "step": 835 + }, + { + "epoch": 0.07128309572301425, + "grad_norm": 6.87647008895874, + "learning_rate": 1.4232400339270566e-06, + "loss": 0.0552, + "num_input_tokens_seen": 809088, + "step": 840 + }, + { + "epoch": 0.07170739986422267, + "grad_norm": 15.782602310180664, + "learning_rate": 1.4317217981340117e-06, + "loss": 0.0523, + "num_input_tokens_seen": 813824, + "step": 845 + }, + { + "epoch": 0.07213170400543109, + "grad_norm": 5.775845527648926, + "learning_rate": 1.440203562340967e-06, + "loss": 0.0617, + "num_input_tokens_seen": 818240, + "step": 850 + }, + { + "epoch": 0.07255600814663951, + "grad_norm": 14.254950523376465, + "learning_rate": 1.448685326547922e-06, + "loss": 0.1225, + "num_input_tokens_seen": 822400, + "step": 855 + }, + { + "epoch": 0.07298031228784793, + "grad_norm": 38.01359558105469, + "learning_rate": 1.457167090754877e-06, + "loss": 0.1247, + "num_input_tokens_seen": 827328, + "step": 860 + }, + { + "epoch": 0.07340461642905635, + "grad_norm": 15.854536056518555, + "learning_rate": 1.465648854961832e-06, + "loss": 0.1375, + "num_input_tokens_seen": 832320, + "step": 865 + }, + { + "epoch": 0.07382892057026477, + "grad_norm": 21.019590377807617, + "learning_rate": 1.474130619168787e-06, + "loss": 0.0976, + "num_input_tokens_seen": 837376, + "step": 870 + }, + { + "epoch": 0.07425322471147318, + "grad_norm": 15.762535095214844, + "learning_rate": 1.482612383375742e-06, + "loss": 0.0646, + "num_input_tokens_seen": 843008, + "step": 875 + }, + { + "epoch": 0.0746775288526816, + "grad_norm": 11.819944381713867, + "learning_rate": 1.491094147582697e-06, + "loss": 0.1057, + "num_input_tokens_seen": 847680, + "step": 880 + }, + { + "epoch": 0.07510183299389002, + "grad_norm": 11.814828872680664, + "learning_rate": 1.4995759117896522e-06, + "loss": 0.0703, + "num_input_tokens_seen": 852288, + "step": 885 + }, + { + "epoch": 0.07552613713509844, + "grad_norm": 19.450414657592773, + "learning_rate": 1.5080576759966072e-06, + "loss": 0.0832, + "num_input_tokens_seen": 857280, + "step": 890 + }, + { + "epoch": 0.07595044127630686, + "grad_norm": 15.626432418823242, + "learning_rate": 1.5165394402035623e-06, + "loss": 0.0415, + "num_input_tokens_seen": 862080, + "step": 895 + }, + { + "epoch": 0.07637474541751528, + "grad_norm": 20.097511291503906, + "learning_rate": 1.5250212044105173e-06, + "loss": 0.1667, + "num_input_tokens_seen": 866624, + "step": 900 + }, + { + "epoch": 0.07679904955872369, + "grad_norm": 18.306550979614258, + "learning_rate": 1.5335029686174724e-06, + "loss": 0.0798, + "num_input_tokens_seen": 871360, + "step": 905 + }, + { + "epoch": 0.0772233536999321, + "grad_norm": 20.8496150970459, + "learning_rate": 1.5419847328244272e-06, + "loss": 0.1418, + "num_input_tokens_seen": 877632, + "step": 910 + }, + { + "epoch": 0.07764765784114053, + "grad_norm": 8.582545280456543, + "learning_rate": 1.5504664970313825e-06, + "loss": 0.0659, + "num_input_tokens_seen": 881600, + "step": 915 + }, + { + "epoch": 0.07807196198234895, + "grad_norm": 14.078996658325195, + "learning_rate": 1.5589482612383376e-06, + "loss": 0.0591, + "num_input_tokens_seen": 886400, + "step": 920 + }, + { + "epoch": 0.07849626612355737, + "grad_norm": 16.29892349243164, + "learning_rate": 1.5674300254452926e-06, + "loss": 0.0906, + "num_input_tokens_seen": 890880, + "step": 925 + }, + { + "epoch": 0.07892057026476579, + "grad_norm": 20.222871780395508, + "learning_rate": 1.5759117896522477e-06, + "loss": 0.076, + "num_input_tokens_seen": 895744, + "step": 930 + }, + { + "epoch": 0.07934487440597421, + "grad_norm": 35.49565505981445, + "learning_rate": 1.5843935538592025e-06, + "loss": 0.1215, + "num_input_tokens_seen": 900224, + "step": 935 + }, + { + "epoch": 0.07976917854718261, + "grad_norm": 9.415274620056152, + "learning_rate": 1.5928753180661576e-06, + "loss": 0.0687, + "num_input_tokens_seen": 905280, + "step": 940 + }, + { + "epoch": 0.08019348268839104, + "grad_norm": 4.259697914123535, + "learning_rate": 1.6013570822731128e-06, + "loss": 0.0859, + "num_input_tokens_seen": 910336, + "step": 945 + }, + { + "epoch": 0.08061778682959946, + "grad_norm": 15.21580696105957, + "learning_rate": 1.609838846480068e-06, + "loss": 0.0632, + "num_input_tokens_seen": 915776, + "step": 950 + }, + { + "epoch": 0.08104209097080788, + "grad_norm": 12.9989595413208, + "learning_rate": 1.618320610687023e-06, + "loss": 0.0857, + "num_input_tokens_seen": 920512, + "step": 955 + }, + { + "epoch": 0.0814663951120163, + "grad_norm": 33.018089294433594, + "learning_rate": 1.6268023748939778e-06, + "loss": 0.1331, + "num_input_tokens_seen": 924992, + "step": 960 + }, + { + "epoch": 0.08189069925322472, + "grad_norm": 24.30968475341797, + "learning_rate": 1.6352841391009329e-06, + "loss": 0.1138, + "num_input_tokens_seen": 929792, + "step": 965 + }, + { + "epoch": 0.08231500339443312, + "grad_norm": 6.3038010597229, + "learning_rate": 1.643765903307888e-06, + "loss": 0.1123, + "num_input_tokens_seen": 934208, + "step": 970 + }, + { + "epoch": 0.08273930753564154, + "grad_norm": 4.268393039703369, + "learning_rate": 1.652247667514843e-06, + "loss": 0.0767, + "num_input_tokens_seen": 938624, + "step": 975 + }, + { + "epoch": 0.08316361167684996, + "grad_norm": 9.543072700500488, + "learning_rate": 1.660729431721798e-06, + "loss": 0.0286, + "num_input_tokens_seen": 943168, + "step": 980 + }, + { + "epoch": 0.08358791581805838, + "grad_norm": 35.978233337402344, + "learning_rate": 1.669211195928753e-06, + "loss": 0.0942, + "num_input_tokens_seen": 948032, + "step": 985 + }, + { + "epoch": 0.0840122199592668, + "grad_norm": 27.4283504486084, + "learning_rate": 1.6776929601357082e-06, + "loss": 0.0874, + "num_input_tokens_seen": 954176, + "step": 990 + }, + { + "epoch": 0.08443652410047522, + "grad_norm": 16.803491592407227, + "learning_rate": 1.6861747243426632e-06, + "loss": 0.0855, + "num_input_tokens_seen": 958912, + "step": 995 + }, + { + "epoch": 0.08486082824168364, + "grad_norm": 11.425243377685547, + "learning_rate": 1.6946564885496183e-06, + "loss": 0.1609, + "num_input_tokens_seen": 963264, + "step": 1000 + }, + { + "epoch": 0.08528513238289205, + "grad_norm": 13.470209121704102, + "learning_rate": 1.7031382527565731e-06, + "loss": 0.0747, + "num_input_tokens_seen": 968256, + "step": 1005 + }, + { + "epoch": 0.08570943652410047, + "grad_norm": 7.800799369812012, + "learning_rate": 1.7116200169635284e-06, + "loss": 0.1322, + "num_input_tokens_seen": 972608, + "step": 1010 + }, + { + "epoch": 0.08613374066530889, + "grad_norm": 16.069305419921875, + "learning_rate": 1.7201017811704834e-06, + "loss": 0.0859, + "num_input_tokens_seen": 977856, + "step": 1015 + }, + { + "epoch": 0.08655804480651731, + "grad_norm": 31.697803497314453, + "learning_rate": 1.7285835453774385e-06, + "loss": 0.0987, + "num_input_tokens_seen": 982720, + "step": 1020 + }, + { + "epoch": 0.08698234894772573, + "grad_norm": 32.31370544433594, + "learning_rate": 1.7370653095843936e-06, + "loss": 0.1228, + "num_input_tokens_seen": 987584, + "step": 1025 + }, + { + "epoch": 0.08740665308893415, + "grad_norm": 19.247591018676758, + "learning_rate": 1.7455470737913484e-06, + "loss": 0.082, + "num_input_tokens_seen": 992448, + "step": 1030 + }, + { + "epoch": 0.08783095723014257, + "grad_norm": 24.267414093017578, + "learning_rate": 1.7540288379983035e-06, + "loss": 0.1068, + "num_input_tokens_seen": 997184, + "step": 1035 + }, + { + "epoch": 0.08825526137135098, + "grad_norm": 11.689900398254395, + "learning_rate": 1.7625106022052587e-06, + "loss": 0.0649, + "num_input_tokens_seen": 1002432, + "step": 1040 + }, + { + "epoch": 0.0886795655125594, + "grad_norm": 29.932924270629883, + "learning_rate": 1.7709923664122138e-06, + "loss": 0.0748, + "num_input_tokens_seen": 1007360, + "step": 1045 + }, + { + "epoch": 0.08910386965376782, + "grad_norm": 0.5252959728240967, + "learning_rate": 1.7794741306191686e-06, + "loss": 0.0598, + "num_input_tokens_seen": 1011968, + "step": 1050 + }, + { + "epoch": 0.08952817379497624, + "grad_norm": 39.90684127807617, + "learning_rate": 1.7879558948261237e-06, + "loss": 0.0976, + "num_input_tokens_seen": 1016896, + "step": 1055 + }, + { + "epoch": 0.08995247793618466, + "grad_norm": 2.350248336791992, + "learning_rate": 1.7964376590330787e-06, + "loss": 0.1005, + "num_input_tokens_seen": 1021952, + "step": 1060 + }, + { + "epoch": 0.09037678207739308, + "grad_norm": 27.664554595947266, + "learning_rate": 1.8049194232400338e-06, + "loss": 0.1977, + "num_input_tokens_seen": 1026560, + "step": 1065 + }, + { + "epoch": 0.09080108621860149, + "grad_norm": 1.7253851890563965, + "learning_rate": 1.813401187446989e-06, + "loss": 0.0821, + "num_input_tokens_seen": 1031360, + "step": 1070 + }, + { + "epoch": 0.09122539035980991, + "grad_norm": 32.1817512512207, + "learning_rate": 1.821882951653944e-06, + "loss": 0.1014, + "num_input_tokens_seen": 1036480, + "step": 1075 + }, + { + "epoch": 0.09164969450101833, + "grad_norm": 30.749300003051758, + "learning_rate": 1.830364715860899e-06, + "loss": 0.0861, + "num_input_tokens_seen": 1041024, + "step": 1080 + }, + { + "epoch": 0.09207399864222675, + "grad_norm": 10.305405616760254, + "learning_rate": 1.838846480067854e-06, + "loss": 0.0588, + "num_input_tokens_seen": 1045312, + "step": 1085 + }, + { + "epoch": 0.09249830278343517, + "grad_norm": 13.730518341064453, + "learning_rate": 1.847328244274809e-06, + "loss": 0.0759, + "num_input_tokens_seen": 1050240, + "step": 1090 + }, + { + "epoch": 0.09292260692464359, + "grad_norm": 11.231359481811523, + "learning_rate": 1.8558100084817641e-06, + "loss": 0.09, + "num_input_tokens_seen": 1055744, + "step": 1095 + }, + { + "epoch": 0.09334691106585201, + "grad_norm": 25.91359519958496, + "learning_rate": 1.864291772688719e-06, + "loss": 0.073, + "num_input_tokens_seen": 1060352, + "step": 1100 + }, + { + "epoch": 0.09377121520706042, + "grad_norm": 28.921405792236328, + "learning_rate": 1.8727735368956743e-06, + "loss": 0.1046, + "num_input_tokens_seen": 1065472, + "step": 1105 + }, + { + "epoch": 0.09419551934826884, + "grad_norm": 0.6421509385108948, + "learning_rate": 1.8812553011026293e-06, + "loss": 0.0767, + "num_input_tokens_seen": 1070144, + "step": 1110 + }, + { + "epoch": 0.09461982348947726, + "grad_norm": 0.3113643527030945, + "learning_rate": 1.8897370653095844e-06, + "loss": 0.0741, + "num_input_tokens_seen": 1074688, + "step": 1115 + }, + { + "epoch": 0.09504412763068568, + "grad_norm": 0.44791167974472046, + "learning_rate": 1.8982188295165394e-06, + "loss": 0.0479, + "num_input_tokens_seen": 1079040, + "step": 1120 + }, + { + "epoch": 0.0954684317718941, + "grad_norm": 49.530181884765625, + "learning_rate": 1.9067005937234943e-06, + "loss": 0.0952, + "num_input_tokens_seen": 1083456, + "step": 1125 + }, + { + "epoch": 0.09589273591310252, + "grad_norm": 4.848371505737305, + "learning_rate": 1.9151823579304493e-06, + "loss": 0.1757, + "num_input_tokens_seen": 1088064, + "step": 1130 + }, + { + "epoch": 0.09631704005431092, + "grad_norm": 11.97738265991211, + "learning_rate": 1.9236641221374044e-06, + "loss": 0.0502, + "num_input_tokens_seen": 1092544, + "step": 1135 + }, + { + "epoch": 0.09674134419551934, + "grad_norm": 52.587013244628906, + "learning_rate": 1.9321458863443595e-06, + "loss": 0.0995, + "num_input_tokens_seen": 1097792, + "step": 1140 + }, + { + "epoch": 0.09716564833672776, + "grad_norm": 13.555150032043457, + "learning_rate": 1.9406276505513145e-06, + "loss": 0.1066, + "num_input_tokens_seen": 1102912, + "step": 1145 + }, + { + "epoch": 0.09758995247793618, + "grad_norm": 3.3610050678253174, + "learning_rate": 1.9491094147582696e-06, + "loss": 0.0254, + "num_input_tokens_seen": 1107840, + "step": 1150 + }, + { + "epoch": 0.0980142566191446, + "grad_norm": 38.56612777709961, + "learning_rate": 1.9575911789652246e-06, + "loss": 0.1516, + "num_input_tokens_seen": 1112448, + "step": 1155 + }, + { + "epoch": 0.09843856076035302, + "grad_norm": 24.816673278808594, + "learning_rate": 1.9660729431721797e-06, + "loss": 0.1223, + "num_input_tokens_seen": 1117248, + "step": 1160 + }, + { + "epoch": 0.09886286490156145, + "grad_norm": 25.663911819458008, + "learning_rate": 1.9745547073791347e-06, + "loss": 0.089, + "num_input_tokens_seen": 1121984, + "step": 1165 + }, + { + "epoch": 0.09928716904276985, + "grad_norm": 8.376113891601562, + "learning_rate": 1.98303647158609e-06, + "loss": 0.0988, + "num_input_tokens_seen": 1127040, + "step": 1170 + }, + { + "epoch": 0.09971147318397827, + "grad_norm": 12.205850601196289, + "learning_rate": 1.991518235793045e-06, + "loss": 0.0578, + "num_input_tokens_seen": 1131904, + "step": 1175 + }, + { + "epoch": 0.10013577732518669, + "grad_norm": 19.723358154296875, + "learning_rate": 2e-06, + "loss": 0.1054, + "num_input_tokens_seen": 1136384, + "step": 1180 + }, + { + "epoch": 0.10013577732518669, + "eval_loss": 0.07076410949230194, + "eval_runtime": 15.763, + "eval_samples_per_second": 664.533, + "eval_steps_per_second": 83.106, + "num_input_tokens_seen": 1136384, + "step": 1180 + }, + { + "epoch": 0.10056008146639511, + "grad_norm": 8.96194076538086, + "learning_rate": 1.999998903046209e-06, + "loss": 0.0543, + "num_input_tokens_seen": 1140864, + "step": 1185 + }, + { + "epoch": 0.10098438560760353, + "grad_norm": 24.896997451782227, + "learning_rate": 1.999995612187243e-06, + "loss": 0.1416, + "num_input_tokens_seen": 1145408, + "step": 1190 + }, + { + "epoch": 0.10140868974881195, + "grad_norm": 21.160457611083984, + "learning_rate": 1.9999901274303226e-06, + "loss": 0.1497, + "num_input_tokens_seen": 1150400, + "step": 1195 + }, + { + "epoch": 0.10183299389002037, + "grad_norm": 68.9247817993164, + "learning_rate": 1.9999824487874795e-06, + "loss": 0.1094, + "num_input_tokens_seen": 1154880, + "step": 1200 + }, + { + "epoch": 0.10225729803122878, + "grad_norm": 0.9308235049247742, + "learning_rate": 1.999972576275561e-06, + "loss": 0.1046, + "num_input_tokens_seen": 1159552, + "step": 1205 + }, + { + "epoch": 0.1026816021724372, + "grad_norm": 14.106361389160156, + "learning_rate": 1.999960509916226e-06, + "loss": 0.0262, + "num_input_tokens_seen": 1164800, + "step": 1210 + }, + { + "epoch": 0.10310590631364562, + "grad_norm": 31.319644927978516, + "learning_rate": 1.9999462497359463e-06, + "loss": 0.0621, + "num_input_tokens_seen": 1170304, + "step": 1215 + }, + { + "epoch": 0.10353021045485404, + "grad_norm": 26.950510025024414, + "learning_rate": 1.999929795766009e-06, + "loss": 0.0834, + "num_input_tokens_seen": 1175040, + "step": 1220 + }, + { + "epoch": 0.10395451459606246, + "grad_norm": 0.2634492814540863, + "learning_rate": 1.999911148042511e-06, + "loss": 0.0045, + "num_input_tokens_seen": 1180288, + "step": 1225 + }, + { + "epoch": 0.10437881873727088, + "grad_norm": 36.4853630065918, + "learning_rate": 1.999890306606365e-06, + "loss": 0.097, + "num_input_tokens_seen": 1185088, + "step": 1230 + }, + { + "epoch": 0.10480312287847929, + "grad_norm": 0.16783910989761353, + "learning_rate": 1.9998672715032944e-06, + "loss": 0.0987, + "num_input_tokens_seen": 1189504, + "step": 1235 + }, + { + "epoch": 0.10522742701968771, + "grad_norm": 0.6057221293449402, + "learning_rate": 1.999842042783836e-06, + "loss": 0.1065, + "num_input_tokens_seen": 1194304, + "step": 1240 + }, + { + "epoch": 0.10565173116089613, + "grad_norm": 6.111150741577148, + "learning_rate": 1.99981462050334e-06, + "loss": 0.0156, + "num_input_tokens_seen": 1198976, + "step": 1245 + }, + { + "epoch": 0.10607603530210455, + "grad_norm": 16.78822898864746, + "learning_rate": 1.999785004721968e-06, + "loss": 0.0797, + "num_input_tokens_seen": 1203520, + "step": 1250 + }, + { + "epoch": 0.10650033944331297, + "grad_norm": 7.630631923675537, + "learning_rate": 1.9997531955046936e-06, + "loss": 0.0947, + "num_input_tokens_seen": 1207808, + "step": 1255 + }, + { + "epoch": 0.10692464358452139, + "grad_norm": 2.147573947906494, + "learning_rate": 1.9997191929213044e-06, + "loss": 0.0938, + "num_input_tokens_seen": 1212992, + "step": 1260 + }, + { + "epoch": 0.10734894772572981, + "grad_norm": 33.501197814941406, + "learning_rate": 1.999682997046398e-06, + "loss": 0.1488, + "num_input_tokens_seen": 1217344, + "step": 1265 + }, + { + "epoch": 0.10777325186693822, + "grad_norm": 34.510108947753906, + "learning_rate": 1.9996446079593855e-06, + "loss": 0.0821, + "num_input_tokens_seen": 1222080, + "step": 1270 + }, + { + "epoch": 0.10819755600814664, + "grad_norm": 4.3939208984375, + "learning_rate": 1.999604025744489e-06, + "loss": 0.1393, + "num_input_tokens_seen": 1226752, + "step": 1275 + }, + { + "epoch": 0.10862186014935506, + "grad_norm": 34.965816497802734, + "learning_rate": 1.9995612504907414e-06, + "loss": 0.1702, + "num_input_tokens_seen": 1231808, + "step": 1280 + }, + { + "epoch": 0.10904616429056348, + "grad_norm": 17.587800979614258, + "learning_rate": 1.999516282291988e-06, + "loss": 0.1218, + "num_input_tokens_seen": 1236352, + "step": 1285 + }, + { + "epoch": 0.1094704684317719, + "grad_norm": 18.25070571899414, + "learning_rate": 1.9994691212468853e-06, + "loss": 0.0983, + "num_input_tokens_seen": 1241088, + "step": 1290 + }, + { + "epoch": 0.10989477257298032, + "grad_norm": 2.138901472091675, + "learning_rate": 1.9994197674588997e-06, + "loss": 0.0506, + "num_input_tokens_seen": 1246336, + "step": 1295 + }, + { + "epoch": 0.11031907671418872, + "grad_norm": 7.946754455566406, + "learning_rate": 1.999368221036309e-06, + "loss": 0.1021, + "num_input_tokens_seen": 1251648, + "step": 1300 + }, + { + "epoch": 0.11074338085539714, + "grad_norm": 18.631914138793945, + "learning_rate": 1.9993144820922015e-06, + "loss": 0.0848, + "num_input_tokens_seen": 1256448, + "step": 1305 + }, + { + "epoch": 0.11116768499660556, + "grad_norm": 1.045030117034912, + "learning_rate": 1.9992585507444757e-06, + "loss": 0.096, + "num_input_tokens_seen": 1261184, + "step": 1310 + }, + { + "epoch": 0.11159198913781398, + "grad_norm": 38.57451629638672, + "learning_rate": 1.999200427115839e-06, + "loss": 0.1, + "num_input_tokens_seen": 1266304, + "step": 1315 + }, + { + "epoch": 0.1120162932790224, + "grad_norm": 4.0364556312561035, + "learning_rate": 1.99914011133381e-06, + "loss": 0.0415, + "num_input_tokens_seen": 1270848, + "step": 1320 + }, + { + "epoch": 0.11244059742023083, + "grad_norm": 7.261585712432861, + "learning_rate": 1.999077603530716e-06, + "loss": 0.0318, + "num_input_tokens_seen": 1275712, + "step": 1325 + }, + { + "epoch": 0.11286490156143925, + "grad_norm": 18.819059371948242, + "learning_rate": 1.999012903843693e-06, + "loss": 0.0425, + "num_input_tokens_seen": 1280000, + "step": 1330 + }, + { + "epoch": 0.11328920570264765, + "grad_norm": 17.924287796020508, + "learning_rate": 1.9989460124146854e-06, + "loss": 0.0826, + "num_input_tokens_seen": 1285440, + "step": 1335 + }, + { + "epoch": 0.11371350984385607, + "grad_norm": 0.6249921321868896, + "learning_rate": 1.998876929390448e-06, + "loss": 0.0835, + "num_input_tokens_seen": 1290176, + "step": 1340 + }, + { + "epoch": 0.11413781398506449, + "grad_norm": 54.781005859375, + "learning_rate": 1.9988056549225423e-06, + "loss": 0.108, + "num_input_tokens_seen": 1294912, + "step": 1345 + }, + { + "epoch": 0.11456211812627291, + "grad_norm": 1.0688238143920898, + "learning_rate": 1.9987321891673375e-06, + "loss": 0.0703, + "num_input_tokens_seen": 1299136, + "step": 1350 + }, + { + "epoch": 0.11498642226748133, + "grad_norm": 8.30221939086914, + "learning_rate": 1.9986565322860116e-06, + "loss": 0.1112, + "num_input_tokens_seen": 1303936, + "step": 1355 + }, + { + "epoch": 0.11541072640868975, + "grad_norm": 21.210979461669922, + "learning_rate": 1.9985786844445474e-06, + "loss": 0.045, + "num_input_tokens_seen": 1308928, + "step": 1360 + }, + { + "epoch": 0.11583503054989816, + "grad_norm": 16.939516067504883, + "learning_rate": 1.9984986458137366e-06, + "loss": 0.0518, + "num_input_tokens_seen": 1313728, + "step": 1365 + }, + { + "epoch": 0.11625933469110658, + "grad_norm": 2.8378939628601074, + "learning_rate": 1.998416416569177e-06, + "loss": 0.062, + "num_input_tokens_seen": 1318400, + "step": 1370 + }, + { + "epoch": 0.116683638832315, + "grad_norm": 15.487943649291992, + "learning_rate": 1.9983319968912714e-06, + "loss": 0.0946, + "num_input_tokens_seen": 1322752, + "step": 1375 + }, + { + "epoch": 0.11710794297352342, + "grad_norm": 6.950421333312988, + "learning_rate": 1.9982453869652286e-06, + "loss": 0.0354, + "num_input_tokens_seen": 1327552, + "step": 1380 + }, + { + "epoch": 0.11753224711473184, + "grad_norm": 45.381065368652344, + "learning_rate": 1.9981565869810637e-06, + "loss": 0.084, + "num_input_tokens_seen": 1331776, + "step": 1385 + }, + { + "epoch": 0.11795655125594026, + "grad_norm": 7.881857872009277, + "learning_rate": 1.998065597133594e-06, + "loss": 0.0591, + "num_input_tokens_seen": 1336128, + "step": 1390 + }, + { + "epoch": 0.11838085539714868, + "grad_norm": 14.927377700805664, + "learning_rate": 1.9979724176224447e-06, + "loss": 0.1068, + "num_input_tokens_seen": 1340800, + "step": 1395 + }, + { + "epoch": 0.11880515953835709, + "grad_norm": 7.404366970062256, + "learning_rate": 1.997877048652042e-06, + "loss": 0.0982, + "num_input_tokens_seen": 1345408, + "step": 1400 + }, + { + "epoch": 0.11922946367956551, + "grad_norm": 37.19292068481445, + "learning_rate": 1.9977794904316163e-06, + "loss": 0.12, + "num_input_tokens_seen": 1350208, + "step": 1405 + }, + { + "epoch": 0.11965376782077393, + "grad_norm": 4.006378650665283, + "learning_rate": 1.9976797431752023e-06, + "loss": 0.0689, + "num_input_tokens_seen": 1354624, + "step": 1410 + }, + { + "epoch": 0.12007807196198235, + "grad_norm": 1.479600429534912, + "learning_rate": 1.9975778071016357e-06, + "loss": 0.0574, + "num_input_tokens_seen": 1359232, + "step": 1415 + }, + { + "epoch": 0.12050237610319077, + "grad_norm": 8.415390014648438, + "learning_rate": 1.997473682434555e-06, + "loss": 0.0794, + "num_input_tokens_seen": 1363904, + "step": 1420 + }, + { + "epoch": 0.12092668024439919, + "grad_norm": 0.7171556949615479, + "learning_rate": 1.9973673694023998e-06, + "loss": 0.0577, + "num_input_tokens_seen": 1368448, + "step": 1425 + }, + { + "epoch": 0.12135098438560761, + "grad_norm": 19.485977172851562, + "learning_rate": 1.997258868238411e-06, + "loss": 0.099, + "num_input_tokens_seen": 1372864, + "step": 1430 + }, + { + "epoch": 0.12177528852681602, + "grad_norm": 26.69516944885254, + "learning_rate": 1.997148179180631e-06, + "loss": 0.0979, + "num_input_tokens_seen": 1377920, + "step": 1435 + }, + { + "epoch": 0.12219959266802444, + "grad_norm": 15.690411567687988, + "learning_rate": 1.9970353024719003e-06, + "loss": 0.0951, + "num_input_tokens_seen": 1382464, + "step": 1440 + }, + { + "epoch": 0.12262389680923286, + "grad_norm": 15.733072280883789, + "learning_rate": 1.9969202383598605e-06, + "loss": 0.065, + "num_input_tokens_seen": 1387072, + "step": 1445 + }, + { + "epoch": 0.12304820095044128, + "grad_norm": 13.7681245803833, + "learning_rate": 1.996802987096952e-06, + "loss": 0.0363, + "num_input_tokens_seen": 1391488, + "step": 1450 + }, + { + "epoch": 0.1234725050916497, + "grad_norm": 22.600217819213867, + "learning_rate": 1.9966835489404123e-06, + "loss": 0.1148, + "num_input_tokens_seen": 1397440, + "step": 1455 + }, + { + "epoch": 0.12389680923285812, + "grad_norm": 2.1628708839416504, + "learning_rate": 1.996561924152278e-06, + "loss": 0.0559, + "num_input_tokens_seen": 1402048, + "step": 1460 + }, + { + "epoch": 0.12432111337406652, + "grad_norm": 51.919342041015625, + "learning_rate": 1.996438112999383e-06, + "loss": 0.0275, + "num_input_tokens_seen": 1406784, + "step": 1465 + }, + { + "epoch": 0.12474541751527495, + "grad_norm": 58.69674301147461, + "learning_rate": 1.9963121157533573e-06, + "loss": 0.1324, + "num_input_tokens_seen": 1411328, + "step": 1470 + }, + { + "epoch": 0.12516972165648338, + "grad_norm": 1.9336317777633667, + "learning_rate": 1.9961839326906272e-06, + "loss": 0.1638, + "num_input_tokens_seen": 1415936, + "step": 1475 + }, + { + "epoch": 0.12559402579769177, + "grad_norm": 41.08507537841797, + "learning_rate": 1.9960535640924146e-06, + "loss": 0.1479, + "num_input_tokens_seen": 1421248, + "step": 1480 + }, + { + "epoch": 0.1260183299389002, + "grad_norm": 7.536561965942383, + "learning_rate": 1.995921010244736e-06, + "loss": 0.0392, + "num_input_tokens_seen": 1425728, + "step": 1485 + }, + { + "epoch": 0.1264426340801086, + "grad_norm": 14.329437255859375, + "learning_rate": 1.9957862714384025e-06, + "loss": 0.0857, + "num_input_tokens_seen": 1431296, + "step": 1490 + }, + { + "epoch": 0.12686693822131703, + "grad_norm": 10.391690254211426, + "learning_rate": 1.9956493479690188e-06, + "loss": 0.0819, + "num_input_tokens_seen": 1436160, + "step": 1495 + }, + { + "epoch": 0.12729124236252545, + "grad_norm": 2.942070722579956, + "learning_rate": 1.9955102401369814e-06, + "loss": 0.1003, + "num_input_tokens_seen": 1440960, + "step": 1500 + }, + { + "epoch": 0.12771554650373387, + "grad_norm": 14.67211627960205, + "learning_rate": 1.9953689482474806e-06, + "loss": 0.0611, + "num_input_tokens_seen": 1445760, + "step": 1505 + }, + { + "epoch": 0.1281398506449423, + "grad_norm": 0.5613120198249817, + "learning_rate": 1.995225472610498e-06, + "loss": 0.0144, + "num_input_tokens_seen": 1450688, + "step": 1510 + }, + { + "epoch": 0.12856415478615071, + "grad_norm": 40.097408294677734, + "learning_rate": 1.9950798135408057e-06, + "loss": 0.1675, + "num_input_tokens_seen": 1455552, + "step": 1515 + }, + { + "epoch": 0.12898845892735913, + "grad_norm": 22.93716812133789, + "learning_rate": 1.994931971357966e-06, + "loss": 0.0808, + "num_input_tokens_seen": 1460416, + "step": 1520 + }, + { + "epoch": 0.12941276306856755, + "grad_norm": 1.170467495918274, + "learning_rate": 1.9947819463863316e-06, + "loss": 0.0717, + "num_input_tokens_seen": 1466432, + "step": 1525 + }, + { + "epoch": 0.12983706720977597, + "grad_norm": 18.326679229736328, + "learning_rate": 1.9946297389550432e-06, + "loss": 0.0989, + "num_input_tokens_seen": 1471232, + "step": 1530 + }, + { + "epoch": 0.1302613713509844, + "grad_norm": 13.557463645935059, + "learning_rate": 1.9944753493980292e-06, + "loss": 0.0587, + "num_input_tokens_seen": 1476160, + "step": 1535 + }, + { + "epoch": 0.13068567549219282, + "grad_norm": 19.043073654174805, + "learning_rate": 1.9943187780540062e-06, + "loss": 0.0755, + "num_input_tokens_seen": 1481152, + "step": 1540 + }, + { + "epoch": 0.13110997963340124, + "grad_norm": 6.632198333740234, + "learning_rate": 1.994160025266478e-06, + "loss": 0.1668, + "num_input_tokens_seen": 1486336, + "step": 1545 + }, + { + "epoch": 0.13153428377460963, + "grad_norm": 11.749693870544434, + "learning_rate": 1.9939990913837327e-06, + "loss": 0.0588, + "num_input_tokens_seen": 1491264, + "step": 1550 + }, + { + "epoch": 0.13195858791581805, + "grad_norm": 5.760255813598633, + "learning_rate": 1.993835976758845e-06, + "loss": 0.0672, + "num_input_tokens_seen": 1495680, + "step": 1555 + }, + { + "epoch": 0.13238289205702647, + "grad_norm": 41.815704345703125, + "learning_rate": 1.993670681749673e-06, + "loss": 0.1687, + "num_input_tokens_seen": 1501376, + "step": 1560 + }, + { + "epoch": 0.1328071961982349, + "grad_norm": 4.122589588165283, + "learning_rate": 1.9935032067188587e-06, + "loss": 0.1089, + "num_input_tokens_seen": 1506176, + "step": 1565 + }, + { + "epoch": 0.1332315003394433, + "grad_norm": 25.289459228515625, + "learning_rate": 1.993333552033827e-06, + "loss": 0.0749, + "num_input_tokens_seen": 1511808, + "step": 1570 + }, + { + "epoch": 0.13365580448065173, + "grad_norm": 20.910133361816406, + "learning_rate": 1.9931617180667844e-06, + "loss": 0.0406, + "num_input_tokens_seen": 1516608, + "step": 1575 + }, + { + "epoch": 0.13408010862186015, + "grad_norm": 15.184967041015625, + "learning_rate": 1.992987705194719e-06, + "loss": 0.0988, + "num_input_tokens_seen": 1521280, + "step": 1580 + }, + { + "epoch": 0.13450441276306857, + "grad_norm": 1.4249966144561768, + "learning_rate": 1.9928115137993983e-06, + "loss": 0.0683, + "num_input_tokens_seen": 1526080, + "step": 1585 + }, + { + "epoch": 0.134928716904277, + "grad_norm": 21.653785705566406, + "learning_rate": 1.9926331442673703e-06, + "loss": 0.0429, + "num_input_tokens_seen": 1530944, + "step": 1590 + }, + { + "epoch": 0.1353530210454854, + "grad_norm": 37.388206481933594, + "learning_rate": 1.992452596989962e-06, + "loss": 0.1098, + "num_input_tokens_seen": 1536256, + "step": 1595 + }, + { + "epoch": 0.13577732518669383, + "grad_norm": 36.764583587646484, + "learning_rate": 1.9922698723632763e-06, + "loss": 0.0842, + "num_input_tokens_seen": 1540864, + "step": 1600 + }, + { + "epoch": 0.13620162932790225, + "grad_norm": 9.037910461425781, + "learning_rate": 1.992084970788195e-06, + "loss": 0.077, + "num_input_tokens_seen": 1545536, + "step": 1605 + }, + { + "epoch": 0.13662593346911067, + "grad_norm": 30.793230056762695, + "learning_rate": 1.991897892670375e-06, + "loss": 0.1246, + "num_input_tokens_seen": 1550144, + "step": 1610 + }, + { + "epoch": 0.13705023761031906, + "grad_norm": 8.332464218139648, + "learning_rate": 1.9917086384202475e-06, + "loss": 0.0509, + "num_input_tokens_seen": 1554624, + "step": 1615 + }, + { + "epoch": 0.13747454175152748, + "grad_norm": 16.857133865356445, + "learning_rate": 1.9915172084530195e-06, + "loss": 0.1169, + "num_input_tokens_seen": 1559168, + "step": 1620 + }, + { + "epoch": 0.1378988458927359, + "grad_norm": 9.418706893920898, + "learning_rate": 1.9913236031886707e-06, + "loss": 0.0867, + "num_input_tokens_seen": 1564032, + "step": 1625 + }, + { + "epoch": 0.13832315003394433, + "grad_norm": 11.382521629333496, + "learning_rate": 1.9911278230519533e-06, + "loss": 0.0813, + "num_input_tokens_seen": 1568896, + "step": 1630 + }, + { + "epoch": 0.13874745417515275, + "grad_norm": 21.073949813842773, + "learning_rate": 1.9909298684723905e-06, + "loss": 0.0779, + "num_input_tokens_seen": 1573888, + "step": 1635 + }, + { + "epoch": 0.13917175831636117, + "grad_norm": 0.7896543741226196, + "learning_rate": 1.9907297398842764e-06, + "loss": 0.0649, + "num_input_tokens_seen": 1578496, + "step": 1640 + }, + { + "epoch": 0.1395960624575696, + "grad_norm": 20.59737777709961, + "learning_rate": 1.9905274377266744e-06, + "loss": 0.0418, + "num_input_tokens_seen": 1583104, + "step": 1645 + }, + { + "epoch": 0.140020366598778, + "grad_norm": 53.22269821166992, + "learning_rate": 1.9903229624434174e-06, + "loss": 0.1031, + "num_input_tokens_seen": 1587648, + "step": 1650 + }, + { + "epoch": 0.14044467073998643, + "grad_norm": 17.283193588256836, + "learning_rate": 1.9901163144831047e-06, + "loss": 0.1513, + "num_input_tokens_seen": 1593216, + "step": 1655 + }, + { + "epoch": 0.14086897488119485, + "grad_norm": 17.767263412475586, + "learning_rate": 1.989907494299103e-06, + "loss": 0.0057, + "num_input_tokens_seen": 1598208, + "step": 1660 + }, + { + "epoch": 0.14129327902240327, + "grad_norm": 1.1825686693191528, + "learning_rate": 1.989696502349545e-06, + "loss": 0.0057, + "num_input_tokens_seen": 1602688, + "step": 1665 + }, + { + "epoch": 0.1417175831636117, + "grad_norm": 17.303054809570312, + "learning_rate": 1.9894833390973266e-06, + "loss": 0.1691, + "num_input_tokens_seen": 1606784, + "step": 1670 + }, + { + "epoch": 0.1421418873048201, + "grad_norm": 27.718305587768555, + "learning_rate": 1.9892680050101085e-06, + "loss": 0.1757, + "num_input_tokens_seen": 1611584, + "step": 1675 + }, + { + "epoch": 0.1425661914460285, + "grad_norm": 13.038661003112793, + "learning_rate": 1.9890505005603146e-06, + "loss": 0.094, + "num_input_tokens_seen": 1616576, + "step": 1680 + }, + { + "epoch": 0.14299049558723692, + "grad_norm": 9.758828163146973, + "learning_rate": 1.9888308262251284e-06, + "loss": 0.0994, + "num_input_tokens_seen": 1621440, + "step": 1685 + }, + { + "epoch": 0.14341479972844534, + "grad_norm": 21.367116928100586, + "learning_rate": 1.9886089824864956e-06, + "loss": 0.071, + "num_input_tokens_seen": 1626368, + "step": 1690 + }, + { + "epoch": 0.14383910386965376, + "grad_norm": 16.830913543701172, + "learning_rate": 1.9883849698311213e-06, + "loss": 0.0566, + "num_input_tokens_seen": 1630784, + "step": 1695 + }, + { + "epoch": 0.14426340801086218, + "grad_norm": 13.30070686340332, + "learning_rate": 1.988158788750468e-06, + "loss": 0.0815, + "num_input_tokens_seen": 1635776, + "step": 1700 + }, + { + "epoch": 0.1446877121520706, + "grad_norm": 16.3173828125, + "learning_rate": 1.9879304397407566e-06, + "loss": 0.0967, + "num_input_tokens_seen": 1640448, + "step": 1705 + }, + { + "epoch": 0.14511201629327902, + "grad_norm": 7.858912467956543, + "learning_rate": 1.987699923302963e-06, + "loss": 0.0206, + "num_input_tokens_seen": 1645440, + "step": 1710 + }, + { + "epoch": 0.14553632043448744, + "grad_norm": 35.57898712158203, + "learning_rate": 1.9874672399428195e-06, + "loss": 0.0811, + "num_input_tokens_seen": 1649984, + "step": 1715 + }, + { + "epoch": 0.14596062457569586, + "grad_norm": 39.42796325683594, + "learning_rate": 1.9872323901708116e-06, + "loss": 0.1235, + "num_input_tokens_seen": 1654720, + "step": 1720 + }, + { + "epoch": 0.14638492871690428, + "grad_norm": 19.698476791381836, + "learning_rate": 1.9869953745021785e-06, + "loss": 0.1061, + "num_input_tokens_seen": 1659648, + "step": 1725 + }, + { + "epoch": 0.1468092328581127, + "grad_norm": 0.6171606779098511, + "learning_rate": 1.9867561934569103e-06, + "loss": 0.0567, + "num_input_tokens_seen": 1664896, + "step": 1730 + }, + { + "epoch": 0.14723353699932112, + "grad_norm": 3.3654897212982178, + "learning_rate": 1.9865148475597475e-06, + "loss": 0.0482, + "num_input_tokens_seen": 1669568, + "step": 1735 + }, + { + "epoch": 0.14765784114052954, + "grad_norm": 19.97379493713379, + "learning_rate": 1.986271337340182e-06, + "loss": 0.0588, + "num_input_tokens_seen": 1674432, + "step": 1740 + }, + { + "epoch": 0.14808214528173794, + "grad_norm": 3.5285873413085938, + "learning_rate": 1.9860256633324513e-06, + "loss": 0.0265, + "num_input_tokens_seen": 1678720, + "step": 1745 + }, + { + "epoch": 0.14850644942294636, + "grad_norm": 23.46439552307129, + "learning_rate": 1.9857778260755426e-06, + "loss": 0.0692, + "num_input_tokens_seen": 1683904, + "step": 1750 + }, + { + "epoch": 0.14893075356415478, + "grad_norm": 28.765380859375, + "learning_rate": 1.9855278261131876e-06, + "loss": 0.0717, + "num_input_tokens_seen": 1689024, + "step": 1755 + }, + { + "epoch": 0.1493550577053632, + "grad_norm": 2.764413833618164, + "learning_rate": 1.985275663993863e-06, + "loss": 0.0851, + "num_input_tokens_seen": 1693632, + "step": 1760 + }, + { + "epoch": 0.14977936184657162, + "grad_norm": 13.944731712341309, + "learning_rate": 1.9850213402707888e-06, + "loss": 0.0532, + "num_input_tokens_seen": 1698304, + "step": 1765 + }, + { + "epoch": 0.15020366598778004, + "grad_norm": 40.670230865478516, + "learning_rate": 1.9847648555019286e-06, + "loss": 0.1201, + "num_input_tokens_seen": 1703808, + "step": 1770 + }, + { + "epoch": 0.15020366598778004, + "eval_loss": 0.0835869163274765, + "eval_runtime": 16.0609, + "eval_samples_per_second": 652.205, + "eval_steps_per_second": 81.565, + "num_input_tokens_seen": 1703808, + "step": 1770 + }, + { + "epoch": 0.15062797012898846, + "grad_norm": 4.531874656677246, + "learning_rate": 1.9845062102499858e-06, + "loss": 0.0634, + "num_input_tokens_seen": 1708992, + "step": 1775 + }, + { + "epoch": 0.15105227427019688, + "grad_norm": 1.780526876449585, + "learning_rate": 1.9842454050824043e-06, + "loss": 0.0769, + "num_input_tokens_seen": 1713600, + "step": 1780 + }, + { + "epoch": 0.1514765784114053, + "grad_norm": 42.02983856201172, + "learning_rate": 1.9839824405713663e-06, + "loss": 0.0963, + "num_input_tokens_seen": 1718208, + "step": 1785 + }, + { + "epoch": 0.15190088255261372, + "grad_norm": 32.276702880859375, + "learning_rate": 1.983717317293792e-06, + "loss": 0.1394, + "num_input_tokens_seen": 1722560, + "step": 1790 + }, + { + "epoch": 0.15232518669382214, + "grad_norm": 1.8770980834960938, + "learning_rate": 1.983450035831337e-06, + "loss": 0.0263, + "num_input_tokens_seen": 1727296, + "step": 1795 + }, + { + "epoch": 0.15274949083503056, + "grad_norm": 13.28686809539795, + "learning_rate": 1.983180596770392e-06, + "loss": 0.0809, + "num_input_tokens_seen": 1732608, + "step": 1800 + }, + { + "epoch": 0.15317379497623898, + "grad_norm": 0.3505326211452484, + "learning_rate": 1.982909000702082e-06, + "loss": 0.0569, + "num_input_tokens_seen": 1737280, + "step": 1805 + }, + { + "epoch": 0.15359809911744737, + "grad_norm": 5.0527262687683105, + "learning_rate": 1.982635248222264e-06, + "loss": 0.0948, + "num_input_tokens_seen": 1741440, + "step": 1810 + }, + { + "epoch": 0.1540224032586558, + "grad_norm": 10.845234870910645, + "learning_rate": 1.982359339931524e-06, + "loss": 0.0485, + "num_input_tokens_seen": 1746176, + "step": 1815 + }, + { + "epoch": 0.1544467073998642, + "grad_norm": 21.369482040405273, + "learning_rate": 1.9820812764351804e-06, + "loss": 0.1267, + "num_input_tokens_seen": 1751680, + "step": 1820 + }, + { + "epoch": 0.15487101154107263, + "grad_norm": 0.3655910789966583, + "learning_rate": 1.981801058343279e-06, + "loss": 0.0524, + "num_input_tokens_seen": 1756416, + "step": 1825 + }, + { + "epoch": 0.15529531568228105, + "grad_norm": 1.76851487159729, + "learning_rate": 1.981518686270592e-06, + "loss": 0.0919, + "num_input_tokens_seen": 1760960, + "step": 1830 + }, + { + "epoch": 0.15571961982348947, + "grad_norm": 33.344947814941406, + "learning_rate": 1.9812341608366183e-06, + "loss": 0.0884, + "num_input_tokens_seen": 1766208, + "step": 1835 + }, + { + "epoch": 0.1561439239646979, + "grad_norm": 48.86477279663086, + "learning_rate": 1.980947482665579e-06, + "loss": 0.0528, + "num_input_tokens_seen": 1771264, + "step": 1840 + }, + { + "epoch": 0.15656822810590632, + "grad_norm": 10.009577751159668, + "learning_rate": 1.980658652386421e-06, + "loss": 0.15, + "num_input_tokens_seen": 1776192, + "step": 1845 + }, + { + "epoch": 0.15699253224711474, + "grad_norm": 1.3521720170974731, + "learning_rate": 1.9803676706328102e-06, + "loss": 0.0842, + "num_input_tokens_seen": 1780992, + "step": 1850 + }, + { + "epoch": 0.15741683638832316, + "grad_norm": 12.654996871948242, + "learning_rate": 1.980074538043134e-06, + "loss": 0.0473, + "num_input_tokens_seen": 1785408, + "step": 1855 + }, + { + "epoch": 0.15784114052953158, + "grad_norm": 24.43471908569336, + "learning_rate": 1.9797792552604985e-06, + "loss": 0.1532, + "num_input_tokens_seen": 1790208, + "step": 1860 + }, + { + "epoch": 0.15826544467074, + "grad_norm": 0.8395228385925293, + "learning_rate": 1.9794818229327266e-06, + "loss": 0.0137, + "num_input_tokens_seen": 1795264, + "step": 1865 + }, + { + "epoch": 0.15868974881194842, + "grad_norm": 28.747392654418945, + "learning_rate": 1.9791822417123576e-06, + "loss": 0.0572, + "num_input_tokens_seen": 1800064, + "step": 1870 + }, + { + "epoch": 0.1591140529531568, + "grad_norm": 6.029873847961426, + "learning_rate": 1.9788805122566445e-06, + "loss": 0.05, + "num_input_tokens_seen": 1804672, + "step": 1875 + }, + { + "epoch": 0.15953835709436523, + "grad_norm": 28.338607788085938, + "learning_rate": 1.9785766352275538e-06, + "loss": 0.1075, + "num_input_tokens_seen": 1809408, + "step": 1880 + }, + { + "epoch": 0.15996266123557365, + "grad_norm": 22.816967010498047, + "learning_rate": 1.9782706112917643e-06, + "loss": 0.1561, + "num_input_tokens_seen": 1813824, + "step": 1885 + }, + { + "epoch": 0.16038696537678207, + "grad_norm": 28.80828857421875, + "learning_rate": 1.977962441120664e-06, + "loss": 0.0392, + "num_input_tokens_seen": 1818176, + "step": 1890 + }, + { + "epoch": 0.1608112695179905, + "grad_norm": 1.2556086778640747, + "learning_rate": 1.9776521253903492e-06, + "loss": 0.0622, + "num_input_tokens_seen": 1822784, + "step": 1895 + }, + { + "epoch": 0.1612355736591989, + "grad_norm": 14.4972562789917, + "learning_rate": 1.9773396647816246e-06, + "loss": 0.1414, + "num_input_tokens_seen": 1827520, + "step": 1900 + }, + { + "epoch": 0.16165987780040733, + "grad_norm": 19.10732650756836, + "learning_rate": 1.97702505998e-06, + "loss": 0.0911, + "num_input_tokens_seen": 1832256, + "step": 1905 + }, + { + "epoch": 0.16208418194161575, + "grad_norm": 14.778487205505371, + "learning_rate": 1.976708311675688e-06, + "loss": 0.0821, + "num_input_tokens_seen": 1836864, + "step": 1910 + }, + { + "epoch": 0.16250848608282417, + "grad_norm": 7.831988334655762, + "learning_rate": 1.976389420563607e-06, + "loss": 0.0317, + "num_input_tokens_seen": 1841280, + "step": 1915 + }, + { + "epoch": 0.1629327902240326, + "grad_norm": 1.758711338043213, + "learning_rate": 1.9760683873433734e-06, + "loss": 0.0848, + "num_input_tokens_seen": 1846080, + "step": 1920 + }, + { + "epoch": 0.163357094365241, + "grad_norm": 0.39283105731010437, + "learning_rate": 1.9757452127193043e-06, + "loss": 0.0373, + "num_input_tokens_seen": 1850816, + "step": 1925 + }, + { + "epoch": 0.16378139850644943, + "grad_norm": 20.11293601989746, + "learning_rate": 1.9754198974004156e-06, + "loss": 0.0922, + "num_input_tokens_seen": 1855232, + "step": 1930 + }, + { + "epoch": 0.16420570264765785, + "grad_norm": 2.879047393798828, + "learning_rate": 1.975092442100419e-06, + "loss": 0.0689, + "num_input_tokens_seen": 1860160, + "step": 1935 + }, + { + "epoch": 0.16463000678886625, + "grad_norm": 4.302145957946777, + "learning_rate": 1.9747628475377204e-06, + "loss": 0.0229, + "num_input_tokens_seen": 1865024, + "step": 1940 + }, + { + "epoch": 0.16505431093007467, + "grad_norm": 48.407005310058594, + "learning_rate": 1.9744311144354208e-06, + "loss": 0.0846, + "num_input_tokens_seen": 1869888, + "step": 1945 + }, + { + "epoch": 0.16547861507128309, + "grad_norm": 0.2003674954175949, + "learning_rate": 1.9740972435213112e-06, + "loss": 0.1164, + "num_input_tokens_seen": 1874624, + "step": 1950 + }, + { + "epoch": 0.1659029192124915, + "grad_norm": 33.10806655883789, + "learning_rate": 1.973761235527874e-06, + "loss": 0.066, + "num_input_tokens_seen": 1879168, + "step": 1955 + }, + { + "epoch": 0.16632722335369993, + "grad_norm": 51.882904052734375, + "learning_rate": 1.9734230911922795e-06, + "loss": 0.1811, + "num_input_tokens_seen": 1884096, + "step": 1960 + }, + { + "epoch": 0.16675152749490835, + "grad_norm": 63.34104919433594, + "learning_rate": 1.9730828112563852e-06, + "loss": 0.0921, + "num_input_tokens_seen": 1888832, + "step": 1965 + }, + { + "epoch": 0.16717583163611677, + "grad_norm": 32.06380081176758, + "learning_rate": 1.972740396466734e-06, + "loss": 0.0428, + "num_input_tokens_seen": 1893696, + "step": 1970 + }, + { + "epoch": 0.1676001357773252, + "grad_norm": 2.3903558254241943, + "learning_rate": 1.972395847574552e-06, + "loss": 0.0128, + "num_input_tokens_seen": 1898176, + "step": 1975 + }, + { + "epoch": 0.1680244399185336, + "grad_norm": 6.827738285064697, + "learning_rate": 1.972049165335747e-06, + "loss": 0.092, + "num_input_tokens_seen": 1902720, + "step": 1980 + }, + { + "epoch": 0.16844874405974203, + "grad_norm": 25.457487106323242, + "learning_rate": 1.9717003505109094e-06, + "loss": 0.0494, + "num_input_tokens_seen": 1907520, + "step": 1985 + }, + { + "epoch": 0.16887304820095045, + "grad_norm": 13.417096138000488, + "learning_rate": 1.9713494038653054e-06, + "loss": 0.0955, + "num_input_tokens_seen": 1912000, + "step": 1990 + }, + { + "epoch": 0.16929735234215887, + "grad_norm": 5.452141761779785, + "learning_rate": 1.97099632616888e-06, + "loss": 0.0437, + "num_input_tokens_seen": 1916224, + "step": 1995 + }, + { + "epoch": 0.1697216564833673, + "grad_norm": 0.8709999918937683, + "learning_rate": 1.9706411181962534e-06, + "loss": 0.0532, + "num_input_tokens_seen": 1920896, + "step": 2000 + }, + { + "epoch": 0.1701459606245757, + "grad_norm": 17.97975730895996, + "learning_rate": 1.970283780726718e-06, + "loss": 0.0502, + "num_input_tokens_seen": 1925312, + "step": 2005 + }, + { + "epoch": 0.1705702647657841, + "grad_norm": 0.3186114728450775, + "learning_rate": 1.9699243145442397e-06, + "loss": 0.095, + "num_input_tokens_seen": 1929920, + "step": 2010 + }, + { + "epoch": 0.17099456890699252, + "grad_norm": 25.567066192626953, + "learning_rate": 1.9695627204374544e-06, + "loss": 0.0817, + "num_input_tokens_seen": 1934720, + "step": 2015 + }, + { + "epoch": 0.17141887304820094, + "grad_norm": 21.674535751342773, + "learning_rate": 1.969198999199666e-06, + "loss": 0.0332, + "num_input_tokens_seen": 1939584, + "step": 2020 + }, + { + "epoch": 0.17184317718940936, + "grad_norm": 8.817938804626465, + "learning_rate": 1.968833151628845e-06, + "loss": 0.125, + "num_input_tokens_seen": 1944576, + "step": 2025 + }, + { + "epoch": 0.17226748133061778, + "grad_norm": 28.406312942504883, + "learning_rate": 1.968465178527628e-06, + "loss": 0.1032, + "num_input_tokens_seen": 1948928, + "step": 2030 + }, + { + "epoch": 0.1726917854718262, + "grad_norm": 27.26176643371582, + "learning_rate": 1.9680950807033124e-06, + "loss": 0.0727, + "num_input_tokens_seen": 1953600, + "step": 2035 + }, + { + "epoch": 0.17311608961303462, + "grad_norm": 9.266279220581055, + "learning_rate": 1.96772285896786e-06, + "loss": 0.0892, + "num_input_tokens_seen": 1958592, + "step": 2040 + }, + { + "epoch": 0.17354039375424304, + "grad_norm": 2.9285852909088135, + "learning_rate": 1.9673485141378904e-06, + "loss": 0.042, + "num_input_tokens_seen": 1962752, + "step": 2045 + }, + { + "epoch": 0.17396469789545146, + "grad_norm": 23.246116638183594, + "learning_rate": 1.9669720470346817e-06, + "loss": 0.1337, + "num_input_tokens_seen": 1967424, + "step": 2050 + }, + { + "epoch": 0.17438900203665988, + "grad_norm": 5.3057541847229, + "learning_rate": 1.966593458484168e-06, + "loss": 0.0393, + "num_input_tokens_seen": 1972736, + "step": 2055 + }, + { + "epoch": 0.1748133061778683, + "grad_norm": 13.531951904296875, + "learning_rate": 1.9662127493169367e-06, + "loss": 0.0351, + "num_input_tokens_seen": 1977408, + "step": 2060 + }, + { + "epoch": 0.17523761031907673, + "grad_norm": 34.17113494873047, + "learning_rate": 1.96582992036823e-06, + "loss": 0.044, + "num_input_tokens_seen": 1982016, + "step": 2065 + }, + { + "epoch": 0.17566191446028515, + "grad_norm": 13.662484169006348, + "learning_rate": 1.9654449724779387e-06, + "loss": 0.114, + "num_input_tokens_seen": 1987392, + "step": 2070 + }, + { + "epoch": 0.17608621860149354, + "grad_norm": 12.098200798034668, + "learning_rate": 1.965057906490602e-06, + "loss": 0.0802, + "num_input_tokens_seen": 1992064, + "step": 2075 + }, + { + "epoch": 0.17651052274270196, + "grad_norm": 20.902666091918945, + "learning_rate": 1.964668723255408e-06, + "loss": 0.0644, + "num_input_tokens_seen": 1997120, + "step": 2080 + }, + { + "epoch": 0.17693482688391038, + "grad_norm": 21.382699966430664, + "learning_rate": 1.964277423626188e-06, + "loss": 0.0501, + "num_input_tokens_seen": 2001664, + "step": 2085 + }, + { + "epoch": 0.1773591310251188, + "grad_norm": 22.832244873046875, + "learning_rate": 1.9638840084614178e-06, + "loss": 0.0941, + "num_input_tokens_seen": 2006336, + "step": 2090 + }, + { + "epoch": 0.17778343516632722, + "grad_norm": 20.812602996826172, + "learning_rate": 1.963488478624214e-06, + "loss": 0.0483, + "num_input_tokens_seen": 2011264, + "step": 2095 + }, + { + "epoch": 0.17820773930753564, + "grad_norm": 20.94266128540039, + "learning_rate": 1.9630908349823315e-06, + "loss": 0.0896, + "num_input_tokens_seen": 2015680, + "step": 2100 + }, + { + "epoch": 0.17863204344874406, + "grad_norm": 22.355464935302734, + "learning_rate": 1.9626910784081647e-06, + "loss": 0.1457, + "num_input_tokens_seen": 2020352, + "step": 2105 + }, + { + "epoch": 0.17905634758995248, + "grad_norm": 8.69415283203125, + "learning_rate": 1.9622892097787426e-06, + "loss": 0.0603, + "num_input_tokens_seen": 2024832, + "step": 2110 + }, + { + "epoch": 0.1794806517311609, + "grad_norm": 12.219429016113281, + "learning_rate": 1.961885229975727e-06, + "loss": 0.0261, + "num_input_tokens_seen": 2029184, + "step": 2115 + }, + { + "epoch": 0.17990495587236932, + "grad_norm": 16.649051666259766, + "learning_rate": 1.9614791398854133e-06, + "loss": 0.0787, + "num_input_tokens_seen": 2034048, + "step": 2120 + }, + { + "epoch": 0.18032926001357774, + "grad_norm": 7.8996357917785645, + "learning_rate": 1.9610709403987244e-06, + "loss": 0.0453, + "num_input_tokens_seen": 2039232, + "step": 2125 + }, + { + "epoch": 0.18075356415478616, + "grad_norm": 13.233209609985352, + "learning_rate": 1.9606606324112134e-06, + "loss": 0.0774, + "num_input_tokens_seen": 2043712, + "step": 2130 + }, + { + "epoch": 0.18117786829599458, + "grad_norm": 17.83286476135254, + "learning_rate": 1.9602482168230576e-06, + "loss": 0.1347, + "num_input_tokens_seen": 2048576, + "step": 2135 + }, + { + "epoch": 0.18160217243720297, + "grad_norm": 16.00604820251465, + "learning_rate": 1.9598336945390584e-06, + "loss": 0.071, + "num_input_tokens_seen": 2053440, + "step": 2140 + }, + { + "epoch": 0.1820264765784114, + "grad_norm": 3.1249139308929443, + "learning_rate": 1.95941706646864e-06, + "loss": 0.0639, + "num_input_tokens_seen": 2058304, + "step": 2145 + }, + { + "epoch": 0.18245078071961982, + "grad_norm": 6.381864070892334, + "learning_rate": 1.9589983335258457e-06, + "loss": 0.0581, + "num_input_tokens_seen": 2062720, + "step": 2150 + }, + { + "epoch": 0.18287508486082824, + "grad_norm": 3.6606667041778564, + "learning_rate": 1.9585774966293365e-06, + "loss": 0.0886, + "num_input_tokens_seen": 2067264, + "step": 2155 + }, + { + "epoch": 0.18329938900203666, + "grad_norm": 11.423752784729004, + "learning_rate": 1.95815455670239e-06, + "loss": 0.0804, + "num_input_tokens_seen": 2071744, + "step": 2160 + }, + { + "epoch": 0.18372369314324508, + "grad_norm": 16.14927101135254, + "learning_rate": 1.957729514672897e-06, + "loss": 0.0776, + "num_input_tokens_seen": 2076352, + "step": 2165 + }, + { + "epoch": 0.1841479972844535, + "grad_norm": 20.34837532043457, + "learning_rate": 1.957302371473361e-06, + "loss": 0.0654, + "num_input_tokens_seen": 2081088, + "step": 2170 + }, + { + "epoch": 0.18457230142566192, + "grad_norm": 21.14691162109375, + "learning_rate": 1.9568731280408945e-06, + "loss": 0.0651, + "num_input_tokens_seen": 2085760, + "step": 2175 + }, + { + "epoch": 0.18499660556687034, + "grad_norm": 8.296751976013184, + "learning_rate": 1.956441785317217e-06, + "loss": 0.0981, + "num_input_tokens_seen": 2090624, + "step": 2180 + }, + { + "epoch": 0.18542090970807876, + "grad_norm": 8.130701065063477, + "learning_rate": 1.9560083442486565e-06, + "loss": 0.0606, + "num_input_tokens_seen": 2095936, + "step": 2185 + }, + { + "epoch": 0.18584521384928718, + "grad_norm": 8.967162132263184, + "learning_rate": 1.955572805786141e-06, + "loss": 0.081, + "num_input_tokens_seen": 2100608, + "step": 2190 + }, + { + "epoch": 0.1862695179904956, + "grad_norm": 20.573461532592773, + "learning_rate": 1.9551351708852015e-06, + "loss": 0.0897, + "num_input_tokens_seen": 2105856, + "step": 2195 + }, + { + "epoch": 0.18669382213170402, + "grad_norm": 7.168735027313232, + "learning_rate": 1.9546954405059697e-06, + "loss": 0.0448, + "num_input_tokens_seen": 2110464, + "step": 2200 + }, + { + "epoch": 0.1871181262729124, + "grad_norm": 13.694960594177246, + "learning_rate": 1.954253615613173e-06, + "loss": 0.1086, + "num_input_tokens_seen": 2115648, + "step": 2205 + }, + { + "epoch": 0.18754243041412083, + "grad_norm": 3.1330201625823975, + "learning_rate": 1.9538096971761343e-06, + "loss": 0.043, + "num_input_tokens_seen": 2120256, + "step": 2210 + }, + { + "epoch": 0.18796673455532925, + "grad_norm": 4.10610294342041, + "learning_rate": 1.9533636861687696e-06, + "loss": 0.1077, + "num_input_tokens_seen": 2124672, + "step": 2215 + }, + { + "epoch": 0.18839103869653767, + "grad_norm": 43.373619079589844, + "learning_rate": 1.9529155835695855e-06, + "loss": 0.0675, + "num_input_tokens_seen": 2129344, + "step": 2220 + }, + { + "epoch": 0.1888153428377461, + "grad_norm": 23.113445281982422, + "learning_rate": 1.952465390361678e-06, + "loss": 0.0677, + "num_input_tokens_seen": 2134336, + "step": 2225 + }, + { + "epoch": 0.1892396469789545, + "grad_norm": 8.734219551086426, + "learning_rate": 1.95201310753273e-06, + "loss": 0.0558, + "num_input_tokens_seen": 2139456, + "step": 2230 + }, + { + "epoch": 0.18966395112016293, + "grad_norm": 4.216504096984863, + "learning_rate": 1.9515587360750068e-06, + "loss": 0.1059, + "num_input_tokens_seen": 2144640, + "step": 2235 + }, + { + "epoch": 0.19008825526137135, + "grad_norm": 11.680764198303223, + "learning_rate": 1.9511022769853586e-06, + "loss": 0.1091, + "num_input_tokens_seen": 2149760, + "step": 2240 + }, + { + "epoch": 0.19051255940257977, + "grad_norm": 22.223356246948242, + "learning_rate": 1.9506437312652144e-06, + "loss": 0.1657, + "num_input_tokens_seen": 2155200, + "step": 2245 + }, + { + "epoch": 0.1909368635437882, + "grad_norm": 2.622177839279175, + "learning_rate": 1.9501830999205806e-06, + "loss": 0.1252, + "num_input_tokens_seen": 2159872, + "step": 2250 + }, + { + "epoch": 0.1913611676849966, + "grad_norm": 17.01951789855957, + "learning_rate": 1.9497203839620398e-06, + "loss": 0.0864, + "num_input_tokens_seen": 2164544, + "step": 2255 + }, + { + "epoch": 0.19178547182620503, + "grad_norm": 4.09209680557251, + "learning_rate": 1.9492555844047483e-06, + "loss": 0.0384, + "num_input_tokens_seen": 2169856, + "step": 2260 + }, + { + "epoch": 0.19220977596741345, + "grad_norm": 0.8658658266067505, + "learning_rate": 1.9487887022684334e-06, + "loss": 0.0385, + "num_input_tokens_seen": 2174400, + "step": 2265 + }, + { + "epoch": 0.19263408010862185, + "grad_norm": 7.3754730224609375, + "learning_rate": 1.9483197385773913e-06, + "loss": 0.0915, + "num_input_tokens_seen": 2179200, + "step": 2270 + }, + { + "epoch": 0.19305838424983027, + "grad_norm": 7.779440402984619, + "learning_rate": 1.947848694360485e-06, + "loss": 0.0774, + "num_input_tokens_seen": 2184768, + "step": 2275 + }, + { + "epoch": 0.1934826883910387, + "grad_norm": 50.24574279785156, + "learning_rate": 1.947375570651142e-06, + "loss": 0.0947, + "num_input_tokens_seen": 2189824, + "step": 2280 + }, + { + "epoch": 0.1939069925322471, + "grad_norm": 1.2048569917678833, + "learning_rate": 1.9469003684873514e-06, + "loss": 0.0549, + "num_input_tokens_seen": 2194752, + "step": 2285 + }, + { + "epoch": 0.19433129667345553, + "grad_norm": 26.04237174987793, + "learning_rate": 1.946423088911664e-06, + "loss": 0.1116, + "num_input_tokens_seen": 2199552, + "step": 2290 + }, + { + "epoch": 0.19475560081466395, + "grad_norm": 17.550045013427734, + "learning_rate": 1.9459437329711865e-06, + "loss": 0.1976, + "num_input_tokens_seen": 2204288, + "step": 2295 + }, + { + "epoch": 0.19517990495587237, + "grad_norm": 24.590728759765625, + "learning_rate": 1.945462301717581e-06, + "loss": 0.0683, + "num_input_tokens_seen": 2209344, + "step": 2300 + }, + { + "epoch": 0.1956042090970808, + "grad_norm": 7.054686546325684, + "learning_rate": 1.944978796207064e-06, + "loss": 0.0837, + "num_input_tokens_seen": 2214208, + "step": 2305 + }, + { + "epoch": 0.1960285132382892, + "grad_norm": 22.503530502319336, + "learning_rate": 1.9444932175004017e-06, + "loss": 0.0974, + "num_input_tokens_seen": 2218624, + "step": 2310 + }, + { + "epoch": 0.19645281737949763, + "grad_norm": 20.48867416381836, + "learning_rate": 1.9440055666629087e-06, + "loss": 0.0717, + "num_input_tokens_seen": 2223872, + "step": 2315 + }, + { + "epoch": 0.19687712152070605, + "grad_norm": 1.9906337261199951, + "learning_rate": 1.943515844764446e-06, + "loss": 0.0729, + "num_input_tokens_seen": 2228096, + "step": 2320 + }, + { + "epoch": 0.19730142566191447, + "grad_norm": 25.787109375, + "learning_rate": 1.943024052879418e-06, + "loss": 0.1151, + "num_input_tokens_seen": 2232384, + "step": 2325 + }, + { + "epoch": 0.1977257298031229, + "grad_norm": 4.576552867889404, + "learning_rate": 1.9425301920867703e-06, + "loss": 0.0336, + "num_input_tokens_seen": 2237184, + "step": 2330 + }, + { + "epoch": 0.19815003394433128, + "grad_norm": 18.02105140686035, + "learning_rate": 1.942034263469989e-06, + "loss": 0.1116, + "num_input_tokens_seen": 2242048, + "step": 2335 + }, + { + "epoch": 0.1985743380855397, + "grad_norm": 3.6412253379821777, + "learning_rate": 1.941536268117095e-06, + "loss": 0.0395, + "num_input_tokens_seen": 2247104, + "step": 2340 + }, + { + "epoch": 0.19899864222674812, + "grad_norm": 9.237351417541504, + "learning_rate": 1.9410362071206436e-06, + "loss": 0.089, + "num_input_tokens_seen": 2251840, + "step": 2345 + }, + { + "epoch": 0.19942294636795654, + "grad_norm": 29.391401290893555, + "learning_rate": 1.9405340815777232e-06, + "loss": 0.1879, + "num_input_tokens_seen": 2256832, + "step": 2350 + }, + { + "epoch": 0.19984725050916496, + "grad_norm": 29.297670364379883, + "learning_rate": 1.9400298925899505e-06, + "loss": 0.0277, + "num_input_tokens_seen": 2261120, + "step": 2355 + }, + { + "epoch": 0.20027155465037338, + "grad_norm": 8.756644248962402, + "learning_rate": 1.939523641263469e-06, + "loss": 0.1436, + "num_input_tokens_seen": 2266496, + "step": 2360 + }, + { + "epoch": 0.20027155465037338, + "eval_loss": 0.0887659341096878, + "eval_runtime": 15.9713, + "eval_samples_per_second": 655.866, + "eval_steps_per_second": 82.022, + "num_input_tokens_seen": 2266496, + "step": 2360 + }, + { + "epoch": 0.2006958587915818, + "grad_norm": 5.741673469543457, + "learning_rate": 1.9390153287089485e-06, + "loss": 0.0249, + "num_input_tokens_seen": 2271040, + "step": 2365 + }, + { + "epoch": 0.20112016293279023, + "grad_norm": 30.594064712524414, + "learning_rate": 1.938504956041579e-06, + "loss": 0.1026, + "num_input_tokens_seen": 2276096, + "step": 2370 + }, + { + "epoch": 0.20154446707399865, + "grad_norm": 20.060314178466797, + "learning_rate": 1.937992524381071e-06, + "loss": 0.0498, + "num_input_tokens_seen": 2280192, + "step": 2375 + }, + { + "epoch": 0.20196877121520707, + "grad_norm": 17.804256439208984, + "learning_rate": 1.9374780348516525e-06, + "loss": 0.0528, + "num_input_tokens_seen": 2284672, + "step": 2380 + }, + { + "epoch": 0.20239307535641549, + "grad_norm": 6.91187047958374, + "learning_rate": 1.9369614885820657e-06, + "loss": 0.1108, + "num_input_tokens_seen": 2289664, + "step": 2385 + }, + { + "epoch": 0.2028173794976239, + "grad_norm": 25.068439483642578, + "learning_rate": 1.9364428867055655e-06, + "loss": 0.0978, + "num_input_tokens_seen": 2294976, + "step": 2390 + }, + { + "epoch": 0.20324168363883233, + "grad_norm": 0.30525487661361694, + "learning_rate": 1.935922230359916e-06, + "loss": 0.0427, + "num_input_tokens_seen": 2299584, + "step": 2395 + }, + { + "epoch": 0.20366598778004075, + "grad_norm": 19.536846160888672, + "learning_rate": 1.9353995206873898e-06, + "loss": 0.067, + "num_input_tokens_seen": 2304320, + "step": 2400 + }, + { + "epoch": 0.20409029192124914, + "grad_norm": 18.92000961303711, + "learning_rate": 1.9348747588347637e-06, + "loss": 0.1673, + "num_input_tokens_seen": 2308928, + "step": 2405 + }, + { + "epoch": 0.20451459606245756, + "grad_norm": 2.1541614532470703, + "learning_rate": 1.9343479459533157e-06, + "loss": 0.0387, + "num_input_tokens_seen": 2313280, + "step": 2410 + }, + { + "epoch": 0.20493890020366598, + "grad_norm": 15.7988920211792, + "learning_rate": 1.933819083198826e-06, + "loss": 0.1093, + "num_input_tokens_seen": 2318400, + "step": 2415 + }, + { + "epoch": 0.2053632043448744, + "grad_norm": 13.255326271057129, + "learning_rate": 1.9332881717315694e-06, + "loss": 0.0704, + "num_input_tokens_seen": 2323712, + "step": 2420 + }, + { + "epoch": 0.20578750848608282, + "grad_norm": 7.066593647003174, + "learning_rate": 1.9327552127163172e-06, + "loss": 0.0282, + "num_input_tokens_seen": 2327936, + "step": 2425 + }, + { + "epoch": 0.20621181262729124, + "grad_norm": 0.11941832304000854, + "learning_rate": 1.932220207322332e-06, + "loss": 0.0612, + "num_input_tokens_seen": 2332224, + "step": 2430 + }, + { + "epoch": 0.20663611676849966, + "grad_norm": 20.786226272583008, + "learning_rate": 1.931683156723366e-06, + "loss": 0.0718, + "num_input_tokens_seen": 2336704, + "step": 2435 + }, + { + "epoch": 0.20706042090970808, + "grad_norm": 0.507086455821991, + "learning_rate": 1.9311440620976595e-06, + "loss": 0.0876, + "num_input_tokens_seen": 2341888, + "step": 2440 + }, + { + "epoch": 0.2074847250509165, + "grad_norm": 2.0232927799224854, + "learning_rate": 1.930602924627935e-06, + "loss": 0.0747, + "num_input_tokens_seen": 2346432, + "step": 2445 + }, + { + "epoch": 0.20790902919212492, + "grad_norm": 0.4495334327220917, + "learning_rate": 1.930059745501399e-06, + "loss": 0.0499, + "num_input_tokens_seen": 2350656, + "step": 2450 + }, + { + "epoch": 0.20833333333333334, + "grad_norm": 16.482301712036133, + "learning_rate": 1.9295145259097362e-06, + "loss": 0.0438, + "num_input_tokens_seen": 2355392, + "step": 2455 + }, + { + "epoch": 0.20875763747454176, + "grad_norm": 0.18059544265270233, + "learning_rate": 1.9289672670491076e-06, + "loss": 0.0241, + "num_input_tokens_seen": 2360320, + "step": 2460 + }, + { + "epoch": 0.20918194161575018, + "grad_norm": 2.5006070137023926, + "learning_rate": 1.928417970120149e-06, + "loss": 0.0112, + "num_input_tokens_seen": 2365120, + "step": 2465 + }, + { + "epoch": 0.20960624575695858, + "grad_norm": 4.708000659942627, + "learning_rate": 1.9278666363279664e-06, + "loss": 0.1338, + "num_input_tokens_seen": 2369920, + "step": 2470 + }, + { + "epoch": 0.210030549898167, + "grad_norm": 9.103507041931152, + "learning_rate": 1.9273132668821363e-06, + "loss": 0.0943, + "num_input_tokens_seen": 2374848, + "step": 2475 + }, + { + "epoch": 0.21045485403937542, + "grad_norm": 0.2682408392429352, + "learning_rate": 1.926757862996699e-06, + "loss": 0.0583, + "num_input_tokens_seen": 2380032, + "step": 2480 + }, + { + "epoch": 0.21087915818058384, + "grad_norm": 35.878334045410156, + "learning_rate": 1.92620042589016e-06, + "loss": 0.1324, + "num_input_tokens_seen": 2384704, + "step": 2485 + }, + { + "epoch": 0.21130346232179226, + "grad_norm": 51.964813232421875, + "learning_rate": 1.9256409567854847e-06, + "loss": 0.0661, + "num_input_tokens_seen": 2389568, + "step": 2490 + }, + { + "epoch": 0.21172776646300068, + "grad_norm": 23.085674285888672, + "learning_rate": 1.9250794569100963e-06, + "loss": 0.1469, + "num_input_tokens_seen": 2394560, + "step": 2495 + }, + { + "epoch": 0.2121520706042091, + "grad_norm": 2.490314245223999, + "learning_rate": 1.9245159274958737e-06, + "loss": 0.0676, + "num_input_tokens_seen": 2399232, + "step": 2500 + }, + { + "epoch": 0.21257637474541752, + "grad_norm": 14.327190399169922, + "learning_rate": 1.9239503697791487e-06, + "loss": 0.1362, + "num_input_tokens_seen": 2404032, + "step": 2505 + }, + { + "epoch": 0.21300067888662594, + "grad_norm": 10.138527870178223, + "learning_rate": 1.9233827850007024e-06, + "loss": 0.0744, + "num_input_tokens_seen": 2408576, + "step": 2510 + }, + { + "epoch": 0.21342498302783436, + "grad_norm": 10.438551902770996, + "learning_rate": 1.9228131744057633e-06, + "loss": 0.0727, + "num_input_tokens_seen": 2413440, + "step": 2515 + }, + { + "epoch": 0.21384928716904278, + "grad_norm": 10.9727144241333, + "learning_rate": 1.922241539244005e-06, + "loss": 0.0503, + "num_input_tokens_seen": 2417664, + "step": 2520 + }, + { + "epoch": 0.2142735913102512, + "grad_norm": 4.456893444061279, + "learning_rate": 1.921667880769541e-06, + "loss": 0.0807, + "num_input_tokens_seen": 2422464, + "step": 2525 + }, + { + "epoch": 0.21469789545145962, + "grad_norm": 9.519829750061035, + "learning_rate": 1.921092200240926e-06, + "loss": 0.039, + "num_input_tokens_seen": 2427200, + "step": 2530 + }, + { + "epoch": 0.215122199592668, + "grad_norm": 0.6565377712249756, + "learning_rate": 1.9205144989211495e-06, + "loss": 0.0421, + "num_input_tokens_seen": 2431616, + "step": 2535 + }, + { + "epoch": 0.21554650373387643, + "grad_norm": 8.525800704956055, + "learning_rate": 1.919934778077635e-06, + "loss": 0.0555, + "num_input_tokens_seen": 2436352, + "step": 2540 + }, + { + "epoch": 0.21597080787508485, + "grad_norm": 8.354979515075684, + "learning_rate": 1.9193530389822362e-06, + "loss": 0.0429, + "num_input_tokens_seen": 2440704, + "step": 2545 + }, + { + "epoch": 0.21639511201629327, + "grad_norm": 8.60445785522461, + "learning_rate": 1.918769282911235e-06, + "loss": 0.0246, + "num_input_tokens_seen": 2446272, + "step": 2550 + }, + { + "epoch": 0.2168194161575017, + "grad_norm": 30.532001495361328, + "learning_rate": 1.9181835111453383e-06, + "loss": 0.0731, + "num_input_tokens_seen": 2450496, + "step": 2555 + }, + { + "epoch": 0.2172437202987101, + "grad_norm": 49.82064437866211, + "learning_rate": 1.9175957249696755e-06, + "loss": 0.0483, + "num_input_tokens_seen": 2455424, + "step": 2560 + }, + { + "epoch": 0.21766802443991853, + "grad_norm": 38.16300964355469, + "learning_rate": 1.9170059256737946e-06, + "loss": 0.0535, + "num_input_tokens_seen": 2460352, + "step": 2565 + }, + { + "epoch": 0.21809232858112695, + "grad_norm": 14.842656135559082, + "learning_rate": 1.9164141145516613e-06, + "loss": 0.1311, + "num_input_tokens_seen": 2464896, + "step": 2570 + }, + { + "epoch": 0.21851663272233537, + "grad_norm": 0.09945914894342422, + "learning_rate": 1.915820292901654e-06, + "loss": 0.1178, + "num_input_tokens_seen": 2469824, + "step": 2575 + }, + { + "epoch": 0.2189409368635438, + "grad_norm": 12.554542541503906, + "learning_rate": 1.915224462026563e-06, + "loss": 0.1612, + "num_input_tokens_seen": 2474176, + "step": 2580 + }, + { + "epoch": 0.21936524100475221, + "grad_norm": 16.17306900024414, + "learning_rate": 1.9146266232335854e-06, + "loss": 0.1505, + "num_input_tokens_seen": 2479232, + "step": 2585 + }, + { + "epoch": 0.21978954514596064, + "grad_norm": 18.73937225341797, + "learning_rate": 1.914026777834325e-06, + "loss": 0.1221, + "num_input_tokens_seen": 2483648, + "step": 2590 + }, + { + "epoch": 0.22021384928716906, + "grad_norm": 9.550419807434082, + "learning_rate": 1.9134249271447872e-06, + "loss": 0.0517, + "num_input_tokens_seen": 2488064, + "step": 2595 + }, + { + "epoch": 0.22063815342837745, + "grad_norm": 4.426822185516357, + "learning_rate": 1.9128210724853765e-06, + "loss": 0.0604, + "num_input_tokens_seen": 2493184, + "step": 2600 + }, + { + "epoch": 0.22106245756958587, + "grad_norm": 8.261371612548828, + "learning_rate": 1.912215215180894e-06, + "loss": 0.094, + "num_input_tokens_seen": 2498048, + "step": 2605 + }, + { + "epoch": 0.2214867617107943, + "grad_norm": 17.701379776000977, + "learning_rate": 1.9116073565605347e-06, + "loss": 0.1097, + "num_input_tokens_seen": 2504064, + "step": 2610 + }, + { + "epoch": 0.2219110658520027, + "grad_norm": 6.866028785705566, + "learning_rate": 1.9109974979578847e-06, + "loss": 0.0698, + "num_input_tokens_seen": 2508800, + "step": 2615 + }, + { + "epoch": 0.22233536999321113, + "grad_norm": 3.621572732925415, + "learning_rate": 1.9103856407109172e-06, + "loss": 0.0328, + "num_input_tokens_seen": 2513280, + "step": 2620 + }, + { + "epoch": 0.22275967413441955, + "grad_norm": 8.503085136413574, + "learning_rate": 1.9097717861619907e-06, + "loss": 0.0484, + "num_input_tokens_seen": 2518080, + "step": 2625 + }, + { + "epoch": 0.22318397827562797, + "grad_norm": 4.364653587341309, + "learning_rate": 1.9091559356578445e-06, + "loss": 0.0655, + "num_input_tokens_seen": 2522688, + "step": 2630 + }, + { + "epoch": 0.2236082824168364, + "grad_norm": 0.2347300499677658, + "learning_rate": 1.9085380905495985e-06, + "loss": 0.0933, + "num_input_tokens_seen": 2527040, + "step": 2635 + }, + { + "epoch": 0.2240325865580448, + "grad_norm": 3.144343852996826, + "learning_rate": 1.9079182521927475e-06, + "loss": 0.0743, + "num_input_tokens_seen": 2531584, + "step": 2640 + }, + { + "epoch": 0.22445689069925323, + "grad_norm": 0.9802240133285522, + "learning_rate": 1.9072964219471594e-06, + "loss": 0.1144, + "num_input_tokens_seen": 2536384, + "step": 2645 + }, + { + "epoch": 0.22488119484046165, + "grad_norm": 21.657424926757812, + "learning_rate": 1.9066726011770724e-06, + "loss": 0.0821, + "num_input_tokens_seen": 2540800, + "step": 2650 + }, + { + "epoch": 0.22530549898167007, + "grad_norm": 1.9864215850830078, + "learning_rate": 1.906046791251092e-06, + "loss": 0.02, + "num_input_tokens_seen": 2545600, + "step": 2655 + }, + { + "epoch": 0.2257298031228785, + "grad_norm": 36.35599899291992, + "learning_rate": 1.9054189935421868e-06, + "loss": 0.1331, + "num_input_tokens_seen": 2550528, + "step": 2660 + }, + { + "epoch": 0.22615410726408688, + "grad_norm": 9.535066604614258, + "learning_rate": 1.9047892094276871e-06, + "loss": 0.0412, + "num_input_tokens_seen": 2555328, + "step": 2665 + }, + { + "epoch": 0.2265784114052953, + "grad_norm": 13.776278495788574, + "learning_rate": 1.9041574402892813e-06, + "loss": 0.0844, + "num_input_tokens_seen": 2560320, + "step": 2670 + }, + { + "epoch": 0.22700271554650372, + "grad_norm": 6.368226051330566, + "learning_rate": 1.903523687513012e-06, + "loss": 0.0467, + "num_input_tokens_seen": 2564800, + "step": 2675 + }, + { + "epoch": 0.22742701968771215, + "grad_norm": 20.76290512084961, + "learning_rate": 1.902887952489275e-06, + "loss": 0.0676, + "num_input_tokens_seen": 2569664, + "step": 2680 + }, + { + "epoch": 0.22785132382892057, + "grad_norm": 0.438536137342453, + "learning_rate": 1.9022502366128132e-06, + "loss": 0.0301, + "num_input_tokens_seen": 2574592, + "step": 2685 + }, + { + "epoch": 0.22827562797012899, + "grad_norm": 8.510451316833496, + "learning_rate": 1.9016105412827173e-06, + "loss": 0.1049, + "num_input_tokens_seen": 2579328, + "step": 2690 + }, + { + "epoch": 0.2286999321113374, + "grad_norm": 5.517171859741211, + "learning_rate": 1.9009688679024189e-06, + "loss": 0.0181, + "num_input_tokens_seen": 2584896, + "step": 2695 + }, + { + "epoch": 0.22912423625254583, + "grad_norm": 15.41623592376709, + "learning_rate": 1.9003252178796907e-06, + "loss": 0.1308, + "num_input_tokens_seen": 2589504, + "step": 2700 + }, + { + "epoch": 0.22954854039375425, + "grad_norm": 24.895021438598633, + "learning_rate": 1.8996795926266412e-06, + "loss": 0.0851, + "num_input_tokens_seen": 2594304, + "step": 2705 + }, + { + "epoch": 0.22997284453496267, + "grad_norm": 17.133085250854492, + "learning_rate": 1.899031993559712e-06, + "loss": 0.0292, + "num_input_tokens_seen": 2598784, + "step": 2710 + }, + { + "epoch": 0.2303971486761711, + "grad_norm": 25.906352996826172, + "learning_rate": 1.8983824220996764e-06, + "loss": 0.0709, + "num_input_tokens_seen": 2603712, + "step": 2715 + }, + { + "epoch": 0.2308214528173795, + "grad_norm": 21.616334915161133, + "learning_rate": 1.8977308796716338e-06, + "loss": 0.0578, + "num_input_tokens_seen": 2608320, + "step": 2720 + }, + { + "epoch": 0.23124575695858793, + "grad_norm": 1.3934400081634521, + "learning_rate": 1.897077367705008e-06, + "loss": 0.0302, + "num_input_tokens_seen": 2613248, + "step": 2725 + }, + { + "epoch": 0.23167006109979632, + "grad_norm": 24.00430679321289, + "learning_rate": 1.896421887633544e-06, + "loss": 0.0637, + "num_input_tokens_seen": 2617664, + "step": 2730 + }, + { + "epoch": 0.23209436524100474, + "grad_norm": 15.404330253601074, + "learning_rate": 1.8957644408953044e-06, + "loss": 0.0716, + "num_input_tokens_seen": 2622016, + "step": 2735 + }, + { + "epoch": 0.23251866938221316, + "grad_norm": 23.977699279785156, + "learning_rate": 1.8951050289326664e-06, + "loss": 0.0438, + "num_input_tokens_seen": 2626368, + "step": 2740 + }, + { + "epoch": 0.23294297352342158, + "grad_norm": 6.890242099761963, + "learning_rate": 1.8944436531923193e-06, + "loss": 0.032, + "num_input_tokens_seen": 2630720, + "step": 2745 + }, + { + "epoch": 0.23336727766463, + "grad_norm": 39.53364181518555, + "learning_rate": 1.8937803151252603e-06, + "loss": 0.1554, + "num_input_tokens_seen": 2635456, + "step": 2750 + }, + { + "epoch": 0.23379158180583842, + "grad_norm": 24.008249282836914, + "learning_rate": 1.8931150161867915e-06, + "loss": 0.1055, + "num_input_tokens_seen": 2639744, + "step": 2755 + }, + { + "epoch": 0.23421588594704684, + "grad_norm": 34.880165100097656, + "learning_rate": 1.8924477578365177e-06, + "loss": 0.0627, + "num_input_tokens_seen": 2643904, + "step": 2760 + }, + { + "epoch": 0.23464019008825526, + "grad_norm": 9.298295021057129, + "learning_rate": 1.8917785415383415e-06, + "loss": 0.0403, + "num_input_tokens_seen": 2648768, + "step": 2765 + }, + { + "epoch": 0.23506449422946368, + "grad_norm": 7.759624004364014, + "learning_rate": 1.8911073687604622e-06, + "loss": 0.0271, + "num_input_tokens_seen": 2653952, + "step": 2770 + }, + { + "epoch": 0.2354887983706721, + "grad_norm": 0.04529079049825668, + "learning_rate": 1.8904342409753703e-06, + "loss": 0.0031, + "num_input_tokens_seen": 2658432, + "step": 2775 + }, + { + "epoch": 0.23591310251188052, + "grad_norm": 1.6445941925048828, + "learning_rate": 1.8897591596598464e-06, + "loss": 0.054, + "num_input_tokens_seen": 2663168, + "step": 2780 + }, + { + "epoch": 0.23633740665308894, + "grad_norm": 27.741294860839844, + "learning_rate": 1.8890821262949564e-06, + "loss": 0.1288, + "num_input_tokens_seen": 2668096, + "step": 2785 + }, + { + "epoch": 0.23676171079429736, + "grad_norm": 0.5501040816307068, + "learning_rate": 1.8884031423660488e-06, + "loss": 0.1241, + "num_input_tokens_seen": 2672576, + "step": 2790 + }, + { + "epoch": 0.23718601493550576, + "grad_norm": 25.323272705078125, + "learning_rate": 1.8877222093627517e-06, + "loss": 0.0832, + "num_input_tokens_seen": 2678336, + "step": 2795 + }, + { + "epoch": 0.23761031907671418, + "grad_norm": 8.880626678466797, + "learning_rate": 1.8870393287789694e-06, + "loss": 0.1352, + "num_input_tokens_seen": 2684288, + "step": 2800 + }, + { + "epoch": 0.2380346232179226, + "grad_norm": 25.005939483642578, + "learning_rate": 1.8863545021128781e-06, + "loss": 0.0542, + "num_input_tokens_seen": 2688704, + "step": 2805 + }, + { + "epoch": 0.23845892735913102, + "grad_norm": 15.758960723876953, + "learning_rate": 1.885667730866925e-06, + "loss": 0.0608, + "num_input_tokens_seen": 2693056, + "step": 2810 + }, + { + "epoch": 0.23888323150033944, + "grad_norm": 12.390170097351074, + "learning_rate": 1.884979016547822e-06, + "loss": 0.0844, + "num_input_tokens_seen": 2697280, + "step": 2815 + }, + { + "epoch": 0.23930753564154786, + "grad_norm": 8.885245323181152, + "learning_rate": 1.8842883606665457e-06, + "loss": 0.0769, + "num_input_tokens_seen": 2701440, + "step": 2820 + }, + { + "epoch": 0.23973183978275628, + "grad_norm": 2.2123122215270996, + "learning_rate": 1.88359576473833e-06, + "loss": 0.0935, + "num_input_tokens_seen": 2707264, + "step": 2825 + }, + { + "epoch": 0.2401561439239647, + "grad_norm": 16.648462295532227, + "learning_rate": 1.8829012302826674e-06, + "loss": 0.1271, + "num_input_tokens_seen": 2711680, + "step": 2830 + }, + { + "epoch": 0.24058044806517312, + "grad_norm": 0.9231572151184082, + "learning_rate": 1.8822047588233017e-06, + "loss": 0.0232, + "num_input_tokens_seen": 2716352, + "step": 2835 + }, + { + "epoch": 0.24100475220638154, + "grad_norm": 2.5872323513031006, + "learning_rate": 1.881506351888227e-06, + "loss": 0.0673, + "num_input_tokens_seen": 2721024, + "step": 2840 + }, + { + "epoch": 0.24142905634758996, + "grad_norm": 21.090572357177734, + "learning_rate": 1.8808060110096839e-06, + "loss": 0.0675, + "num_input_tokens_seen": 2725696, + "step": 2845 + }, + { + "epoch": 0.24185336048879838, + "grad_norm": 21.68260383605957, + "learning_rate": 1.8801037377241553e-06, + "loss": 0.0625, + "num_input_tokens_seen": 2731328, + "step": 2850 + }, + { + "epoch": 0.2422776646300068, + "grad_norm": 5.974689483642578, + "learning_rate": 1.879399533572364e-06, + "loss": 0.115, + "num_input_tokens_seen": 2736192, + "step": 2855 + }, + { + "epoch": 0.24270196877121522, + "grad_norm": 19.78734588623047, + "learning_rate": 1.8786934000992688e-06, + "loss": 0.0828, + "num_input_tokens_seen": 2740544, + "step": 2860 + }, + { + "epoch": 0.2431262729124236, + "grad_norm": 12.459096908569336, + "learning_rate": 1.877985338854061e-06, + "loss": 0.0668, + "num_input_tokens_seen": 2745472, + "step": 2865 + }, + { + "epoch": 0.24355057705363203, + "grad_norm": 1.3515022993087769, + "learning_rate": 1.877275351390162e-06, + "loss": 0.0126, + "num_input_tokens_seen": 2750528, + "step": 2870 + }, + { + "epoch": 0.24397488119484045, + "grad_norm": 0.6821182370185852, + "learning_rate": 1.8765634392652183e-06, + "loss": 0.0341, + "num_input_tokens_seen": 2755328, + "step": 2875 + }, + { + "epoch": 0.24439918533604887, + "grad_norm": 21.59737777709961, + "learning_rate": 1.8758496040410998e-06, + "loss": 0.0878, + "num_input_tokens_seen": 2760128, + "step": 2880 + }, + { + "epoch": 0.2448234894772573, + "grad_norm": 1.4333782196044922, + "learning_rate": 1.8751338472838942e-06, + "loss": 0.0316, + "num_input_tokens_seen": 2764992, + "step": 2885 + }, + { + "epoch": 0.24524779361846571, + "grad_norm": 18.27245330810547, + "learning_rate": 1.8744161705639065e-06, + "loss": 0.0563, + "num_input_tokens_seen": 2769408, + "step": 2890 + }, + { + "epoch": 0.24567209775967414, + "grad_norm": 16.279781341552734, + "learning_rate": 1.8736965754556526e-06, + "loss": 0.0999, + "num_input_tokens_seen": 2774336, + "step": 2895 + }, + { + "epoch": 0.24609640190088256, + "grad_norm": 25.436418533325195, + "learning_rate": 1.8729750635378578e-06, + "loss": 0.1851, + "num_input_tokens_seen": 2779136, + "step": 2900 + }, + { + "epoch": 0.24652070604209098, + "grad_norm": 25.405763626098633, + "learning_rate": 1.872251636393453e-06, + "loss": 0.0546, + "num_input_tokens_seen": 2784256, + "step": 2905 + }, + { + "epoch": 0.2469450101832994, + "grad_norm": 19.037561416625977, + "learning_rate": 1.8715262956095694e-06, + "loss": 0.1097, + "num_input_tokens_seen": 2788864, + "step": 2910 + }, + { + "epoch": 0.24736931432450782, + "grad_norm": 18.466859817504883, + "learning_rate": 1.8707990427775386e-06, + "loss": 0.0549, + "num_input_tokens_seen": 2793152, + "step": 2915 + }, + { + "epoch": 0.24779361846571624, + "grad_norm": 7.623482704162598, + "learning_rate": 1.870069879492886e-06, + "loss": 0.0782, + "num_input_tokens_seen": 2798144, + "step": 2920 + }, + { + "epoch": 0.24821792260692466, + "grad_norm": 0.18756820261478424, + "learning_rate": 1.869338807355328e-06, + "loss": 0.0769, + "num_input_tokens_seen": 2803136, + "step": 2925 + }, + { + "epoch": 0.24864222674813305, + "grad_norm": 18.723121643066406, + "learning_rate": 1.8686058279687699e-06, + "loss": 0.0514, + "num_input_tokens_seen": 2807744, + "step": 2930 + }, + { + "epoch": 0.24906653088934147, + "grad_norm": 1.2718958854675293, + "learning_rate": 1.8678709429413e-06, + "loss": 0.038, + "num_input_tokens_seen": 2812928, + "step": 2935 + }, + { + "epoch": 0.2494908350305499, + "grad_norm": 12.092676162719727, + "learning_rate": 1.867134153885189e-06, + "loss": 0.0578, + "num_input_tokens_seen": 2817536, + "step": 2940 + }, + { + "epoch": 0.2499151391717583, + "grad_norm": 2.50976300239563, + "learning_rate": 1.8663954624168832e-06, + "loss": 0.0818, + "num_input_tokens_seen": 2822784, + "step": 2945 + }, + { + "epoch": 0.25033944331296676, + "grad_norm": 0.1713555008172989, + "learning_rate": 1.8656548701570039e-06, + "loss": 0.0749, + "num_input_tokens_seen": 2827328, + "step": 2950 + }, + { + "epoch": 0.25033944331296676, + "eval_loss": 0.07611989974975586, + "eval_runtime": 15.9014, + "eval_samples_per_second": 658.746, + "eval_steps_per_second": 82.383, + "num_input_tokens_seen": 2827328, + "step": 2950 + }, + { + "epoch": 0.2507637474541752, + "grad_norm": 7.6525068283081055, + "learning_rate": 1.864912378730342e-06, + "loss": 0.0738, + "num_input_tokens_seen": 2832128, + "step": 2955 + }, + { + "epoch": 0.25118805159538354, + "grad_norm": 24.655349731445312, + "learning_rate": 1.8641679897658551e-06, + "loss": 0.149, + "num_input_tokens_seen": 2837824, + "step": 2960 + }, + { + "epoch": 0.25161235573659196, + "grad_norm": 3.085174798965454, + "learning_rate": 1.8634217048966633e-06, + "loss": 0.063, + "num_input_tokens_seen": 2842240, + "step": 2965 + }, + { + "epoch": 0.2520366598778004, + "grad_norm": 24.828632354736328, + "learning_rate": 1.8626735257600475e-06, + "loss": 0.0512, + "num_input_tokens_seen": 2846848, + "step": 2970 + }, + { + "epoch": 0.2524609640190088, + "grad_norm": 12.527300834655762, + "learning_rate": 1.8619234539974429e-06, + "loss": 0.0923, + "num_input_tokens_seen": 2851392, + "step": 2975 + }, + { + "epoch": 0.2528852681602172, + "grad_norm": 5.862428665161133, + "learning_rate": 1.8611714912544376e-06, + "loss": 0.0373, + "num_input_tokens_seen": 2855680, + "step": 2980 + }, + { + "epoch": 0.25330957230142565, + "grad_norm": 12.940213203430176, + "learning_rate": 1.860417639180769e-06, + "loss": 0.0624, + "num_input_tokens_seen": 2860544, + "step": 2985 + }, + { + "epoch": 0.25373387644263407, + "grad_norm": 8.12386417388916, + "learning_rate": 1.8596618994303183e-06, + "loss": 0.0577, + "num_input_tokens_seen": 2865152, + "step": 2990 + }, + { + "epoch": 0.2541581805838425, + "grad_norm": 0.8909376263618469, + "learning_rate": 1.858904273661109e-06, + "loss": 0.066, + "num_input_tokens_seen": 2870144, + "step": 2995 + }, + { + "epoch": 0.2545824847250509, + "grad_norm": 26.337459564208984, + "learning_rate": 1.8581447635353019e-06, + "loss": 0.0492, + "num_input_tokens_seen": 2875200, + "step": 3000 + }, + { + "epoch": 0.2550067888662593, + "grad_norm": 11.455595016479492, + "learning_rate": 1.8573833707191918e-06, + "loss": 0.1037, + "num_input_tokens_seen": 2880192, + "step": 3005 + }, + { + "epoch": 0.25543109300746775, + "grad_norm": 2.723041296005249, + "learning_rate": 1.8566200968832044e-06, + "loss": 0.1198, + "num_input_tokens_seen": 2884800, + "step": 3010 + }, + { + "epoch": 0.25585539714867617, + "grad_norm": 25.147613525390625, + "learning_rate": 1.855854943701892e-06, + "loss": 0.0739, + "num_input_tokens_seen": 2890176, + "step": 3015 + }, + { + "epoch": 0.2562797012898846, + "grad_norm": 23.688777923583984, + "learning_rate": 1.85508791285393e-06, + "loss": 0.1248, + "num_input_tokens_seen": 2895040, + "step": 3020 + }, + { + "epoch": 0.256704005431093, + "grad_norm": 5.152799606323242, + "learning_rate": 1.8543190060221125e-06, + "loss": 0.0693, + "num_input_tokens_seen": 2899776, + "step": 3025 + }, + { + "epoch": 0.25712830957230143, + "grad_norm": 6.354460716247559, + "learning_rate": 1.853548224893351e-06, + "loss": 0.0601, + "num_input_tokens_seen": 2904064, + "step": 3030 + }, + { + "epoch": 0.25755261371350985, + "grad_norm": 10.099621772766113, + "learning_rate": 1.8527755711586678e-06, + "loss": 0.0395, + "num_input_tokens_seen": 2908800, + "step": 3035 + }, + { + "epoch": 0.25797691785471827, + "grad_norm": 20.724409103393555, + "learning_rate": 1.8520010465131935e-06, + "loss": 0.0432, + "num_input_tokens_seen": 2913216, + "step": 3040 + }, + { + "epoch": 0.2584012219959267, + "grad_norm": 12.310056686401367, + "learning_rate": 1.8512246526561636e-06, + "loss": 0.0755, + "num_input_tokens_seen": 2917504, + "step": 3045 + }, + { + "epoch": 0.2588255261371351, + "grad_norm": 21.255277633666992, + "learning_rate": 1.8504463912909149e-06, + "loss": 0.1289, + "num_input_tokens_seen": 2922752, + "step": 3050 + }, + { + "epoch": 0.25924983027834353, + "grad_norm": 16.850648880004883, + "learning_rate": 1.8496662641248807e-06, + "loss": 0.0354, + "num_input_tokens_seen": 2928000, + "step": 3055 + }, + { + "epoch": 0.25967413441955195, + "grad_norm": 26.931428909301758, + "learning_rate": 1.8488842728695874e-06, + "loss": 0.0658, + "num_input_tokens_seen": 2932736, + "step": 3060 + }, + { + "epoch": 0.26009843856076037, + "grad_norm": 22.07209014892578, + "learning_rate": 1.8481004192406525e-06, + "loss": 0.1191, + "num_input_tokens_seen": 2937664, + "step": 3065 + }, + { + "epoch": 0.2605227427019688, + "grad_norm": 43.74690628051758, + "learning_rate": 1.8473147049577773e-06, + "loss": 0.1139, + "num_input_tokens_seen": 2942784, + "step": 3070 + }, + { + "epoch": 0.2609470468431772, + "grad_norm": 18.454248428344727, + "learning_rate": 1.8465271317447474e-06, + "loss": 0.1196, + "num_input_tokens_seen": 2947840, + "step": 3075 + }, + { + "epoch": 0.26137135098438563, + "grad_norm": 33.043155670166016, + "learning_rate": 1.845737701329425e-06, + "loss": 0.0741, + "num_input_tokens_seen": 2952448, + "step": 3080 + }, + { + "epoch": 0.26179565512559405, + "grad_norm": 2.562422752380371, + "learning_rate": 1.8449464154437475e-06, + "loss": 0.0241, + "num_input_tokens_seen": 2957120, + "step": 3085 + }, + { + "epoch": 0.26221995926680247, + "grad_norm": 12.61534309387207, + "learning_rate": 1.8441532758237233e-06, + "loss": 0.1096, + "num_input_tokens_seen": 2961728, + "step": 3090 + }, + { + "epoch": 0.26264426340801084, + "grad_norm": 6.174483776092529, + "learning_rate": 1.8433582842094273e-06, + "loss": 0.0875, + "num_input_tokens_seen": 2966208, + "step": 3095 + }, + { + "epoch": 0.26306856754921926, + "grad_norm": 30.785140991210938, + "learning_rate": 1.8425614423449974e-06, + "loss": 0.0347, + "num_input_tokens_seen": 2972288, + "step": 3100 + }, + { + "epoch": 0.2634928716904277, + "grad_norm": 11.550189971923828, + "learning_rate": 1.8417627519786313e-06, + "loss": 0.0902, + "num_input_tokens_seen": 2976512, + "step": 3105 + }, + { + "epoch": 0.2639171758316361, + "grad_norm": 3.7005128860473633, + "learning_rate": 1.840962214862582e-06, + "loss": 0.0626, + "num_input_tokens_seen": 2981248, + "step": 3110 + }, + { + "epoch": 0.2643414799728445, + "grad_norm": 19.367431640625, + "learning_rate": 1.8401598327531533e-06, + "loss": 0.0862, + "num_input_tokens_seen": 2985728, + "step": 3115 + }, + { + "epoch": 0.26476578411405294, + "grad_norm": 24.754112243652344, + "learning_rate": 1.839355607410698e-06, + "loss": 0.0586, + "num_input_tokens_seen": 2990144, + "step": 3120 + }, + { + "epoch": 0.26519008825526136, + "grad_norm": 17.672439575195312, + "learning_rate": 1.8385495405996119e-06, + "loss": 0.0669, + "num_input_tokens_seen": 2994560, + "step": 3125 + }, + { + "epoch": 0.2656143923964698, + "grad_norm": 33.57306671142578, + "learning_rate": 1.8377416340883312e-06, + "loss": 0.0625, + "num_input_tokens_seen": 2999488, + "step": 3130 + }, + { + "epoch": 0.2660386965376782, + "grad_norm": 5.751327991485596, + "learning_rate": 1.836931889649328e-06, + "loss": 0.0634, + "num_input_tokens_seen": 3004096, + "step": 3135 + }, + { + "epoch": 0.2664630006788866, + "grad_norm": 36.13847732543945, + "learning_rate": 1.8361203090591068e-06, + "loss": 0.1258, + "num_input_tokens_seen": 3008512, + "step": 3140 + }, + { + "epoch": 0.26688730482009504, + "grad_norm": 16.493362426757812, + "learning_rate": 1.8353068940982006e-06, + "loss": 0.0708, + "num_input_tokens_seen": 3013504, + "step": 3145 + }, + { + "epoch": 0.26731160896130346, + "grad_norm": 0.15408827364444733, + "learning_rate": 1.8344916465511664e-06, + "loss": 0.0164, + "num_input_tokens_seen": 3018112, + "step": 3150 + }, + { + "epoch": 0.2677359131025119, + "grad_norm": 14.348245620727539, + "learning_rate": 1.833674568206582e-06, + "loss": 0.1132, + "num_input_tokens_seen": 3023168, + "step": 3155 + }, + { + "epoch": 0.2681602172437203, + "grad_norm": 10.401468276977539, + "learning_rate": 1.832855660857042e-06, + "loss": 0.0723, + "num_input_tokens_seen": 3027840, + "step": 3160 + }, + { + "epoch": 0.2685845213849287, + "grad_norm": 30.41805648803711, + "learning_rate": 1.8320349262991532e-06, + "loss": 0.1293, + "num_input_tokens_seen": 3034176, + "step": 3165 + }, + { + "epoch": 0.26900882552613714, + "grad_norm": 15.705052375793457, + "learning_rate": 1.8312123663335316e-06, + "loss": 0.063, + "num_input_tokens_seen": 3038464, + "step": 3170 + }, + { + "epoch": 0.26943312966734556, + "grad_norm": 13.934436798095703, + "learning_rate": 1.8303879827647974e-06, + "loss": 0.0748, + "num_input_tokens_seen": 3042944, + "step": 3175 + }, + { + "epoch": 0.269857433808554, + "grad_norm": 2.7212295532226562, + "learning_rate": 1.8295617774015724e-06, + "loss": 0.0582, + "num_input_tokens_seen": 3048000, + "step": 3180 + }, + { + "epoch": 0.2702817379497624, + "grad_norm": 12.423938751220703, + "learning_rate": 1.8287337520564744e-06, + "loss": 0.0863, + "num_input_tokens_seen": 3053056, + "step": 3185 + }, + { + "epoch": 0.2707060420909708, + "grad_norm": 20.587114334106445, + "learning_rate": 1.8279039085461148e-06, + "loss": 0.1082, + "num_input_tokens_seen": 3057792, + "step": 3190 + }, + { + "epoch": 0.27113034623217924, + "grad_norm": 1.5044630765914917, + "learning_rate": 1.8270722486910933e-06, + "loss": 0.1442, + "num_input_tokens_seen": 3062784, + "step": 3195 + }, + { + "epoch": 0.27155465037338766, + "grad_norm": 21.748659133911133, + "learning_rate": 1.8262387743159948e-06, + "loss": 0.1048, + "num_input_tokens_seen": 3067712, + "step": 3200 + }, + { + "epoch": 0.2719789545145961, + "grad_norm": 18.255653381347656, + "learning_rate": 1.8254034872493853e-06, + "loss": 0.0471, + "num_input_tokens_seen": 3072000, + "step": 3205 + }, + { + "epoch": 0.2724032586558045, + "grad_norm": 0.24077212810516357, + "learning_rate": 1.8245663893238072e-06, + "loss": 0.0572, + "num_input_tokens_seen": 3076416, + "step": 3210 + }, + { + "epoch": 0.2728275627970129, + "grad_norm": 18.335908889770508, + "learning_rate": 1.823727482375776e-06, + "loss": 0.0936, + "num_input_tokens_seen": 3081792, + "step": 3215 + }, + { + "epoch": 0.27325186693822134, + "grad_norm": 5.29511833190918, + "learning_rate": 1.8228867682457762e-06, + "loss": 0.1179, + "num_input_tokens_seen": 3086656, + "step": 3220 + }, + { + "epoch": 0.2736761710794297, + "grad_norm": 8.807744979858398, + "learning_rate": 1.8220442487782565e-06, + "loss": 0.0912, + "num_input_tokens_seen": 3091328, + "step": 3225 + }, + { + "epoch": 0.27410047522063813, + "grad_norm": 1.064199686050415, + "learning_rate": 1.8211999258216273e-06, + "loss": 0.0176, + "num_input_tokens_seen": 3096448, + "step": 3230 + }, + { + "epoch": 0.27452477936184655, + "grad_norm": 5.58245325088501, + "learning_rate": 1.8203538012282548e-06, + "loss": 0.0774, + "num_input_tokens_seen": 3102400, + "step": 3235 + }, + { + "epoch": 0.27494908350305497, + "grad_norm": 3.3104500770568848, + "learning_rate": 1.8195058768544583e-06, + "loss": 0.0535, + "num_input_tokens_seen": 3107008, + "step": 3240 + }, + { + "epoch": 0.2753733876442634, + "grad_norm": 0.2806529104709625, + "learning_rate": 1.8186561545605052e-06, + "loss": 0.0291, + "num_input_tokens_seen": 3111872, + "step": 3245 + }, + { + "epoch": 0.2757976917854718, + "grad_norm": 23.801342010498047, + "learning_rate": 1.8178046362106083e-06, + "loss": 0.0274, + "num_input_tokens_seen": 3116544, + "step": 3250 + }, + { + "epoch": 0.27622199592668023, + "grad_norm": 9.284271240234375, + "learning_rate": 1.8169513236729195e-06, + "loss": 0.1035, + "num_input_tokens_seen": 3121024, + "step": 3255 + }, + { + "epoch": 0.27664630006788865, + "grad_norm": 24.860389709472656, + "learning_rate": 1.8160962188195278e-06, + "loss": 0.0561, + "num_input_tokens_seen": 3125696, + "step": 3260 + }, + { + "epoch": 0.27707060420909707, + "grad_norm": 7.064362049102783, + "learning_rate": 1.8152393235264545e-06, + "loss": 0.0989, + "num_input_tokens_seen": 3130752, + "step": 3265 + }, + { + "epoch": 0.2774949083503055, + "grad_norm": 21.392122268676758, + "learning_rate": 1.8143806396736486e-06, + "loss": 0.0991, + "num_input_tokens_seen": 3135360, + "step": 3270 + }, + { + "epoch": 0.2779192124915139, + "grad_norm": 26.925817489624023, + "learning_rate": 1.813520169144983e-06, + "loss": 0.1135, + "num_input_tokens_seen": 3140032, + "step": 3275 + }, + { + "epoch": 0.27834351663272233, + "grad_norm": 0.9765627980232239, + "learning_rate": 1.8126579138282501e-06, + "loss": 0.0532, + "num_input_tokens_seen": 3144960, + "step": 3280 + }, + { + "epoch": 0.27876782077393075, + "grad_norm": 1.4220398664474487, + "learning_rate": 1.8117938756151592e-06, + "loss": 0.065, + "num_input_tokens_seen": 3150016, + "step": 3285 + }, + { + "epoch": 0.2791921249151392, + "grad_norm": 12.359588623046875, + "learning_rate": 1.8109280564013297e-06, + "loss": 0.1384, + "num_input_tokens_seen": 3155200, + "step": 3290 + }, + { + "epoch": 0.2796164290563476, + "grad_norm": 22.804471969604492, + "learning_rate": 1.8100604580862898e-06, + "loss": 0.0494, + "num_input_tokens_seen": 3160000, + "step": 3295 + }, + { + "epoch": 0.280040733197556, + "grad_norm": 0.858443558216095, + "learning_rate": 1.8091910825734686e-06, + "loss": 0.0524, + "num_input_tokens_seen": 3164672, + "step": 3300 + }, + { + "epoch": 0.28046503733876443, + "grad_norm": 10.366905212402344, + "learning_rate": 1.808319931770197e-06, + "loss": 0.052, + "num_input_tokens_seen": 3169152, + "step": 3305 + }, + { + "epoch": 0.28088934147997285, + "grad_norm": 38.595821380615234, + "learning_rate": 1.8074470075876983e-06, + "loss": 0.0446, + "num_input_tokens_seen": 3173888, + "step": 3310 + }, + { + "epoch": 0.2813136456211813, + "grad_norm": 0.30433109402656555, + "learning_rate": 1.8065723119410884e-06, + "loss": 0.0326, + "num_input_tokens_seen": 3179072, + "step": 3315 + }, + { + "epoch": 0.2817379497623897, + "grad_norm": 18.26148223876953, + "learning_rate": 1.8056958467493678e-06, + "loss": 0.0651, + "num_input_tokens_seen": 3183552, + "step": 3320 + }, + { + "epoch": 0.2821622539035981, + "grad_norm": 7.407904148101807, + "learning_rate": 1.8048176139354207e-06, + "loss": 0.0082, + "num_input_tokens_seen": 3187968, + "step": 3325 + }, + { + "epoch": 0.28258655804480654, + "grad_norm": 0.13984189927577972, + "learning_rate": 1.8039376154260086e-06, + "loss": 0.1037, + "num_input_tokens_seen": 3192704, + "step": 3330 + }, + { + "epoch": 0.28301086218601496, + "grad_norm": 14.583263397216797, + "learning_rate": 1.803055853151767e-06, + "loss": 0.0991, + "num_input_tokens_seen": 3197760, + "step": 3335 + }, + { + "epoch": 0.2834351663272234, + "grad_norm": 32.27197265625, + "learning_rate": 1.8021723290472007e-06, + "loss": 0.0358, + "num_input_tokens_seen": 3202368, + "step": 3340 + }, + { + "epoch": 0.2838594704684318, + "grad_norm": 10.743328094482422, + "learning_rate": 1.8012870450506798e-06, + "loss": 0.1846, + "num_input_tokens_seen": 3207360, + "step": 3345 + }, + { + "epoch": 0.2842837746096402, + "grad_norm": 10.31937313079834, + "learning_rate": 1.800400003104436e-06, + "loss": 0.0893, + "num_input_tokens_seen": 3213632, + "step": 3350 + }, + { + "epoch": 0.2847080787508486, + "grad_norm": 8.19094181060791, + "learning_rate": 1.799511205154557e-06, + "loss": 0.0597, + "num_input_tokens_seen": 3219136, + "step": 3355 + }, + { + "epoch": 0.285132382892057, + "grad_norm": 1.475780725479126, + "learning_rate": 1.7986206531509835e-06, + "loss": 0.0728, + "num_input_tokens_seen": 3225088, + "step": 3360 + }, + { + "epoch": 0.2855566870332654, + "grad_norm": 6.953364372253418, + "learning_rate": 1.7977283490475043e-06, + "loss": 0.0789, + "num_input_tokens_seen": 3229504, + "step": 3365 + }, + { + "epoch": 0.28598099117447384, + "grad_norm": 7.935585975646973, + "learning_rate": 1.796834294801752e-06, + "loss": 0.0618, + "num_input_tokens_seen": 3234368, + "step": 3370 + }, + { + "epoch": 0.28640529531568226, + "grad_norm": 7.753322124481201, + "learning_rate": 1.7959384923751993e-06, + "loss": 0.082, + "num_input_tokens_seen": 3239232, + "step": 3375 + }, + { + "epoch": 0.2868295994568907, + "grad_norm": 28.726940155029297, + "learning_rate": 1.7950409437331535e-06, + "loss": 0.064, + "num_input_tokens_seen": 3244288, + "step": 3380 + }, + { + "epoch": 0.2872539035980991, + "grad_norm": 14.288199424743652, + "learning_rate": 1.7941416508447534e-06, + "loss": 0.1214, + "num_input_tokens_seen": 3248640, + "step": 3385 + }, + { + "epoch": 0.2876782077393075, + "grad_norm": 0.3123435378074646, + "learning_rate": 1.7932406156829649e-06, + "loss": 0.0186, + "num_input_tokens_seen": 3253440, + "step": 3390 + }, + { + "epoch": 0.28810251188051594, + "grad_norm": 12.358621597290039, + "learning_rate": 1.7923378402245756e-06, + "loss": 0.0676, + "num_input_tokens_seen": 3258048, + "step": 3395 + }, + { + "epoch": 0.28852681602172436, + "grad_norm": 8.790231704711914, + "learning_rate": 1.7914333264501913e-06, + "loss": 0.0332, + "num_input_tokens_seen": 3262912, + "step": 3400 + }, + { + "epoch": 0.2889511201629328, + "grad_norm": 20.035654067993164, + "learning_rate": 1.790527076344232e-06, + "loss": 0.0887, + "num_input_tokens_seen": 3267776, + "step": 3405 + }, + { + "epoch": 0.2893754243041412, + "grad_norm": 20.470781326293945, + "learning_rate": 1.7896190918949266e-06, + "loss": 0.0464, + "num_input_tokens_seen": 3272448, + "step": 3410 + }, + { + "epoch": 0.2897997284453496, + "grad_norm": 14.918984413146973, + "learning_rate": 1.7887093750943088e-06, + "loss": 0.07, + "num_input_tokens_seen": 3277056, + "step": 3415 + }, + { + "epoch": 0.29022403258655805, + "grad_norm": 9.069342613220215, + "learning_rate": 1.7877979279382131e-06, + "loss": 0.0767, + "num_input_tokens_seen": 3282048, + "step": 3420 + }, + { + "epoch": 0.29064833672776647, + "grad_norm": 11.887508392333984, + "learning_rate": 1.7868847524262708e-06, + "loss": 0.0967, + "num_input_tokens_seen": 3286336, + "step": 3425 + }, + { + "epoch": 0.2910726408689749, + "grad_norm": 1.5049360990524292, + "learning_rate": 1.7859698505619043e-06, + "loss": 0.0277, + "num_input_tokens_seen": 3290880, + "step": 3430 + }, + { + "epoch": 0.2914969450101833, + "grad_norm": 0.22411267459392548, + "learning_rate": 1.7850532243523238e-06, + "loss": 0.0378, + "num_input_tokens_seen": 3295360, + "step": 3435 + }, + { + "epoch": 0.2919212491513917, + "grad_norm": 0.26940274238586426, + "learning_rate": 1.7841348758085224e-06, + "loss": 0.0274, + "num_input_tokens_seen": 3299840, + "step": 3440 + }, + { + "epoch": 0.29234555329260015, + "grad_norm": 1.3497130870819092, + "learning_rate": 1.7832148069452719e-06, + "loss": 0.0298, + "num_input_tokens_seen": 3304448, + "step": 3445 + }, + { + "epoch": 0.29276985743380857, + "grad_norm": 0.14612747728824615, + "learning_rate": 1.7822930197811186e-06, + "loss": 0.1044, + "num_input_tokens_seen": 3308928, + "step": 3450 + }, + { + "epoch": 0.293194161575017, + "grad_norm": 6.200453758239746, + "learning_rate": 1.781369516338378e-06, + "loss": 0.1, + "num_input_tokens_seen": 3313408, + "step": 3455 + }, + { + "epoch": 0.2936184657162254, + "grad_norm": 8.402563095092773, + "learning_rate": 1.7804442986431317e-06, + "loss": 0.0865, + "num_input_tokens_seen": 3318080, + "step": 3460 + }, + { + "epoch": 0.29404276985743383, + "grad_norm": 18.582571029663086, + "learning_rate": 1.7795173687252213e-06, + "loss": 0.0947, + "num_input_tokens_seen": 3323136, + "step": 3465 + }, + { + "epoch": 0.29446707399864225, + "grad_norm": 19.33099937438965, + "learning_rate": 1.778588728618246e-06, + "loss": 0.0454, + "num_input_tokens_seen": 3327936, + "step": 3470 + }, + { + "epoch": 0.29489137813985067, + "grad_norm": 19.658811569213867, + "learning_rate": 1.777658380359556e-06, + "loss": 0.0718, + "num_input_tokens_seen": 3332864, + "step": 3475 + }, + { + "epoch": 0.2953156822810591, + "grad_norm": 0.3303367495536804, + "learning_rate": 1.7767263259902494e-06, + "loss": 0.0759, + "num_input_tokens_seen": 3338048, + "step": 3480 + }, + { + "epoch": 0.2957399864222675, + "grad_norm": 14.395822525024414, + "learning_rate": 1.7757925675551672e-06, + "loss": 0.1356, + "num_input_tokens_seen": 3343104, + "step": 3485 + }, + { + "epoch": 0.2961642905634759, + "grad_norm": 11.50323486328125, + "learning_rate": 1.7748571071028898e-06, + "loss": 0.0701, + "num_input_tokens_seen": 3347712, + "step": 3490 + }, + { + "epoch": 0.2965885947046843, + "grad_norm": 10.81116771697998, + "learning_rate": 1.7739199466857301e-06, + "loss": 0.0489, + "num_input_tokens_seen": 3352000, + "step": 3495 + }, + { + "epoch": 0.2970128988458927, + "grad_norm": 6.09246301651001, + "learning_rate": 1.772981088359732e-06, + "loss": 0.0291, + "num_input_tokens_seen": 3356480, + "step": 3500 + }, + { + "epoch": 0.29743720298710113, + "grad_norm": 13.909074783325195, + "learning_rate": 1.7720405341846636e-06, + "loss": 0.0997, + "num_input_tokens_seen": 3361536, + "step": 3505 + }, + { + "epoch": 0.29786150712830956, + "grad_norm": 10.681073188781738, + "learning_rate": 1.771098286224014e-06, + "loss": 0.0796, + "num_input_tokens_seen": 3367296, + "step": 3510 + }, + { + "epoch": 0.298285811269518, + "grad_norm": 0.8761059641838074, + "learning_rate": 1.7701543465449884e-06, + "loss": 0.0678, + "num_input_tokens_seen": 3372096, + "step": 3515 + }, + { + "epoch": 0.2987101154107264, + "grad_norm": 0.13927964866161346, + "learning_rate": 1.7692087172185026e-06, + "loss": 0.0632, + "num_input_tokens_seen": 3376384, + "step": 3520 + }, + { + "epoch": 0.2991344195519348, + "grad_norm": 8.352365493774414, + "learning_rate": 1.7682614003191805e-06, + "loss": 0.0409, + "num_input_tokens_seen": 3381504, + "step": 3525 + }, + { + "epoch": 0.29955872369314324, + "grad_norm": 20.81534767150879, + "learning_rate": 1.7673123979253475e-06, + "loss": 0.0469, + "num_input_tokens_seen": 3386112, + "step": 3530 + }, + { + "epoch": 0.29998302783435166, + "grad_norm": 1.3498769998550415, + "learning_rate": 1.7663617121190271e-06, + "loss": 0.05, + "num_input_tokens_seen": 3395072, + "step": 3535 + }, + { + "epoch": 0.3004073319755601, + "grad_norm": 0.6542552709579468, + "learning_rate": 1.7654093449859367e-06, + "loss": 0.0141, + "num_input_tokens_seen": 3399808, + "step": 3540 + }, + { + "epoch": 0.3004073319755601, + "eval_loss": 0.08620841801166534, + "eval_runtime": 15.9677, + "eval_samples_per_second": 656.01, + "eval_steps_per_second": 82.04, + "num_input_tokens_seen": 3399808, + "step": 3540 + }, + { + "epoch": 0.3008316361167685, + "grad_norm": 32.648826599121094, + "learning_rate": 1.764455298615481e-06, + "loss": 0.0416, + "num_input_tokens_seen": 3404544, + "step": 3545 + }, + { + "epoch": 0.3012559402579769, + "grad_norm": 11.315652847290039, + "learning_rate": 1.7634995751007499e-06, + "loss": 0.1003, + "num_input_tokens_seen": 3408896, + "step": 3550 + }, + { + "epoch": 0.30168024439918534, + "grad_norm": 26.142065048217773, + "learning_rate": 1.7625421765385124e-06, + "loss": 0.0709, + "num_input_tokens_seen": 3413824, + "step": 3555 + }, + { + "epoch": 0.30210454854039376, + "grad_norm": 21.637981414794922, + "learning_rate": 1.7615831050292127e-06, + "loss": 0.097, + "num_input_tokens_seen": 3418240, + "step": 3560 + }, + { + "epoch": 0.3025288526816022, + "grad_norm": 6.655580997467041, + "learning_rate": 1.760622362676965e-06, + "loss": 0.1312, + "num_input_tokens_seen": 3423168, + "step": 3565 + }, + { + "epoch": 0.3029531568228106, + "grad_norm": 23.639205932617188, + "learning_rate": 1.7596599515895486e-06, + "loss": 0.0642, + "num_input_tokens_seen": 3428224, + "step": 3570 + }, + { + "epoch": 0.303377460964019, + "grad_norm": 0.2874506413936615, + "learning_rate": 1.7586958738784055e-06, + "loss": 0.0485, + "num_input_tokens_seen": 3432896, + "step": 3575 + }, + { + "epoch": 0.30380176510522744, + "grad_norm": 48.77882766723633, + "learning_rate": 1.7577301316586323e-06, + "loss": 0.1002, + "num_input_tokens_seen": 3437632, + "step": 3580 + }, + { + "epoch": 0.30422606924643586, + "grad_norm": 33.99219512939453, + "learning_rate": 1.7567627270489787e-06, + "loss": 0.061, + "num_input_tokens_seen": 3442112, + "step": 3585 + }, + { + "epoch": 0.3046503733876443, + "grad_norm": 8.705355644226074, + "learning_rate": 1.7557936621718406e-06, + "loss": 0.0435, + "num_input_tokens_seen": 3448064, + "step": 3590 + }, + { + "epoch": 0.3050746775288527, + "grad_norm": 49.27440643310547, + "learning_rate": 1.754822939153257e-06, + "loss": 0.1354, + "num_input_tokens_seen": 3452800, + "step": 3595 + }, + { + "epoch": 0.3054989816700611, + "grad_norm": 31.726367950439453, + "learning_rate": 1.7538505601229043e-06, + "loss": 0.1403, + "num_input_tokens_seen": 3457856, + "step": 3600 + }, + { + "epoch": 0.30592328581126954, + "grad_norm": 1.0165332555770874, + "learning_rate": 1.7528765272140927e-06, + "loss": 0.036, + "num_input_tokens_seen": 3462720, + "step": 3605 + }, + { + "epoch": 0.30634758995247796, + "grad_norm": 15.145169258117676, + "learning_rate": 1.7519008425637597e-06, + "loss": 0.1049, + "num_input_tokens_seen": 3467264, + "step": 3610 + }, + { + "epoch": 0.3067718940936864, + "grad_norm": 17.913949966430664, + "learning_rate": 1.7509235083124679e-06, + "loss": 0.0713, + "num_input_tokens_seen": 3472832, + "step": 3615 + }, + { + "epoch": 0.30719619823489475, + "grad_norm": 8.478503227233887, + "learning_rate": 1.749944526604398e-06, + "loss": 0.061, + "num_input_tokens_seen": 3478016, + "step": 3620 + }, + { + "epoch": 0.30762050237610317, + "grad_norm": 0.49673381447792053, + "learning_rate": 1.7489638995873453e-06, + "loss": 0.0438, + "num_input_tokens_seen": 3482688, + "step": 3625 + }, + { + "epoch": 0.3080448065173116, + "grad_norm": 19.62775230407715, + "learning_rate": 1.7479816294127149e-06, + "loss": 0.0193, + "num_input_tokens_seen": 3487296, + "step": 3630 + }, + { + "epoch": 0.30846911065852, + "grad_norm": 0.37304261326789856, + "learning_rate": 1.746997718235517e-06, + "loss": 0.0555, + "num_input_tokens_seen": 3491712, + "step": 3635 + }, + { + "epoch": 0.3088934147997284, + "grad_norm": 4.202676296234131, + "learning_rate": 1.7460121682143616e-06, + "loss": 0.0876, + "num_input_tokens_seen": 3496256, + "step": 3640 + }, + { + "epoch": 0.30931771894093685, + "grad_norm": 22.060909271240234, + "learning_rate": 1.7450249815114545e-06, + "loss": 0.1017, + "num_input_tokens_seen": 3500672, + "step": 3645 + }, + { + "epoch": 0.30974202308214527, + "grad_norm": 0.10195865482091904, + "learning_rate": 1.744036160292592e-06, + "loss": 0.0117, + "num_input_tokens_seen": 3505536, + "step": 3650 + }, + { + "epoch": 0.3101663272233537, + "grad_norm": 0.29044589400291443, + "learning_rate": 1.7430457067271563e-06, + "loss": 0.0618, + "num_input_tokens_seen": 3510400, + "step": 3655 + }, + { + "epoch": 0.3105906313645621, + "grad_norm": 0.09018034487962723, + "learning_rate": 1.742053622988111e-06, + "loss": 0.03, + "num_input_tokens_seen": 3514880, + "step": 3660 + }, + { + "epoch": 0.31101493550577053, + "grad_norm": 33.10944747924805, + "learning_rate": 1.7410599112519969e-06, + "loss": 0.1128, + "num_input_tokens_seen": 3520192, + "step": 3665 + }, + { + "epoch": 0.31143923964697895, + "grad_norm": 5.516417026519775, + "learning_rate": 1.7400645736989246e-06, + "loss": 0.0743, + "num_input_tokens_seen": 3524544, + "step": 3670 + }, + { + "epoch": 0.31186354378818737, + "grad_norm": 1.1226091384887695, + "learning_rate": 1.7390676125125733e-06, + "loss": 0.0105, + "num_input_tokens_seen": 3528896, + "step": 3675 + }, + { + "epoch": 0.3122878479293958, + "grad_norm": 0.11550328880548477, + "learning_rate": 1.7380690298801836e-06, + "loss": 0.0761, + "num_input_tokens_seen": 3533568, + "step": 3680 + }, + { + "epoch": 0.3127121520706042, + "grad_norm": 24.203899383544922, + "learning_rate": 1.7370688279925538e-06, + "loss": 0.0437, + "num_input_tokens_seen": 3539008, + "step": 3685 + }, + { + "epoch": 0.31313645621181263, + "grad_norm": 0.20447641611099243, + "learning_rate": 1.736067009044034e-06, + "loss": 0.0234, + "num_input_tokens_seen": 3544448, + "step": 3690 + }, + { + "epoch": 0.31356076035302105, + "grad_norm": 22.803014755249023, + "learning_rate": 1.7350635752325222e-06, + "loss": 0.1307, + "num_input_tokens_seen": 3549184, + "step": 3695 + }, + { + "epoch": 0.31398506449422947, + "grad_norm": 0.733262836933136, + "learning_rate": 1.7340585287594603e-06, + "loss": 0.0346, + "num_input_tokens_seen": 3554176, + "step": 3700 + }, + { + "epoch": 0.3144093686354379, + "grad_norm": 25.992023468017578, + "learning_rate": 1.733051871829826e-06, + "loss": 0.0626, + "num_input_tokens_seen": 3558720, + "step": 3705 + }, + { + "epoch": 0.3148336727766463, + "grad_norm": 13.120671272277832, + "learning_rate": 1.7320436066521333e-06, + "loss": 0.0735, + "num_input_tokens_seen": 3563648, + "step": 3710 + }, + { + "epoch": 0.31525797691785473, + "grad_norm": 8.363951683044434, + "learning_rate": 1.7310337354384214e-06, + "loss": 0.0464, + "num_input_tokens_seen": 3568704, + "step": 3715 + }, + { + "epoch": 0.31568228105906315, + "grad_norm": 5.674199104309082, + "learning_rate": 1.7300222604042552e-06, + "loss": 0.0382, + "num_input_tokens_seen": 3573184, + "step": 3720 + }, + { + "epoch": 0.3161065852002716, + "grad_norm": 31.29104995727539, + "learning_rate": 1.7290091837687172e-06, + "loss": 0.0764, + "num_input_tokens_seen": 3578432, + "step": 3725 + }, + { + "epoch": 0.31653088934148, + "grad_norm": 2.064466953277588, + "learning_rate": 1.7279945077544036e-06, + "loss": 0.0116, + "num_input_tokens_seen": 3582848, + "step": 3730 + }, + { + "epoch": 0.3169551934826884, + "grad_norm": 0.26752981543540955, + "learning_rate": 1.7269782345874203e-06, + "loss": 0.0319, + "num_input_tokens_seen": 3587840, + "step": 3735 + }, + { + "epoch": 0.31737949762389683, + "grad_norm": 0.43561768531799316, + "learning_rate": 1.7259603664973766e-06, + "loss": 0.1165, + "num_input_tokens_seen": 3592576, + "step": 3740 + }, + { + "epoch": 0.31780380176510525, + "grad_norm": 14.585902214050293, + "learning_rate": 1.7249409057173806e-06, + "loss": 0.1014, + "num_input_tokens_seen": 3597376, + "step": 3745 + }, + { + "epoch": 0.3182281059063136, + "grad_norm": 1.0415359735488892, + "learning_rate": 1.7239198544840354e-06, + "loss": 0.0935, + "num_input_tokens_seen": 3601728, + "step": 3750 + }, + { + "epoch": 0.31865241004752204, + "grad_norm": 11.442963600158691, + "learning_rate": 1.7228972150374332e-06, + "loss": 0.1359, + "num_input_tokens_seen": 3606592, + "step": 3755 + }, + { + "epoch": 0.31907671418873046, + "grad_norm": 6.534305095672607, + "learning_rate": 1.7218729896211504e-06, + "loss": 0.0588, + "num_input_tokens_seen": 3611328, + "step": 3760 + }, + { + "epoch": 0.3195010183299389, + "grad_norm": 0.5454856157302856, + "learning_rate": 1.7208471804822425e-06, + "loss": 0.0536, + "num_input_tokens_seen": 3616000, + "step": 3765 + }, + { + "epoch": 0.3199253224711473, + "grad_norm": 0.16123202443122864, + "learning_rate": 1.71981978987124e-06, + "loss": 0.0302, + "num_input_tokens_seen": 3620288, + "step": 3770 + }, + { + "epoch": 0.3203496266123557, + "grad_norm": 0.17035053670406342, + "learning_rate": 1.7187908200421432e-06, + "loss": 0.0396, + "num_input_tokens_seen": 3624704, + "step": 3775 + }, + { + "epoch": 0.32077393075356414, + "grad_norm": 0.20689719915390015, + "learning_rate": 1.717760273252417e-06, + "loss": 0.0919, + "num_input_tokens_seen": 3628736, + "step": 3780 + }, + { + "epoch": 0.32119823489477256, + "grad_norm": 20.81116485595703, + "learning_rate": 1.7167281517629854e-06, + "loss": 0.0607, + "num_input_tokens_seen": 3633664, + "step": 3785 + }, + { + "epoch": 0.321622539035981, + "grad_norm": 18.75973129272461, + "learning_rate": 1.7156944578382277e-06, + "loss": 0.0758, + "num_input_tokens_seen": 3638400, + "step": 3790 + }, + { + "epoch": 0.3220468431771894, + "grad_norm": 22.641748428344727, + "learning_rate": 1.7146591937459732e-06, + "loss": 0.0976, + "num_input_tokens_seen": 3643200, + "step": 3795 + }, + { + "epoch": 0.3224711473183978, + "grad_norm": 29.938251495361328, + "learning_rate": 1.713622361757495e-06, + "loss": 0.1564, + "num_input_tokens_seen": 3647680, + "step": 3800 + }, + { + "epoch": 0.32289545145960624, + "grad_norm": 0.21171480417251587, + "learning_rate": 1.712583964147507e-06, + "loss": 0.0291, + "num_input_tokens_seen": 3653120, + "step": 3805 + }, + { + "epoch": 0.32331975560081466, + "grad_norm": 0.950505256652832, + "learning_rate": 1.7115440031941572e-06, + "loss": 0.1444, + "num_input_tokens_seen": 3657856, + "step": 3810 + }, + { + "epoch": 0.3237440597420231, + "grad_norm": 34.6823844909668, + "learning_rate": 1.7105024811790248e-06, + "loss": 0.0922, + "num_input_tokens_seen": 3662656, + "step": 3815 + }, + { + "epoch": 0.3241683638832315, + "grad_norm": 28.47976303100586, + "learning_rate": 1.7094594003871116e-06, + "loss": 0.0862, + "num_input_tokens_seen": 3668096, + "step": 3820 + }, + { + "epoch": 0.3245926680244399, + "grad_norm": 14.752837181091309, + "learning_rate": 1.7084147631068415e-06, + "loss": 0.0375, + "num_input_tokens_seen": 3673280, + "step": 3825 + }, + { + "epoch": 0.32501697216564834, + "grad_norm": 22.75851821899414, + "learning_rate": 1.7073685716300517e-06, + "loss": 0.084, + "num_input_tokens_seen": 3677824, + "step": 3830 + }, + { + "epoch": 0.32544127630685676, + "grad_norm": 2.2120866775512695, + "learning_rate": 1.7063208282519894e-06, + "loss": 0.0866, + "num_input_tokens_seen": 3682624, + "step": 3835 + }, + { + "epoch": 0.3258655804480652, + "grad_norm": 18.671844482421875, + "learning_rate": 1.7052715352713074e-06, + "loss": 0.0973, + "num_input_tokens_seen": 3687296, + "step": 3840 + }, + { + "epoch": 0.3262898845892736, + "grad_norm": 27.980823516845703, + "learning_rate": 1.7042206949900568e-06, + "loss": 0.0854, + "num_input_tokens_seen": 3692352, + "step": 3845 + }, + { + "epoch": 0.326714188730482, + "grad_norm": 23.52008628845215, + "learning_rate": 1.703168309713684e-06, + "loss": 0.0305, + "num_input_tokens_seen": 3697472, + "step": 3850 + }, + { + "epoch": 0.32713849287169044, + "grad_norm": 10.77466869354248, + "learning_rate": 1.7021143817510262e-06, + "loss": 0.0659, + "num_input_tokens_seen": 3702528, + "step": 3855 + }, + { + "epoch": 0.32756279701289887, + "grad_norm": 21.211490631103516, + "learning_rate": 1.7010589134143025e-06, + "loss": 0.1094, + "num_input_tokens_seen": 3706560, + "step": 3860 + }, + { + "epoch": 0.3279871011541073, + "grad_norm": 14.143378257751465, + "learning_rate": 1.7000019070191138e-06, + "loss": 0.067, + "num_input_tokens_seen": 3711104, + "step": 3865 + }, + { + "epoch": 0.3284114052953157, + "grad_norm": 7.531778335571289, + "learning_rate": 1.698943364884434e-06, + "loss": 0.0876, + "num_input_tokens_seen": 3715712, + "step": 3870 + }, + { + "epoch": 0.3288357094365241, + "grad_norm": 0.37810540199279785, + "learning_rate": 1.697883289332607e-06, + "loss": 0.0276, + "num_input_tokens_seen": 3720960, + "step": 3875 + }, + { + "epoch": 0.3292600135777325, + "grad_norm": 8.426651000976562, + "learning_rate": 1.6968216826893405e-06, + "loss": 0.0555, + "num_input_tokens_seen": 3725504, + "step": 3880 + }, + { + "epoch": 0.3296843177189409, + "grad_norm": 23.238388061523438, + "learning_rate": 1.6957585472837014e-06, + "loss": 0.0693, + "num_input_tokens_seen": 3729856, + "step": 3885 + }, + { + "epoch": 0.33010862186014933, + "grad_norm": 9.211931228637695, + "learning_rate": 1.6946938854481103e-06, + "loss": 0.1188, + "num_input_tokens_seen": 3735040, + "step": 3890 + }, + { + "epoch": 0.33053292600135775, + "grad_norm": 34.77646255493164, + "learning_rate": 1.6936276995183371e-06, + "loss": 0.0129, + "num_input_tokens_seen": 3740288, + "step": 3895 + }, + { + "epoch": 0.33095723014256617, + "grad_norm": 1.0880327224731445, + "learning_rate": 1.6925599918334954e-06, + "loss": 0.0761, + "num_input_tokens_seen": 3744960, + "step": 3900 + }, + { + "epoch": 0.3313815342837746, + "grad_norm": 0.20256610214710236, + "learning_rate": 1.6914907647360367e-06, + "loss": 0.0783, + "num_input_tokens_seen": 3749120, + "step": 3905 + }, + { + "epoch": 0.331805838424983, + "grad_norm": 24.49281883239746, + "learning_rate": 1.6904200205717467e-06, + "loss": 0.0857, + "num_input_tokens_seen": 3753984, + "step": 3910 + }, + { + "epoch": 0.33223014256619143, + "grad_norm": 0.2078077346086502, + "learning_rate": 1.689347761689739e-06, + "loss": 0.0364, + "num_input_tokens_seen": 3758464, + "step": 3915 + }, + { + "epoch": 0.33265444670739985, + "grad_norm": 39.527366638183594, + "learning_rate": 1.6882739904424507e-06, + "loss": 0.0956, + "num_input_tokens_seen": 3762624, + "step": 3920 + }, + { + "epoch": 0.3330787508486083, + "grad_norm": 23.886940002441406, + "learning_rate": 1.6871987091856366e-06, + "loss": 0.1428, + "num_input_tokens_seen": 3767616, + "step": 3925 + }, + { + "epoch": 0.3335030549898167, + "grad_norm": 3.738818407058716, + "learning_rate": 1.6861219202783644e-06, + "loss": 0.0047, + "num_input_tokens_seen": 3772864, + "step": 3930 + }, + { + "epoch": 0.3339273591310251, + "grad_norm": 14.304019927978516, + "learning_rate": 1.6850436260830093e-06, + "loss": 0.0992, + "num_input_tokens_seen": 3777728, + "step": 3935 + }, + { + "epoch": 0.33435166327223353, + "grad_norm": 19.410703659057617, + "learning_rate": 1.683963828965249e-06, + "loss": 0.0653, + "num_input_tokens_seen": 3782912, + "step": 3940 + }, + { + "epoch": 0.33477596741344195, + "grad_norm": 25.328245162963867, + "learning_rate": 1.6828825312940592e-06, + "loss": 0.0656, + "num_input_tokens_seen": 3788160, + "step": 3945 + }, + { + "epoch": 0.3352002715546504, + "grad_norm": 29.521923065185547, + "learning_rate": 1.6817997354417066e-06, + "loss": 0.0815, + "num_input_tokens_seen": 3792448, + "step": 3950 + }, + { + "epoch": 0.3356245756958588, + "grad_norm": 0.7535111308097839, + "learning_rate": 1.6807154437837453e-06, + "loss": 0.0654, + "num_input_tokens_seen": 3797376, + "step": 3955 + }, + { + "epoch": 0.3360488798370672, + "grad_norm": 9.811915397644043, + "learning_rate": 1.6796296586990108e-06, + "loss": 0.0956, + "num_input_tokens_seen": 3802496, + "step": 3960 + }, + { + "epoch": 0.33647318397827564, + "grad_norm": 1.2682443857192993, + "learning_rate": 1.6785423825696156e-06, + "loss": 0.0604, + "num_input_tokens_seen": 3806912, + "step": 3965 + }, + { + "epoch": 0.33689748811948406, + "grad_norm": 0.8005596995353699, + "learning_rate": 1.6774536177809426e-06, + "loss": 0.042, + "num_input_tokens_seen": 3811648, + "step": 3970 + }, + { + "epoch": 0.3373217922606925, + "grad_norm": 0.733005166053772, + "learning_rate": 1.6763633667216416e-06, + "loss": 0.0873, + "num_input_tokens_seen": 3817024, + "step": 3975 + }, + { + "epoch": 0.3377460964019009, + "grad_norm": 1.7879672050476074, + "learning_rate": 1.6752716317836226e-06, + "loss": 0.0224, + "num_input_tokens_seen": 3821440, + "step": 3980 + }, + { + "epoch": 0.3381704005431093, + "grad_norm": 0.19908781349658966, + "learning_rate": 1.6741784153620508e-06, + "loss": 0.0261, + "num_input_tokens_seen": 3825984, + "step": 3985 + }, + { + "epoch": 0.33859470468431774, + "grad_norm": 23.788095474243164, + "learning_rate": 1.6730837198553422e-06, + "loss": 0.057, + "num_input_tokens_seen": 3831104, + "step": 3990 + }, + { + "epoch": 0.33901900882552616, + "grad_norm": 6.659988880157471, + "learning_rate": 1.6719875476651577e-06, + "loss": 0.083, + "num_input_tokens_seen": 3836160, + "step": 3995 + }, + { + "epoch": 0.3394433129667346, + "grad_norm": 15.46704387664795, + "learning_rate": 1.6708899011963978e-06, + "loss": 0.099, + "num_input_tokens_seen": 3840640, + "step": 4000 + }, + { + "epoch": 0.339867617107943, + "grad_norm": 0.8073393106460571, + "learning_rate": 1.6697907828571966e-06, + "loss": 0.0456, + "num_input_tokens_seen": 3845440, + "step": 4005 + }, + { + "epoch": 0.3402919212491514, + "grad_norm": 17.0804386138916, + "learning_rate": 1.6686901950589193e-06, + "loss": 0.1105, + "num_input_tokens_seen": 3850368, + "step": 4010 + }, + { + "epoch": 0.3407162253903598, + "grad_norm": 7.3930182456970215, + "learning_rate": 1.6675881402161536e-06, + "loss": 0.0449, + "num_input_tokens_seen": 3855296, + "step": 4015 + }, + { + "epoch": 0.3411405295315682, + "grad_norm": 2.1560728549957275, + "learning_rate": 1.6664846207467054e-06, + "loss": 0.073, + "num_input_tokens_seen": 3859648, + "step": 4020 + }, + { + "epoch": 0.3415648336727766, + "grad_norm": 29.044185638427734, + "learning_rate": 1.665379639071595e-06, + "loss": 0.0364, + "num_input_tokens_seen": 3864512, + "step": 4025 + }, + { + "epoch": 0.34198913781398504, + "grad_norm": 10.980119705200195, + "learning_rate": 1.6642731976150492e-06, + "loss": 0.0528, + "num_input_tokens_seen": 3868800, + "step": 4030 + }, + { + "epoch": 0.34241344195519346, + "grad_norm": 0.7191017866134644, + "learning_rate": 1.6631652988044995e-06, + "loss": 0.059, + "num_input_tokens_seen": 3873664, + "step": 4035 + }, + { + "epoch": 0.3428377460964019, + "grad_norm": 24.94672966003418, + "learning_rate": 1.6620559450705728e-06, + "loss": 0.0991, + "num_input_tokens_seen": 3878528, + "step": 4040 + }, + { + "epoch": 0.3432620502376103, + "grad_norm": 59.45679473876953, + "learning_rate": 1.6609451388470885e-06, + "loss": 0.0806, + "num_input_tokens_seen": 3883136, + "step": 4045 + }, + { + "epoch": 0.3436863543788187, + "grad_norm": 7.993133544921875, + "learning_rate": 1.6598328825710533e-06, + "loss": 0.095, + "num_input_tokens_seen": 3888384, + "step": 4050 + }, + { + "epoch": 0.34411065852002715, + "grad_norm": 1.8137032985687256, + "learning_rate": 1.6587191786826543e-06, + "loss": 0.0117, + "num_input_tokens_seen": 3893056, + "step": 4055 + }, + { + "epoch": 0.34453496266123557, + "grad_norm": 0.13727472722530365, + "learning_rate": 1.6576040296252553e-06, + "loss": 0.0724, + "num_input_tokens_seen": 3897600, + "step": 4060 + }, + { + "epoch": 0.344959266802444, + "grad_norm": 25.691621780395508, + "learning_rate": 1.65648743784539e-06, + "loss": 0.1141, + "num_input_tokens_seen": 3902080, + "step": 4065 + }, + { + "epoch": 0.3453835709436524, + "grad_norm": 6.447222709655762, + "learning_rate": 1.6553694057927573e-06, + "loss": 0.061, + "num_input_tokens_seen": 3906880, + "step": 4070 + }, + { + "epoch": 0.3458078750848608, + "grad_norm": 19.07137680053711, + "learning_rate": 1.654249935920217e-06, + "loss": 0.1306, + "num_input_tokens_seen": 3911040, + "step": 4075 + }, + { + "epoch": 0.34623217922606925, + "grad_norm": 5.483658313751221, + "learning_rate": 1.6531290306837817e-06, + "loss": 0.0689, + "num_input_tokens_seen": 3915712, + "step": 4080 + }, + { + "epoch": 0.34665648336727767, + "grad_norm": 4.867100238800049, + "learning_rate": 1.6520066925426143e-06, + "loss": 0.0916, + "num_input_tokens_seen": 3919936, + "step": 4085 + }, + { + "epoch": 0.3470807875084861, + "grad_norm": 2.243724822998047, + "learning_rate": 1.650882923959021e-06, + "loss": 0.1051, + "num_input_tokens_seen": 3924480, + "step": 4090 + }, + { + "epoch": 0.3475050916496945, + "grad_norm": 5.008316516876221, + "learning_rate": 1.649757727398446e-06, + "loss": 0.1395, + "num_input_tokens_seen": 3929728, + "step": 4095 + }, + { + "epoch": 0.34792939579090293, + "grad_norm": 6.448604583740234, + "learning_rate": 1.6486311053294669e-06, + "loss": 0.0495, + "num_input_tokens_seen": 3934080, + "step": 4100 + }, + { + "epoch": 0.34835369993211135, + "grad_norm": 8.177726745605469, + "learning_rate": 1.6475030602237876e-06, + "loss": 0.0759, + "num_input_tokens_seen": 3938624, + "step": 4105 + }, + { + "epoch": 0.34877800407331977, + "grad_norm": 12.904138565063477, + "learning_rate": 1.646373594556236e-06, + "loss": 0.0924, + "num_input_tokens_seen": 3944448, + "step": 4110 + }, + { + "epoch": 0.3492023082145282, + "grad_norm": 8.9033203125, + "learning_rate": 1.6452427108047542e-06, + "loss": 0.0257, + "num_input_tokens_seen": 3949184, + "step": 4115 + }, + { + "epoch": 0.3496266123557366, + "grad_norm": 9.575577735900879, + "learning_rate": 1.6441104114503977e-06, + "loss": 0.0649, + "num_input_tokens_seen": 3953664, + "step": 4120 + }, + { + "epoch": 0.35005091649694503, + "grad_norm": 12.641286849975586, + "learning_rate": 1.642976698977326e-06, + "loss": 0.0585, + "num_input_tokens_seen": 3958336, + "step": 4125 + }, + { + "epoch": 0.35047522063815345, + "grad_norm": 3.237588882446289, + "learning_rate": 1.6418415758727995e-06, + "loss": 0.0051, + "num_input_tokens_seen": 3963584, + "step": 4130 + }, + { + "epoch": 0.35047522063815345, + "eval_loss": 0.07100464403629303, + "eval_runtime": 15.9339, + "eval_samples_per_second": 657.403, + "eval_steps_per_second": 82.215, + "num_input_tokens_seen": 3963584, + "step": 4130 + }, + { + "epoch": 0.35089952477936187, + "grad_norm": 9.41185474395752, + "learning_rate": 1.6407050446271738e-06, + "loss": 0.0893, + "num_input_tokens_seen": 3968896, + "step": 4135 + }, + { + "epoch": 0.3513238289205703, + "grad_norm": 9.226503372192383, + "learning_rate": 1.6395671077338928e-06, + "loss": 0.1264, + "num_input_tokens_seen": 3973440, + "step": 4140 + }, + { + "epoch": 0.35174813306177866, + "grad_norm": 20.192955017089844, + "learning_rate": 1.6384277676894855e-06, + "loss": 0.0501, + "num_input_tokens_seen": 3978176, + "step": 4145 + }, + { + "epoch": 0.3521724372029871, + "grad_norm": 19.124189376831055, + "learning_rate": 1.6372870269935583e-06, + "loss": 0.0844, + "num_input_tokens_seen": 3983360, + "step": 4150 + }, + { + "epoch": 0.3525967413441955, + "grad_norm": 51.20574951171875, + "learning_rate": 1.6361448881487912e-06, + "loss": 0.0303, + "num_input_tokens_seen": 3987584, + "step": 4155 + }, + { + "epoch": 0.3530210454854039, + "grad_norm": 0.0988692194223404, + "learning_rate": 1.6350013536609307e-06, + "loss": 0.0368, + "num_input_tokens_seen": 3992576, + "step": 4160 + }, + { + "epoch": 0.35344534962661234, + "grad_norm": 0.7540062665939331, + "learning_rate": 1.6338564260387861e-06, + "loss": 0.0488, + "num_input_tokens_seen": 3997824, + "step": 4165 + }, + { + "epoch": 0.35386965376782076, + "grad_norm": 11.16374683380127, + "learning_rate": 1.6327101077942228e-06, + "loss": 0.1147, + "num_input_tokens_seen": 4002048, + "step": 4170 + }, + { + "epoch": 0.3542939579090292, + "grad_norm": 37.31918716430664, + "learning_rate": 1.631562401442157e-06, + "loss": 0.0454, + "num_input_tokens_seen": 4006656, + "step": 4175 + }, + { + "epoch": 0.3547182620502376, + "grad_norm": 34.546321868896484, + "learning_rate": 1.6304133095005505e-06, + "loss": 0.0978, + "num_input_tokens_seen": 4011136, + "step": 4180 + }, + { + "epoch": 0.355142566191446, + "grad_norm": 62.22642517089844, + "learning_rate": 1.6292628344904048e-06, + "loss": 0.0658, + "num_input_tokens_seen": 4015808, + "step": 4185 + }, + { + "epoch": 0.35556687033265444, + "grad_norm": 17.028743743896484, + "learning_rate": 1.628110978935756e-06, + "loss": 0.0771, + "num_input_tokens_seen": 4020480, + "step": 4190 + }, + { + "epoch": 0.35599117447386286, + "grad_norm": 0.28512778878211975, + "learning_rate": 1.626957745363668e-06, + "loss": 0.0722, + "num_input_tokens_seen": 4025088, + "step": 4195 + }, + { + "epoch": 0.3564154786150713, + "grad_norm": 1.972703456878662, + "learning_rate": 1.6258031363042291e-06, + "loss": 0.0513, + "num_input_tokens_seen": 4030272, + "step": 4200 + }, + { + "epoch": 0.3568397827562797, + "grad_norm": 23.032922744750977, + "learning_rate": 1.624647154290545e-06, + "loss": 0.0864, + "num_input_tokens_seen": 4035072, + "step": 4205 + }, + { + "epoch": 0.3572640868974881, + "grad_norm": 10.1510009765625, + "learning_rate": 1.6234898018587336e-06, + "loss": 0.0615, + "num_input_tokens_seen": 4039488, + "step": 4210 + }, + { + "epoch": 0.35768839103869654, + "grad_norm": 16.575807571411133, + "learning_rate": 1.6223310815479186e-06, + "loss": 0.1352, + "num_input_tokens_seen": 4044480, + "step": 4215 + }, + { + "epoch": 0.35811269517990496, + "grad_norm": 0.11152282357215881, + "learning_rate": 1.6211709959002255e-06, + "loss": 0.0714, + "num_input_tokens_seen": 4048768, + "step": 4220 + }, + { + "epoch": 0.3585369993211134, + "grad_norm": 1.276023030281067, + "learning_rate": 1.620009547460775e-06, + "loss": 0.0638, + "num_input_tokens_seen": 4053504, + "step": 4225 + }, + { + "epoch": 0.3589613034623218, + "grad_norm": 2.9088714122772217, + "learning_rate": 1.6188467387776779e-06, + "loss": 0.0221, + "num_input_tokens_seen": 4057856, + "step": 4230 + }, + { + "epoch": 0.3593856076035302, + "grad_norm": 14.828264236450195, + "learning_rate": 1.6176825724020286e-06, + "loss": 0.0434, + "num_input_tokens_seen": 4062784, + "step": 4235 + }, + { + "epoch": 0.35980991174473864, + "grad_norm": 0.575377881526947, + "learning_rate": 1.6165170508879007e-06, + "loss": 0.0282, + "num_input_tokens_seen": 4067328, + "step": 4240 + }, + { + "epoch": 0.36023421588594706, + "grad_norm": 70.52108764648438, + "learning_rate": 1.6153501767923408e-06, + "loss": 0.0755, + "num_input_tokens_seen": 4072704, + "step": 4245 + }, + { + "epoch": 0.3606585200271555, + "grad_norm": 11.265511512756348, + "learning_rate": 1.6141819526753626e-06, + "loss": 0.1098, + "num_input_tokens_seen": 4077504, + "step": 4250 + }, + { + "epoch": 0.3610828241683639, + "grad_norm": 0.06153067946434021, + "learning_rate": 1.613012381099942e-06, + "loss": 0.1086, + "num_input_tokens_seen": 4082240, + "step": 4255 + }, + { + "epoch": 0.3615071283095723, + "grad_norm": 6.542662620544434, + "learning_rate": 1.6118414646320111e-06, + "loss": 0.0658, + "num_input_tokens_seen": 4086272, + "step": 4260 + }, + { + "epoch": 0.36193143245078074, + "grad_norm": 58.30453872680664, + "learning_rate": 1.6106692058404518e-06, + "loss": 0.0744, + "num_input_tokens_seen": 4090880, + "step": 4265 + }, + { + "epoch": 0.36235573659198916, + "grad_norm": 14.486126899719238, + "learning_rate": 1.6094956072970924e-06, + "loss": 0.0715, + "num_input_tokens_seen": 4095552, + "step": 4270 + }, + { + "epoch": 0.36278004073319753, + "grad_norm": 0.5152646899223328, + "learning_rate": 1.608320671576699e-06, + "loss": 0.0807, + "num_input_tokens_seen": 4100352, + "step": 4275 + }, + { + "epoch": 0.36320434487440595, + "grad_norm": 1.4671602249145508, + "learning_rate": 1.6071444012569723e-06, + "loss": 0.052, + "num_input_tokens_seen": 4104704, + "step": 4280 + }, + { + "epoch": 0.36362864901561437, + "grad_norm": 14.76534366607666, + "learning_rate": 1.6059667989185405e-06, + "loss": 0.1166, + "num_input_tokens_seen": 4109376, + "step": 4285 + }, + { + "epoch": 0.3640529531568228, + "grad_norm": 8.199237823486328, + "learning_rate": 1.6047878671449544e-06, + "loss": 0.0883, + "num_input_tokens_seen": 4114496, + "step": 4290 + }, + { + "epoch": 0.3644772572980312, + "grad_norm": 21.092506408691406, + "learning_rate": 1.6036076085226812e-06, + "loss": 0.0369, + "num_input_tokens_seen": 4119552, + "step": 4295 + }, + { + "epoch": 0.36490156143923963, + "grad_norm": 10.888265609741211, + "learning_rate": 1.6024260256410995e-06, + "loss": 0.0301, + "num_input_tokens_seen": 4124352, + "step": 4300 + }, + { + "epoch": 0.36532586558044805, + "grad_norm": 48.6574592590332, + "learning_rate": 1.601243121092493e-06, + "loss": 0.0234, + "num_input_tokens_seen": 4129152, + "step": 4305 + }, + { + "epoch": 0.36575016972165647, + "grad_norm": 8.204540252685547, + "learning_rate": 1.6000588974720443e-06, + "loss": 0.0369, + "num_input_tokens_seen": 4134144, + "step": 4310 + }, + { + "epoch": 0.3661744738628649, + "grad_norm": 9.99583625793457, + "learning_rate": 1.5988733573778314e-06, + "loss": 0.1336, + "num_input_tokens_seen": 4138816, + "step": 4315 + }, + { + "epoch": 0.3665987780040733, + "grad_norm": 22.33359146118164, + "learning_rate": 1.597686503410819e-06, + "loss": 0.1023, + "num_input_tokens_seen": 4143680, + "step": 4320 + }, + { + "epoch": 0.36702308214528173, + "grad_norm": 13.462114334106445, + "learning_rate": 1.596498338174856e-06, + "loss": 0.0953, + "num_input_tokens_seen": 4149120, + "step": 4325 + }, + { + "epoch": 0.36744738628649015, + "grad_norm": 3.512903928756714, + "learning_rate": 1.595308864276666e-06, + "loss": 0.0331, + "num_input_tokens_seen": 4154432, + "step": 4330 + }, + { + "epoch": 0.36787169042769857, + "grad_norm": 0.18234136700630188, + "learning_rate": 1.5941180843258452e-06, + "loss": 0.0485, + "num_input_tokens_seen": 4159360, + "step": 4335 + }, + { + "epoch": 0.368295994568907, + "grad_norm": 1.4561611413955688, + "learning_rate": 1.5929260009348551e-06, + "loss": 0.0196, + "num_input_tokens_seen": 4163520, + "step": 4340 + }, + { + "epoch": 0.3687202987101154, + "grad_norm": 37.51924133300781, + "learning_rate": 1.5917326167190163e-06, + "loss": 0.1079, + "num_input_tokens_seen": 4168640, + "step": 4345 + }, + { + "epoch": 0.36914460285132383, + "grad_norm": 33.85417556762695, + "learning_rate": 1.5905379342965033e-06, + "loss": 0.1585, + "num_input_tokens_seen": 4173312, + "step": 4350 + }, + { + "epoch": 0.36956890699253225, + "grad_norm": 6.61181640625, + "learning_rate": 1.589341956288339e-06, + "loss": 0.1323, + "num_input_tokens_seen": 4177664, + "step": 4355 + }, + { + "epoch": 0.3699932111337407, + "grad_norm": 2.2639856338500977, + "learning_rate": 1.5881446853183888e-06, + "loss": 0.0261, + "num_input_tokens_seen": 4182016, + "step": 4360 + }, + { + "epoch": 0.3704175152749491, + "grad_norm": 1.7123826742172241, + "learning_rate": 1.586946124013354e-06, + "loss": 0.0039, + "num_input_tokens_seen": 4186624, + "step": 4365 + }, + { + "epoch": 0.3708418194161575, + "grad_norm": 6.56783390045166, + "learning_rate": 1.585746275002768e-06, + "loss": 0.0708, + "num_input_tokens_seen": 4191936, + "step": 4370 + }, + { + "epoch": 0.37126612355736593, + "grad_norm": 7.986996173858643, + "learning_rate": 1.5845451409189887e-06, + "loss": 0.087, + "num_input_tokens_seen": 4196352, + "step": 4375 + }, + { + "epoch": 0.37169042769857435, + "grad_norm": 23.848651885986328, + "learning_rate": 1.5833427243971927e-06, + "loss": 0.0648, + "num_input_tokens_seen": 4200960, + "step": 4380 + }, + { + "epoch": 0.3721147318397828, + "grad_norm": 0.21783022582530975, + "learning_rate": 1.582139028075371e-06, + "loss": 0.0723, + "num_input_tokens_seen": 4206208, + "step": 4385 + }, + { + "epoch": 0.3725390359809912, + "grad_norm": 11.511324882507324, + "learning_rate": 1.580934054594322e-06, + "loss": 0.0497, + "num_input_tokens_seen": 4210688, + "step": 4390 + }, + { + "epoch": 0.3729633401221996, + "grad_norm": 5.814783096313477, + "learning_rate": 1.5797278065976463e-06, + "loss": 0.0667, + "num_input_tokens_seen": 4215424, + "step": 4395 + }, + { + "epoch": 0.37338764426340804, + "grad_norm": 8.122594833374023, + "learning_rate": 1.5785202867317407e-06, + "loss": 0.0108, + "num_input_tokens_seen": 4220160, + "step": 4400 + }, + { + "epoch": 0.37381194840461646, + "grad_norm": 4.401556968688965, + "learning_rate": 1.5773114976457915e-06, + "loss": 0.1083, + "num_input_tokens_seen": 4224832, + "step": 4405 + }, + { + "epoch": 0.3742362525458248, + "grad_norm": 21.742496490478516, + "learning_rate": 1.576101441991771e-06, + "loss": 0.0833, + "num_input_tokens_seen": 4230464, + "step": 4410 + }, + { + "epoch": 0.37466055668703324, + "grad_norm": 1.350370168685913, + "learning_rate": 1.574890122424429e-06, + "loss": 0.0739, + "num_input_tokens_seen": 4234816, + "step": 4415 + }, + { + "epoch": 0.37508486082824166, + "grad_norm": 3.1516149044036865, + "learning_rate": 1.573677541601289e-06, + "loss": 0.1099, + "num_input_tokens_seen": 4239424, + "step": 4420 + }, + { + "epoch": 0.3755091649694501, + "grad_norm": 22.65239715576172, + "learning_rate": 1.5724637021826409e-06, + "loss": 0.0576, + "num_input_tokens_seen": 4244160, + "step": 4425 + }, + { + "epoch": 0.3759334691106585, + "grad_norm": 4.091480255126953, + "learning_rate": 1.5712486068315367e-06, + "loss": 0.0613, + "num_input_tokens_seen": 4248320, + "step": 4430 + }, + { + "epoch": 0.3763577732518669, + "grad_norm": 26.138118743896484, + "learning_rate": 1.5700322582137826e-06, + "loss": 0.0184, + "num_input_tokens_seen": 4252672, + "step": 4435 + }, + { + "epoch": 0.37678207739307534, + "grad_norm": 0.11472512781620026, + "learning_rate": 1.5688146589979358e-06, + "loss": 0.0768, + "num_input_tokens_seen": 4257280, + "step": 4440 + }, + { + "epoch": 0.37720638153428376, + "grad_norm": 4.047384262084961, + "learning_rate": 1.5675958118552962e-06, + "loss": 0.0366, + "num_input_tokens_seen": 4262592, + "step": 4445 + }, + { + "epoch": 0.3776306856754922, + "grad_norm": 6.992406845092773, + "learning_rate": 1.5663757194599013e-06, + "loss": 0.0796, + "num_input_tokens_seen": 4267840, + "step": 4450 + }, + { + "epoch": 0.3780549898167006, + "grad_norm": 0.738090991973877, + "learning_rate": 1.5651543844885216e-06, + "loss": 0.0535, + "num_input_tokens_seen": 4272576, + "step": 4455 + }, + { + "epoch": 0.378479293957909, + "grad_norm": 42.81803512573242, + "learning_rate": 1.5639318096206533e-06, + "loss": 0.0441, + "num_input_tokens_seen": 4277568, + "step": 4460 + }, + { + "epoch": 0.37890359809911744, + "grad_norm": 11.038164138793945, + "learning_rate": 1.562707997538512e-06, + "loss": 0.0889, + "num_input_tokens_seen": 4282880, + "step": 4465 + }, + { + "epoch": 0.37932790224032586, + "grad_norm": 16.20423126220703, + "learning_rate": 1.5614829509270288e-06, + "loss": 0.087, + "num_input_tokens_seen": 4287296, + "step": 4470 + }, + { + "epoch": 0.3797522063815343, + "grad_norm": 0.10790825635194778, + "learning_rate": 1.5602566724738426e-06, + "loss": 0.0615, + "num_input_tokens_seen": 4291712, + "step": 4475 + }, + { + "epoch": 0.3801765105227427, + "grad_norm": 20.115352630615234, + "learning_rate": 1.5590291648692952e-06, + "loss": 0.0883, + "num_input_tokens_seen": 4297088, + "step": 4480 + }, + { + "epoch": 0.3806008146639511, + "grad_norm": 16.331439971923828, + "learning_rate": 1.5578004308064245e-06, + "loss": 0.0393, + "num_input_tokens_seen": 4302784, + "step": 4485 + }, + { + "epoch": 0.38102511880515955, + "grad_norm": 18.88719367980957, + "learning_rate": 1.55657047298096e-06, + "loss": 0.0433, + "num_input_tokens_seen": 4307584, + "step": 4490 + }, + { + "epoch": 0.38144942294636797, + "grad_norm": 0.11818954348564148, + "learning_rate": 1.5553392940913148e-06, + "loss": 0.0148, + "num_input_tokens_seen": 4312064, + "step": 4495 + }, + { + "epoch": 0.3818737270875764, + "grad_norm": 0.27669757604599, + "learning_rate": 1.554106896838582e-06, + "loss": 0.0059, + "num_input_tokens_seen": 4316672, + "step": 4500 + }, + { + "epoch": 0.3822980312287848, + "grad_norm": 5.000036716461182, + "learning_rate": 1.5528732839265272e-06, + "loss": 0.0332, + "num_input_tokens_seen": 4321088, + "step": 4505 + }, + { + "epoch": 0.3827223353699932, + "grad_norm": 41.28630065917969, + "learning_rate": 1.5516384580615832e-06, + "loss": 0.0341, + "num_input_tokens_seen": 4326208, + "step": 4510 + }, + { + "epoch": 0.38314663951120165, + "grad_norm": 0.11097334325313568, + "learning_rate": 1.5504024219528437e-06, + "loss": 0.0553, + "num_input_tokens_seen": 4331008, + "step": 4515 + }, + { + "epoch": 0.38357094365241007, + "grad_norm": 7.243147373199463, + "learning_rate": 1.5491651783120578e-06, + "loss": 0.019, + "num_input_tokens_seen": 4335936, + "step": 4520 + }, + { + "epoch": 0.3839952477936185, + "grad_norm": 0.0719473585486412, + "learning_rate": 1.5479267298536238e-06, + "loss": 0.0442, + "num_input_tokens_seen": 4340608, + "step": 4525 + }, + { + "epoch": 0.3844195519348269, + "grad_norm": 23.928747177124023, + "learning_rate": 1.5466870792945828e-06, + "loss": 0.0841, + "num_input_tokens_seen": 4345344, + "step": 4530 + }, + { + "epoch": 0.38484385607603533, + "grad_norm": 13.379002571105957, + "learning_rate": 1.545446229354614e-06, + "loss": 0.1063, + "num_input_tokens_seen": 4351680, + "step": 4535 + }, + { + "epoch": 0.3852681602172437, + "grad_norm": 26.05900764465332, + "learning_rate": 1.5442041827560272e-06, + "loss": 0.12, + "num_input_tokens_seen": 4356032, + "step": 4540 + }, + { + "epoch": 0.3856924643584521, + "grad_norm": 41.182865142822266, + "learning_rate": 1.542960942223758e-06, + "loss": 0.0567, + "num_input_tokens_seen": 4360576, + "step": 4545 + }, + { + "epoch": 0.38611676849966053, + "grad_norm": 22.600698471069336, + "learning_rate": 1.541716510485361e-06, + "loss": 0.1224, + "num_input_tokens_seen": 4365376, + "step": 4550 + }, + { + "epoch": 0.38654107264086895, + "grad_norm": 0.17062042653560638, + "learning_rate": 1.5404708902710048e-06, + "loss": 0.015, + "num_input_tokens_seen": 4370368, + "step": 4555 + }, + { + "epoch": 0.3869653767820774, + "grad_norm": 8.25340747833252, + "learning_rate": 1.5392240843134648e-06, + "loss": 0.0918, + "num_input_tokens_seen": 4375488, + "step": 4560 + }, + { + "epoch": 0.3873896809232858, + "grad_norm": 23.542274475097656, + "learning_rate": 1.5379760953481178e-06, + "loss": 0.1303, + "num_input_tokens_seen": 4380352, + "step": 4565 + }, + { + "epoch": 0.3878139850644942, + "grad_norm": 21.426984786987305, + "learning_rate": 1.5367269261129367e-06, + "loss": 0.0735, + "num_input_tokens_seen": 4385664, + "step": 4570 + }, + { + "epoch": 0.38823828920570264, + "grad_norm": 15.733993530273438, + "learning_rate": 1.5354765793484831e-06, + "loss": 0.1257, + "num_input_tokens_seen": 4390528, + "step": 4575 + }, + { + "epoch": 0.38866259334691106, + "grad_norm": 0.2743380069732666, + "learning_rate": 1.5342250577979023e-06, + "loss": 0.0082, + "num_input_tokens_seen": 4395136, + "step": 4580 + }, + { + "epoch": 0.3890868974881195, + "grad_norm": 6.8629279136657715, + "learning_rate": 1.532972364206917e-06, + "loss": 0.0835, + "num_input_tokens_seen": 4400320, + "step": 4585 + }, + { + "epoch": 0.3895112016293279, + "grad_norm": 0.2883586883544922, + "learning_rate": 1.5317185013238209e-06, + "loss": 0.0612, + "num_input_tokens_seen": 4405056, + "step": 4590 + }, + { + "epoch": 0.3899355057705363, + "grad_norm": 17.891014099121094, + "learning_rate": 1.5304634718994738e-06, + "loss": 0.0862, + "num_input_tokens_seen": 4410624, + "step": 4595 + }, + { + "epoch": 0.39035980991174474, + "grad_norm": 6.251929759979248, + "learning_rate": 1.5292072786872938e-06, + "loss": 0.0614, + "num_input_tokens_seen": 4415424, + "step": 4600 + }, + { + "epoch": 0.39078411405295316, + "grad_norm": 9.538578033447266, + "learning_rate": 1.527949924443253e-06, + "loss": 0.0369, + "num_input_tokens_seen": 4420224, + "step": 4605 + }, + { + "epoch": 0.3912084181941616, + "grad_norm": 27.938451766967773, + "learning_rate": 1.52669141192587e-06, + "loss": 0.0958, + "num_input_tokens_seen": 4424896, + "step": 4610 + }, + { + "epoch": 0.39163272233537, + "grad_norm": 29.00919532775879, + "learning_rate": 1.5254317438962052e-06, + "loss": 0.0946, + "num_input_tokens_seen": 4429312, + "step": 4615 + }, + { + "epoch": 0.3920570264765784, + "grad_norm": 8.168607711791992, + "learning_rate": 1.5241709231178539e-06, + "loss": 0.1145, + "num_input_tokens_seen": 4433920, + "step": 4620 + }, + { + "epoch": 0.39248133061778684, + "grad_norm": 14.62310791015625, + "learning_rate": 1.5229089523569405e-06, + "loss": 0.0557, + "num_input_tokens_seen": 4438464, + "step": 4625 + }, + { + "epoch": 0.39290563475899526, + "grad_norm": 5.12725830078125, + "learning_rate": 1.5216458343821122e-06, + "loss": 0.0481, + "num_input_tokens_seen": 4443584, + "step": 4630 + }, + { + "epoch": 0.3933299389002037, + "grad_norm": 1.6804157495498657, + "learning_rate": 1.5203815719645328e-06, + "loss": 0.0451, + "num_input_tokens_seen": 4448960, + "step": 4635 + }, + { + "epoch": 0.3937542430414121, + "grad_norm": 0.8322094678878784, + "learning_rate": 1.5191161678778773e-06, + "loss": 0.0307, + "num_input_tokens_seen": 4453504, + "step": 4640 + }, + { + "epoch": 0.3941785471826205, + "grad_norm": 32.25953674316406, + "learning_rate": 1.5178496248983251e-06, + "loss": 0.108, + "num_input_tokens_seen": 4458048, + "step": 4645 + }, + { + "epoch": 0.39460285132382894, + "grad_norm": 13.34233283996582, + "learning_rate": 1.5165819458045554e-06, + "loss": 0.0384, + "num_input_tokens_seen": 4463168, + "step": 4650 + }, + { + "epoch": 0.39502715546503736, + "grad_norm": 33.31865692138672, + "learning_rate": 1.5153131333777377e-06, + "loss": 0.1386, + "num_input_tokens_seen": 4467520, + "step": 4655 + }, + { + "epoch": 0.3954514596062458, + "grad_norm": 13.36380386352539, + "learning_rate": 1.51404319040153e-06, + "loss": 0.0864, + "num_input_tokens_seen": 4472960, + "step": 4660 + }, + { + "epoch": 0.3958757637474542, + "grad_norm": 13.95639705657959, + "learning_rate": 1.5127721196620697e-06, + "loss": 0.058, + "num_input_tokens_seen": 4477312, + "step": 4665 + }, + { + "epoch": 0.39630006788866257, + "grad_norm": 2.9383718967437744, + "learning_rate": 1.5114999239479685e-06, + "loss": 0.0255, + "num_input_tokens_seen": 4482432, + "step": 4670 + }, + { + "epoch": 0.396724372029871, + "grad_norm": 15.002151489257812, + "learning_rate": 1.5102266060503063e-06, + "loss": 0.0754, + "num_input_tokens_seen": 4486464, + "step": 4675 + }, + { + "epoch": 0.3971486761710794, + "grad_norm": 5.086097717285156, + "learning_rate": 1.508952168762624e-06, + "loss": 0.104, + "num_input_tokens_seen": 4491520, + "step": 4680 + }, + { + "epoch": 0.3975729803122878, + "grad_norm": 3.3495185375213623, + "learning_rate": 1.5076766148809209e-06, + "loss": 0.0473, + "num_input_tokens_seen": 4496256, + "step": 4685 + }, + { + "epoch": 0.39799728445349625, + "grad_norm": 11.387035369873047, + "learning_rate": 1.506399947203643e-06, + "loss": 0.0532, + "num_input_tokens_seen": 4500416, + "step": 4690 + }, + { + "epoch": 0.39842158859470467, + "grad_norm": 20.36817169189453, + "learning_rate": 1.5051221685316815e-06, + "loss": 0.052, + "num_input_tokens_seen": 4505536, + "step": 4695 + }, + { + "epoch": 0.3988458927359131, + "grad_norm": 5.638185977935791, + "learning_rate": 1.5038432816683652e-06, + "loss": 0.1351, + "num_input_tokens_seen": 4510400, + "step": 4700 + }, + { + "epoch": 0.3992701968771215, + "grad_norm": 16.21558380126953, + "learning_rate": 1.5025632894194532e-06, + "loss": 0.0894, + "num_input_tokens_seen": 4515904, + "step": 4705 + }, + { + "epoch": 0.39969450101832993, + "grad_norm": 21.98189926147461, + "learning_rate": 1.5012821945931303e-06, + "loss": 0.0939, + "num_input_tokens_seen": 4520448, + "step": 4710 + }, + { + "epoch": 0.40011880515953835, + "grad_norm": 18.89621353149414, + "learning_rate": 1.5e-06, + "loss": 0.0654, + "num_input_tokens_seen": 4525824, + "step": 4715 + }, + { + "epoch": 0.40054310930074677, + "grad_norm": 18.766685485839844, + "learning_rate": 1.498716708453079e-06, + "loss": 0.0782, + "num_input_tokens_seen": 4530304, + "step": 4720 + }, + { + "epoch": 0.40054310930074677, + "eval_loss": 0.055105432868003845, + "eval_runtime": 15.7954, + "eval_samples_per_second": 663.169, + "eval_steps_per_second": 82.936, + "num_input_tokens_seen": 4530304, + "step": 4720 + }, + { + "epoch": 0.4009674134419552, + "grad_norm": 7.12246561050415, + "learning_rate": 1.4974323227677903e-06, + "loss": 0.1067, + "num_input_tokens_seen": 4534720, + "step": 4725 + }, + { + "epoch": 0.4013917175831636, + "grad_norm": 12.245247840881348, + "learning_rate": 1.4961468457619575e-06, + "loss": 0.1018, + "num_input_tokens_seen": 4539520, + "step": 4730 + }, + { + "epoch": 0.40181602172437203, + "grad_norm": 0.7408866882324219, + "learning_rate": 1.4948602802557982e-06, + "loss": 0.0083, + "num_input_tokens_seen": 4544448, + "step": 4735 + }, + { + "epoch": 0.40224032586558045, + "grad_norm": 17.369991302490234, + "learning_rate": 1.4935726290719177e-06, + "loss": 0.0448, + "num_input_tokens_seen": 4549632, + "step": 4740 + }, + { + "epoch": 0.40266463000678887, + "grad_norm": 1.5301878452301025, + "learning_rate": 1.492283895035305e-06, + "loss": 0.0697, + "num_input_tokens_seen": 4554560, + "step": 4745 + }, + { + "epoch": 0.4030889341479973, + "grad_norm": 7.232668876647949, + "learning_rate": 1.490994080973322e-06, + "loss": 0.0605, + "num_input_tokens_seen": 4559168, + "step": 4750 + }, + { + "epoch": 0.4035132382892057, + "grad_norm": 11.598247528076172, + "learning_rate": 1.4897031897157025e-06, + "loss": 0.0556, + "num_input_tokens_seen": 4563968, + "step": 4755 + }, + { + "epoch": 0.40393754243041413, + "grad_norm": 1.8283789157867432, + "learning_rate": 1.4884112240945425e-06, + "loss": 0.0479, + "num_input_tokens_seen": 4569408, + "step": 4760 + }, + { + "epoch": 0.40436184657162255, + "grad_norm": 0.4203529357910156, + "learning_rate": 1.4871181869442952e-06, + "loss": 0.0856, + "num_input_tokens_seen": 4573824, + "step": 4765 + }, + { + "epoch": 0.40478615071283097, + "grad_norm": 18.766796112060547, + "learning_rate": 1.485824081101764e-06, + "loss": 0.126, + "num_input_tokens_seen": 4578368, + "step": 4770 + }, + { + "epoch": 0.4052104548540394, + "grad_norm": 31.071496963500977, + "learning_rate": 1.4845289094060984e-06, + "loss": 0.0938, + "num_input_tokens_seen": 4583040, + "step": 4775 + }, + { + "epoch": 0.4056347589952478, + "grad_norm": 11.54023265838623, + "learning_rate": 1.4832326746987846e-06, + "loss": 0.1106, + "num_input_tokens_seen": 4587968, + "step": 4780 + }, + { + "epoch": 0.40605906313645623, + "grad_norm": 8.134360313415527, + "learning_rate": 1.4819353798236424e-06, + "loss": 0.1431, + "num_input_tokens_seen": 4593216, + "step": 4785 + }, + { + "epoch": 0.40648336727766465, + "grad_norm": 15.816326141357422, + "learning_rate": 1.4806370276268163e-06, + "loss": 0.0959, + "num_input_tokens_seen": 4597824, + "step": 4790 + }, + { + "epoch": 0.4069076714188731, + "grad_norm": 8.704207420349121, + "learning_rate": 1.4793376209567714e-06, + "loss": 0.0695, + "num_input_tokens_seen": 4602880, + "step": 4795 + }, + { + "epoch": 0.4073319755600815, + "grad_norm": 20.96257209777832, + "learning_rate": 1.4780371626642858e-06, + "loss": 0.0287, + "num_input_tokens_seen": 4607744, + "step": 4800 + }, + { + "epoch": 0.40775627970128986, + "grad_norm": 6.630837917327881, + "learning_rate": 1.4767356556024448e-06, + "loss": 0.1188, + "num_input_tokens_seen": 4612224, + "step": 4805 + }, + { + "epoch": 0.4081805838424983, + "grad_norm": 18.014816284179688, + "learning_rate": 1.4754331026266344e-06, + "loss": 0.0525, + "num_input_tokens_seen": 4616704, + "step": 4810 + }, + { + "epoch": 0.4086048879837067, + "grad_norm": 10.613683700561523, + "learning_rate": 1.474129506594536e-06, + "loss": 0.0684, + "num_input_tokens_seen": 4621568, + "step": 4815 + }, + { + "epoch": 0.4090291921249151, + "grad_norm": 7.092163562774658, + "learning_rate": 1.472824870366118e-06, + "loss": 0.0427, + "num_input_tokens_seen": 4626176, + "step": 4820 + }, + { + "epoch": 0.40945349626612354, + "grad_norm": 8.511113166809082, + "learning_rate": 1.4715191968036324e-06, + "loss": 0.0541, + "num_input_tokens_seen": 4630400, + "step": 4825 + }, + { + "epoch": 0.40987780040733196, + "grad_norm": 1.382973074913025, + "learning_rate": 1.4702124887716058e-06, + "loss": 0.0376, + "num_input_tokens_seen": 4634688, + "step": 4830 + }, + { + "epoch": 0.4103021045485404, + "grad_norm": 2.407348871231079, + "learning_rate": 1.4689047491368354e-06, + "loss": 0.0444, + "num_input_tokens_seen": 4639104, + "step": 4835 + }, + { + "epoch": 0.4107264086897488, + "grad_norm": 33.6815185546875, + "learning_rate": 1.4675959807683808e-06, + "loss": 0.0833, + "num_input_tokens_seen": 4643328, + "step": 4840 + }, + { + "epoch": 0.4111507128309572, + "grad_norm": 2.1301493644714355, + "learning_rate": 1.4662861865375588e-06, + "loss": 0.0481, + "num_input_tokens_seen": 4648320, + "step": 4845 + }, + { + "epoch": 0.41157501697216564, + "grad_norm": 0.48759591579437256, + "learning_rate": 1.4649753693179373e-06, + "loss": 0.0984, + "num_input_tokens_seen": 4653120, + "step": 4850 + }, + { + "epoch": 0.41199932111337406, + "grad_norm": 20.999130249023438, + "learning_rate": 1.4636635319853272e-06, + "loss": 0.073, + "num_input_tokens_seen": 4658048, + "step": 4855 + }, + { + "epoch": 0.4124236252545825, + "grad_norm": 0.22578765451908112, + "learning_rate": 1.4623506774177796e-06, + "loss": 0.0463, + "num_input_tokens_seen": 4662976, + "step": 4860 + }, + { + "epoch": 0.4128479293957909, + "grad_norm": 8.64342975616455, + "learning_rate": 1.4610368084955748e-06, + "loss": 0.0902, + "num_input_tokens_seen": 4667840, + "step": 4865 + }, + { + "epoch": 0.4132722335369993, + "grad_norm": 48.03071212768555, + "learning_rate": 1.4597219281012208e-06, + "loss": 0.096, + "num_input_tokens_seen": 4673408, + "step": 4870 + }, + { + "epoch": 0.41369653767820774, + "grad_norm": 26.673458099365234, + "learning_rate": 1.4584060391194436e-06, + "loss": 0.1332, + "num_input_tokens_seen": 4679552, + "step": 4875 + }, + { + "epoch": 0.41412084181941616, + "grad_norm": 0.5152348279953003, + "learning_rate": 1.4570891444371814e-06, + "loss": 0.0965, + "num_input_tokens_seen": 4684352, + "step": 4880 + }, + { + "epoch": 0.4145451459606246, + "grad_norm": 6.625322341918945, + "learning_rate": 1.4557712469435797e-06, + "loss": 0.0692, + "num_input_tokens_seen": 4688704, + "step": 4885 + }, + { + "epoch": 0.414969450101833, + "grad_norm": 22.45545196533203, + "learning_rate": 1.4544523495299841e-06, + "loss": 0.0413, + "num_input_tokens_seen": 4693440, + "step": 4890 + }, + { + "epoch": 0.4153937542430414, + "grad_norm": 1.495713710784912, + "learning_rate": 1.4531324550899333e-06, + "loss": 0.0806, + "num_input_tokens_seen": 4698496, + "step": 4895 + }, + { + "epoch": 0.41581805838424984, + "grad_norm": 12.096963882446289, + "learning_rate": 1.451811566519154e-06, + "loss": 0.0962, + "num_input_tokens_seen": 4703616, + "step": 4900 + }, + { + "epoch": 0.41624236252545826, + "grad_norm": 5.132894992828369, + "learning_rate": 1.450489686715553e-06, + "loss": 0.0827, + "num_input_tokens_seen": 4707584, + "step": 4905 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 41.053489685058594, + "learning_rate": 1.4491668185792131e-06, + "loss": 0.1271, + "num_input_tokens_seen": 4711936, + "step": 4910 + }, + { + "epoch": 0.4170909708078751, + "grad_norm": 1.5487451553344727, + "learning_rate": 1.4478429650123851e-06, + "loss": 0.0545, + "num_input_tokens_seen": 4716992, + "step": 4915 + }, + { + "epoch": 0.4175152749490835, + "grad_norm": 24.532102584838867, + "learning_rate": 1.44651812891948e-06, + "loss": 0.0595, + "num_input_tokens_seen": 4721792, + "step": 4920 + }, + { + "epoch": 0.41793957909029195, + "grad_norm": 14.233129501342773, + "learning_rate": 1.4451923132070669e-06, + "loss": 0.0644, + "num_input_tokens_seen": 4726208, + "step": 4925 + }, + { + "epoch": 0.41836388323150037, + "grad_norm": 24.64492416381836, + "learning_rate": 1.4438655207838628e-06, + "loss": 0.0712, + "num_input_tokens_seen": 4730880, + "step": 4930 + }, + { + "epoch": 0.41878818737270873, + "grad_norm": 2.684027910232544, + "learning_rate": 1.4425377545607275e-06, + "loss": 0.0898, + "num_input_tokens_seen": 4736128, + "step": 4935 + }, + { + "epoch": 0.41921249151391715, + "grad_norm": 0.3984468877315521, + "learning_rate": 1.4412090174506567e-06, + "loss": 0.0292, + "num_input_tokens_seen": 4740608, + "step": 4940 + }, + { + "epoch": 0.41963679565512557, + "grad_norm": 1.1649671792984009, + "learning_rate": 1.4398793123687777e-06, + "loss": 0.0739, + "num_input_tokens_seen": 4745408, + "step": 4945 + }, + { + "epoch": 0.420061099796334, + "grad_norm": 0.20689576864242554, + "learning_rate": 1.4385486422323404e-06, + "loss": 0.0038, + "num_input_tokens_seen": 4750400, + "step": 4950 + }, + { + "epoch": 0.4204854039375424, + "grad_norm": 0.23738016188144684, + "learning_rate": 1.4372170099607123e-06, + "loss": 0.0556, + "num_input_tokens_seen": 4755008, + "step": 4955 + }, + { + "epoch": 0.42090970807875083, + "grad_norm": 6.202879428863525, + "learning_rate": 1.435884418475371e-06, + "loss": 0.1251, + "num_input_tokens_seen": 4759424, + "step": 4960 + }, + { + "epoch": 0.42133401221995925, + "grad_norm": 3.472230911254883, + "learning_rate": 1.4345508706998994e-06, + "loss": 0.0476, + "num_input_tokens_seen": 4763968, + "step": 4965 + }, + { + "epoch": 0.4217583163611677, + "grad_norm": 13.630236625671387, + "learning_rate": 1.433216369559978e-06, + "loss": 0.065, + "num_input_tokens_seen": 4768960, + "step": 4970 + }, + { + "epoch": 0.4221826205023761, + "grad_norm": 1.2870839834213257, + "learning_rate": 1.4318809179833791e-06, + "loss": 0.0698, + "num_input_tokens_seen": 4774592, + "step": 4975 + }, + { + "epoch": 0.4226069246435845, + "grad_norm": 13.314414024353027, + "learning_rate": 1.4305445188999596e-06, + "loss": 0.0981, + "num_input_tokens_seen": 4778944, + "step": 4980 + }, + { + "epoch": 0.42303122878479293, + "grad_norm": 1.0573667287826538, + "learning_rate": 1.4292071752416558e-06, + "loss": 0.1095, + "num_input_tokens_seen": 4783488, + "step": 4985 + }, + { + "epoch": 0.42345553292600135, + "grad_norm": 1.0888034105300903, + "learning_rate": 1.4278688899424764e-06, + "loss": 0.0264, + "num_input_tokens_seen": 4788288, + "step": 4990 + }, + { + "epoch": 0.4238798370672098, + "grad_norm": 0.6745188236236572, + "learning_rate": 1.4265296659384953e-06, + "loss": 0.0264, + "num_input_tokens_seen": 4792896, + "step": 4995 + }, + { + "epoch": 0.4243041412084182, + "grad_norm": 11.085637092590332, + "learning_rate": 1.4251895061678463e-06, + "loss": 0.1121, + "num_input_tokens_seen": 4797056, + "step": 5000 + }, + { + "epoch": 0.4247284453496266, + "grad_norm": 0.7506033182144165, + "learning_rate": 1.4238484135707162e-06, + "loss": 0.0697, + "num_input_tokens_seen": 4802304, + "step": 5005 + }, + { + "epoch": 0.42515274949083504, + "grad_norm": 11.137918472290039, + "learning_rate": 1.4225063910893384e-06, + "loss": 0.0716, + "num_input_tokens_seen": 4807424, + "step": 5010 + }, + { + "epoch": 0.42557705363204346, + "grad_norm": 51.12891387939453, + "learning_rate": 1.4211634416679855e-06, + "loss": 0.0602, + "num_input_tokens_seen": 4812224, + "step": 5015 + }, + { + "epoch": 0.4260013577732519, + "grad_norm": 28.7672061920166, + "learning_rate": 1.419819568252965e-06, + "loss": 0.1474, + "num_input_tokens_seen": 4817216, + "step": 5020 + }, + { + "epoch": 0.4264256619144603, + "grad_norm": 0.18512046337127686, + "learning_rate": 1.418474773792611e-06, + "loss": 0.0352, + "num_input_tokens_seen": 4822336, + "step": 5025 + }, + { + "epoch": 0.4268499660556687, + "grad_norm": 4.583760738372803, + "learning_rate": 1.4171290612372779e-06, + "loss": 0.0397, + "num_input_tokens_seen": 4827328, + "step": 5030 + }, + { + "epoch": 0.42727427019687714, + "grad_norm": 9.943023681640625, + "learning_rate": 1.4157824335393349e-06, + "loss": 0.0677, + "num_input_tokens_seen": 4831808, + "step": 5035 + }, + { + "epoch": 0.42769857433808556, + "grad_norm": 0.5764977335929871, + "learning_rate": 1.4144348936531588e-06, + "loss": 0.0246, + "num_input_tokens_seen": 4836096, + "step": 5040 + }, + { + "epoch": 0.428122878479294, + "grad_norm": 40.916561126708984, + "learning_rate": 1.413086444535127e-06, + "loss": 0.0595, + "num_input_tokens_seen": 4840448, + "step": 5045 + }, + { + "epoch": 0.4285471826205024, + "grad_norm": 0.2913835048675537, + "learning_rate": 1.4117370891436133e-06, + "loss": 0.0571, + "num_input_tokens_seen": 4845184, + "step": 5050 + }, + { + "epoch": 0.4289714867617108, + "grad_norm": 6.826591491699219, + "learning_rate": 1.410386830438978e-06, + "loss": 0.1268, + "num_input_tokens_seen": 4849920, + "step": 5055 + }, + { + "epoch": 0.42939579090291924, + "grad_norm": 28.76835823059082, + "learning_rate": 1.4090356713835635e-06, + "loss": 0.0976, + "num_input_tokens_seen": 4854400, + "step": 5060 + }, + { + "epoch": 0.4298200950441276, + "grad_norm": 4.77614688873291, + "learning_rate": 1.4076836149416886e-06, + "loss": 0.085, + "num_input_tokens_seen": 4859392, + "step": 5065 + }, + { + "epoch": 0.430244399185336, + "grad_norm": 1.4293068647384644, + "learning_rate": 1.4063306640796404e-06, + "loss": 0.1246, + "num_input_tokens_seen": 4864512, + "step": 5070 + }, + { + "epoch": 0.43066870332654444, + "grad_norm": 8.199060440063477, + "learning_rate": 1.4049768217656674e-06, + "loss": 0.0893, + "num_input_tokens_seen": 4869888, + "step": 5075 + }, + { + "epoch": 0.43109300746775286, + "grad_norm": 7.790666580200195, + "learning_rate": 1.4036220909699748e-06, + "loss": 0.0676, + "num_input_tokens_seen": 4874304, + "step": 5080 + }, + { + "epoch": 0.4315173116089613, + "grad_norm": 35.040306091308594, + "learning_rate": 1.4022664746647168e-06, + "loss": 0.0927, + "num_input_tokens_seen": 4879360, + "step": 5085 + }, + { + "epoch": 0.4319416157501697, + "grad_norm": 9.675673484802246, + "learning_rate": 1.40090997582399e-06, + "loss": 0.0828, + "num_input_tokens_seen": 4883520, + "step": 5090 + }, + { + "epoch": 0.4323659198913781, + "grad_norm": 3.159926652908325, + "learning_rate": 1.3995525974238278e-06, + "loss": 0.039, + "num_input_tokens_seen": 4888320, + "step": 5095 + }, + { + "epoch": 0.43279022403258655, + "grad_norm": 2.8504841327667236, + "learning_rate": 1.398194342442193e-06, + "loss": 0.0103, + "num_input_tokens_seen": 4893824, + "step": 5100 + }, + { + "epoch": 0.43321452817379497, + "grad_norm": 9.892780303955078, + "learning_rate": 1.396835213858971e-06, + "loss": 0.0693, + "num_input_tokens_seen": 4898432, + "step": 5105 + }, + { + "epoch": 0.4336388323150034, + "grad_norm": 25.05731964111328, + "learning_rate": 1.395475214655965e-06, + "loss": 0.106, + "num_input_tokens_seen": 4903040, + "step": 5110 + }, + { + "epoch": 0.4340631364562118, + "grad_norm": 11.220687866210938, + "learning_rate": 1.394114347816887e-06, + "loss": 0.0744, + "num_input_tokens_seen": 4908096, + "step": 5115 + }, + { + "epoch": 0.4344874405974202, + "grad_norm": 0.7513478994369507, + "learning_rate": 1.3927526163273538e-06, + "loss": 0.1002, + "num_input_tokens_seen": 4912640, + "step": 5120 + }, + { + "epoch": 0.43491174473862865, + "grad_norm": 17.629304885864258, + "learning_rate": 1.3913900231748776e-06, + "loss": 0.0305, + "num_input_tokens_seen": 4917504, + "step": 5125 + }, + { + "epoch": 0.43533604887983707, + "grad_norm": 0.5880158543586731, + "learning_rate": 1.3900265713488623e-06, + "loss": 0.0281, + "num_input_tokens_seen": 4922688, + "step": 5130 + }, + { + "epoch": 0.4357603530210455, + "grad_norm": 2.0686724185943604, + "learning_rate": 1.3886622638405952e-06, + "loss": 0.0589, + "num_input_tokens_seen": 4926976, + "step": 5135 + }, + { + "epoch": 0.4361846571622539, + "grad_norm": 21.587642669677734, + "learning_rate": 1.3872971036432406e-06, + "loss": 0.052, + "num_input_tokens_seen": 4931456, + "step": 5140 + }, + { + "epoch": 0.43660896130346233, + "grad_norm": 2.3363358974456787, + "learning_rate": 1.385931093751834e-06, + "loss": 0.0948, + "num_input_tokens_seen": 4936000, + "step": 5145 + }, + { + "epoch": 0.43703326544467075, + "grad_norm": 14.495381355285645, + "learning_rate": 1.384564237163275e-06, + "loss": 0.0645, + "num_input_tokens_seen": 4940288, + "step": 5150 + }, + { + "epoch": 0.43745756958587917, + "grad_norm": 9.281749725341797, + "learning_rate": 1.3831965368763203e-06, + "loss": 0.0545, + "num_input_tokens_seen": 4944576, + "step": 5155 + }, + { + "epoch": 0.4378818737270876, + "grad_norm": 14.984248161315918, + "learning_rate": 1.3818279958915785e-06, + "loss": 0.0682, + "num_input_tokens_seen": 4948992, + "step": 5160 + }, + { + "epoch": 0.438306177868296, + "grad_norm": 10.550505638122559, + "learning_rate": 1.3804586172115015e-06, + "loss": 0.0657, + "num_input_tokens_seen": 4953728, + "step": 5165 + }, + { + "epoch": 0.43873048200950443, + "grad_norm": 12.228930473327637, + "learning_rate": 1.3790884038403793e-06, + "loss": 0.055, + "num_input_tokens_seen": 4958720, + "step": 5170 + }, + { + "epoch": 0.43915478615071285, + "grad_norm": 16.7993221282959, + "learning_rate": 1.3777173587843341e-06, + "loss": 0.0761, + "num_input_tokens_seen": 4963840, + "step": 5175 + }, + { + "epoch": 0.43957909029192127, + "grad_norm": 19.838136672973633, + "learning_rate": 1.3763454850513122e-06, + "loss": 0.0099, + "num_input_tokens_seen": 4968512, + "step": 5180 + }, + { + "epoch": 0.4400033944331297, + "grad_norm": 16.498245239257812, + "learning_rate": 1.3749727856510766e-06, + "loss": 0.079, + "num_input_tokens_seen": 4972928, + "step": 5185 + }, + { + "epoch": 0.4404276985743381, + "grad_norm": 14.861262321472168, + "learning_rate": 1.373599263595204e-06, + "loss": 0.1034, + "num_input_tokens_seen": 4977664, + "step": 5190 + }, + { + "epoch": 0.4408520027155465, + "grad_norm": 1.0039514303207397, + "learning_rate": 1.3722249218970744e-06, + "loss": 0.1265, + "num_input_tokens_seen": 4982912, + "step": 5195 + }, + { + "epoch": 0.4412763068567549, + "grad_norm": 21.250131607055664, + "learning_rate": 1.3708497635718672e-06, + "loss": 0.0489, + "num_input_tokens_seen": 4988416, + "step": 5200 + }, + { + "epoch": 0.4417006109979633, + "grad_norm": 8.660135269165039, + "learning_rate": 1.3694737916365515e-06, + "loss": 0.0982, + "num_input_tokens_seen": 4993472, + "step": 5205 + }, + { + "epoch": 0.44212491513917174, + "grad_norm": 0.22283487021923065, + "learning_rate": 1.3680970091098832e-06, + "loss": 0.037, + "num_input_tokens_seen": 4998208, + "step": 5210 + }, + { + "epoch": 0.44254921928038016, + "grad_norm": 15.840060234069824, + "learning_rate": 1.366719419012396e-06, + "loss": 0.0563, + "num_input_tokens_seen": 5003008, + "step": 5215 + }, + { + "epoch": 0.4429735234215886, + "grad_norm": 14.518122673034668, + "learning_rate": 1.3653410243663951e-06, + "loss": 0.0438, + "num_input_tokens_seen": 5008704, + "step": 5220 + }, + { + "epoch": 0.443397827562797, + "grad_norm": 27.201988220214844, + "learning_rate": 1.363961828195951e-06, + "loss": 0.1245, + "num_input_tokens_seen": 5013120, + "step": 5225 + }, + { + "epoch": 0.4438221317040054, + "grad_norm": 0.40873754024505615, + "learning_rate": 1.3625818335268923e-06, + "loss": 0.0402, + "num_input_tokens_seen": 5017664, + "step": 5230 + }, + { + "epoch": 0.44424643584521384, + "grad_norm": 11.026297569274902, + "learning_rate": 1.3612010433868004e-06, + "loss": 0.058, + "num_input_tokens_seen": 5022528, + "step": 5235 + }, + { + "epoch": 0.44467073998642226, + "grad_norm": 6.463628768920898, + "learning_rate": 1.3598194608050008e-06, + "loss": 0.0781, + "num_input_tokens_seen": 5027072, + "step": 5240 + }, + { + "epoch": 0.4450950441276307, + "grad_norm": 0.26125529408454895, + "learning_rate": 1.3584370888125583e-06, + "loss": 0.0093, + "num_input_tokens_seen": 5031424, + "step": 5245 + }, + { + "epoch": 0.4455193482688391, + "grad_norm": 25.507278442382812, + "learning_rate": 1.357053930442269e-06, + "loss": 0.056, + "num_input_tokens_seen": 5036480, + "step": 5250 + }, + { + "epoch": 0.4459436524100475, + "grad_norm": 12.934303283691406, + "learning_rate": 1.355669988728655e-06, + "loss": 0.0619, + "num_input_tokens_seen": 5041792, + "step": 5255 + }, + { + "epoch": 0.44636795655125594, + "grad_norm": 0.4678743779659271, + "learning_rate": 1.3542852667079557e-06, + "loss": 0.0033, + "num_input_tokens_seen": 5046592, + "step": 5260 + }, + { + "epoch": 0.44679226069246436, + "grad_norm": 20.0618953704834, + "learning_rate": 1.352899767418124e-06, + "loss": 0.1114, + "num_input_tokens_seen": 5051456, + "step": 5265 + }, + { + "epoch": 0.4472165648336728, + "grad_norm": 21.787246704101562, + "learning_rate": 1.3515134938988168e-06, + "loss": 0.0761, + "num_input_tokens_seen": 5056320, + "step": 5270 + }, + { + "epoch": 0.4476408689748812, + "grad_norm": 0.42069345712661743, + "learning_rate": 1.3501264491913906e-06, + "loss": 0.0149, + "num_input_tokens_seen": 5061248, + "step": 5275 + }, + { + "epoch": 0.4480651731160896, + "grad_norm": 16.042268753051758, + "learning_rate": 1.348738636338893e-06, + "loss": 0.0755, + "num_input_tokens_seen": 5065664, + "step": 5280 + }, + { + "epoch": 0.44848947725729804, + "grad_norm": 11.962218284606934, + "learning_rate": 1.3473500583860568e-06, + "loss": 0.0789, + "num_input_tokens_seen": 5071552, + "step": 5285 + }, + { + "epoch": 0.44891378139850646, + "grad_norm": 0.37122446298599243, + "learning_rate": 1.3459607183792945e-06, + "loss": 0.0564, + "num_input_tokens_seen": 5076032, + "step": 5290 + }, + { + "epoch": 0.4493380855397149, + "grad_norm": 15.164850234985352, + "learning_rate": 1.344570619366689e-06, + "loss": 0.0543, + "num_input_tokens_seen": 5080384, + "step": 5295 + }, + { + "epoch": 0.4497623896809233, + "grad_norm": 17.177448272705078, + "learning_rate": 1.3431797643979894e-06, + "loss": 0.0623, + "num_input_tokens_seen": 5085376, + "step": 5300 + }, + { + "epoch": 0.4501866938221317, + "grad_norm": 7.689847469329834, + "learning_rate": 1.3417881565246027e-06, + "loss": 0.0338, + "num_input_tokens_seen": 5090112, + "step": 5305 + }, + { + "epoch": 0.45061099796334014, + "grad_norm": 56.23309326171875, + "learning_rate": 1.3403957987995882e-06, + "loss": 0.05, + "num_input_tokens_seen": 5095424, + "step": 5310 + }, + { + "epoch": 0.45061099796334014, + "eval_loss": 0.06341014802455902, + "eval_runtime": 15.8212, + "eval_samples_per_second": 662.086, + "eval_steps_per_second": 82.8, + "num_input_tokens_seen": 5095424, + "step": 5310 + }, + { + "epoch": 0.45103530210454856, + "grad_norm": 2.7271251678466797, + "learning_rate": 1.33900269427765e-06, + "loss": 0.0584, + "num_input_tokens_seen": 5100864, + "step": 5315 + }, + { + "epoch": 0.451459606245757, + "grad_norm": 9.763409614562988, + "learning_rate": 1.3376088460151306e-06, + "loss": 0.0825, + "num_input_tokens_seen": 5105088, + "step": 5320 + }, + { + "epoch": 0.4518839103869654, + "grad_norm": 26.676908493041992, + "learning_rate": 1.336214257070004e-06, + "loss": 0.044, + "num_input_tokens_seen": 5109760, + "step": 5325 + }, + { + "epoch": 0.45230821452817377, + "grad_norm": 25.818172454833984, + "learning_rate": 1.3348189305018702e-06, + "loss": 0.0885, + "num_input_tokens_seen": 5114176, + "step": 5330 + }, + { + "epoch": 0.4527325186693822, + "grad_norm": 9.313101768493652, + "learning_rate": 1.3334228693719464e-06, + "loss": 0.0254, + "num_input_tokens_seen": 5118592, + "step": 5335 + }, + { + "epoch": 0.4531568228105906, + "grad_norm": 0.3694465160369873, + "learning_rate": 1.3320260767430614e-06, + "loss": 0.1096, + "num_input_tokens_seen": 5123584, + "step": 5340 + }, + { + "epoch": 0.45358112695179903, + "grad_norm": 14.618754386901855, + "learning_rate": 1.3306285556796492e-06, + "loss": 0.0228, + "num_input_tokens_seen": 5128192, + "step": 5345 + }, + { + "epoch": 0.45400543109300745, + "grad_norm": 0.516826868057251, + "learning_rate": 1.3292303092477424e-06, + "loss": 0.0764, + "num_input_tokens_seen": 5132864, + "step": 5350 + }, + { + "epoch": 0.45442973523421587, + "grad_norm": 1.1318422555923462, + "learning_rate": 1.3278313405149638e-06, + "loss": 0.0411, + "num_input_tokens_seen": 5137216, + "step": 5355 + }, + { + "epoch": 0.4548540393754243, + "grad_norm": 8.509772300720215, + "learning_rate": 1.3264316525505216e-06, + "loss": 0.0462, + "num_input_tokens_seen": 5142528, + "step": 5360 + }, + { + "epoch": 0.4552783435166327, + "grad_norm": 15.627358436584473, + "learning_rate": 1.3250312484252021e-06, + "loss": 0.0102, + "num_input_tokens_seen": 5147968, + "step": 5365 + }, + { + "epoch": 0.45570264765784113, + "grad_norm": 0.08779313415288925, + "learning_rate": 1.3236301312113627e-06, + "loss": 0.0413, + "num_input_tokens_seen": 5152384, + "step": 5370 + }, + { + "epoch": 0.45612695179904955, + "grad_norm": 36.04643630981445, + "learning_rate": 1.3222283039829247e-06, + "loss": 0.1069, + "num_input_tokens_seen": 5156992, + "step": 5375 + }, + { + "epoch": 0.45655125594025797, + "grad_norm": 0.39356672763824463, + "learning_rate": 1.3208257698153676e-06, + "loss": 0.0965, + "num_input_tokens_seen": 5161984, + "step": 5380 + }, + { + "epoch": 0.4569755600814664, + "grad_norm": 16.60633659362793, + "learning_rate": 1.3194225317857216e-06, + "loss": 0.0604, + "num_input_tokens_seen": 5167040, + "step": 5385 + }, + { + "epoch": 0.4573998642226748, + "grad_norm": 25.81131362915039, + "learning_rate": 1.3180185929725616e-06, + "loss": 0.0475, + "num_input_tokens_seen": 5171776, + "step": 5390 + }, + { + "epoch": 0.45782416836388323, + "grad_norm": 14.490070343017578, + "learning_rate": 1.3166139564559992e-06, + "loss": 0.117, + "num_input_tokens_seen": 5176896, + "step": 5395 + }, + { + "epoch": 0.45824847250509165, + "grad_norm": 17.152679443359375, + "learning_rate": 1.3152086253176773e-06, + "loss": 0.0416, + "num_input_tokens_seen": 5181312, + "step": 5400 + }, + { + "epoch": 0.4586727766463001, + "grad_norm": 6.349189758300781, + "learning_rate": 1.313802602640763e-06, + "loss": 0.008, + "num_input_tokens_seen": 5186112, + "step": 5405 + }, + { + "epoch": 0.4590970807875085, + "grad_norm": 5.189371109008789, + "learning_rate": 1.3123958915099392e-06, + "loss": 0.0387, + "num_input_tokens_seen": 5191040, + "step": 5410 + }, + { + "epoch": 0.4595213849287169, + "grad_norm": 0.14324896037578583, + "learning_rate": 1.3109884950114005e-06, + "loss": 0.0441, + "num_input_tokens_seen": 5195584, + "step": 5415 + }, + { + "epoch": 0.45994568906992533, + "grad_norm": 22.456459045410156, + "learning_rate": 1.309580416232845e-06, + "loss": 0.0799, + "num_input_tokens_seen": 5200256, + "step": 5420 + }, + { + "epoch": 0.46036999321113375, + "grad_norm": 14.055853843688965, + "learning_rate": 1.3081716582634672e-06, + "loss": 0.0576, + "num_input_tokens_seen": 5205376, + "step": 5425 + }, + { + "epoch": 0.4607942973523422, + "grad_norm": 14.267642974853516, + "learning_rate": 1.3067622241939518e-06, + "loss": 0.0254, + "num_input_tokens_seen": 5211392, + "step": 5430 + }, + { + "epoch": 0.4612186014935506, + "grad_norm": 10.525510787963867, + "learning_rate": 1.305352117116467e-06, + "loss": 0.0247, + "num_input_tokens_seen": 5215616, + "step": 5435 + }, + { + "epoch": 0.461642905634759, + "grad_norm": 7.825259685516357, + "learning_rate": 1.3039413401246576e-06, + "loss": 0.0862, + "num_input_tokens_seen": 5220608, + "step": 5440 + }, + { + "epoch": 0.46206720977596744, + "grad_norm": 6.458859443664551, + "learning_rate": 1.3025298963136377e-06, + "loss": 0.1111, + "num_input_tokens_seen": 5225344, + "step": 5445 + }, + { + "epoch": 0.46249151391717586, + "grad_norm": 8.985774040222168, + "learning_rate": 1.3011177887799844e-06, + "loss": 0.0417, + "num_input_tokens_seen": 5230464, + "step": 5450 + }, + { + "epoch": 0.4629158180583843, + "grad_norm": 6.6957597732543945, + "learning_rate": 1.2997050206217315e-06, + "loss": 0.0548, + "num_input_tokens_seen": 5235264, + "step": 5455 + }, + { + "epoch": 0.46334012219959264, + "grad_norm": 7.47517728805542, + "learning_rate": 1.2982915949383614e-06, + "loss": 0.0883, + "num_input_tokens_seen": 5239808, + "step": 5460 + }, + { + "epoch": 0.46376442634080106, + "grad_norm": 3.9635698795318604, + "learning_rate": 1.2968775148308002e-06, + "loss": 0.0629, + "num_input_tokens_seen": 5244416, + "step": 5465 + }, + { + "epoch": 0.4641887304820095, + "grad_norm": 15.994041442871094, + "learning_rate": 1.295462783401408e-06, + "loss": 0.0882, + "num_input_tokens_seen": 5249280, + "step": 5470 + }, + { + "epoch": 0.4646130346232179, + "grad_norm": 10.046189308166504, + "learning_rate": 1.2940474037539755e-06, + "loss": 0.1072, + "num_input_tokens_seen": 5254080, + "step": 5475 + }, + { + "epoch": 0.4650373387644263, + "grad_norm": 1.4697424173355103, + "learning_rate": 1.2926313789937143e-06, + "loss": 0.0185, + "num_input_tokens_seen": 5259136, + "step": 5480 + }, + { + "epoch": 0.46546164290563474, + "grad_norm": 18.949148178100586, + "learning_rate": 1.2912147122272522e-06, + "loss": 0.0443, + "num_input_tokens_seen": 5263744, + "step": 5485 + }, + { + "epoch": 0.46588594704684316, + "grad_norm": 1.0807805061340332, + "learning_rate": 1.289797406562625e-06, + "loss": 0.0569, + "num_input_tokens_seen": 5268544, + "step": 5490 + }, + { + "epoch": 0.4663102511880516, + "grad_norm": 17.27862548828125, + "learning_rate": 1.2883794651092704e-06, + "loss": 0.0288, + "num_input_tokens_seen": 5273280, + "step": 5495 + }, + { + "epoch": 0.46673455532926, + "grad_norm": 7.288158416748047, + "learning_rate": 1.2869608909780212e-06, + "loss": 0.0231, + "num_input_tokens_seen": 5277888, + "step": 5500 + }, + { + "epoch": 0.4671588594704684, + "grad_norm": 14.408581733703613, + "learning_rate": 1.2855416872810973e-06, + "loss": 0.0518, + "num_input_tokens_seen": 5282432, + "step": 5505 + }, + { + "epoch": 0.46758316361167684, + "grad_norm": 5.324777126312256, + "learning_rate": 1.284121857132101e-06, + "loss": 0.0088, + "num_input_tokens_seen": 5288512, + "step": 5510 + }, + { + "epoch": 0.46800746775288526, + "grad_norm": 9.321609497070312, + "learning_rate": 1.2827014036460082e-06, + "loss": 0.0204, + "num_input_tokens_seen": 5292800, + "step": 5515 + }, + { + "epoch": 0.4684317718940937, + "grad_norm": 9.393115043640137, + "learning_rate": 1.2812803299391628e-06, + "loss": 0.0274, + "num_input_tokens_seen": 5297856, + "step": 5520 + }, + { + "epoch": 0.4688560760353021, + "grad_norm": 6.718729019165039, + "learning_rate": 1.2798586391292689e-06, + "loss": 0.0859, + "num_input_tokens_seen": 5302784, + "step": 5525 + }, + { + "epoch": 0.4692803801765105, + "grad_norm": 43.81071472167969, + "learning_rate": 1.2784363343353848e-06, + "loss": 0.0555, + "num_input_tokens_seen": 5307648, + "step": 5530 + }, + { + "epoch": 0.46970468431771895, + "grad_norm": 6.245013236999512, + "learning_rate": 1.2770134186779158e-06, + "loss": 0.0496, + "num_input_tokens_seen": 5311680, + "step": 5535 + }, + { + "epoch": 0.47012898845892737, + "grad_norm": 7.27108907699585, + "learning_rate": 1.2755898952786076e-06, + "loss": 0.053, + "num_input_tokens_seen": 5316288, + "step": 5540 + }, + { + "epoch": 0.4705532926001358, + "grad_norm": 17.36294937133789, + "learning_rate": 1.2741657672605385e-06, + "loss": 0.0291, + "num_input_tokens_seen": 5320448, + "step": 5545 + }, + { + "epoch": 0.4709775967413442, + "grad_norm": 1.1560553312301636, + "learning_rate": 1.272741037748114e-06, + "loss": 0.0483, + "num_input_tokens_seen": 5324928, + "step": 5550 + }, + { + "epoch": 0.4714019008825526, + "grad_norm": 24.80785369873047, + "learning_rate": 1.2713157098670588e-06, + "loss": 0.0429, + "num_input_tokens_seen": 5329792, + "step": 5555 + }, + { + "epoch": 0.47182620502376105, + "grad_norm": 13.043895721435547, + "learning_rate": 1.2698897867444112e-06, + "loss": 0.0697, + "num_input_tokens_seen": 5334720, + "step": 5560 + }, + { + "epoch": 0.47225050916496947, + "grad_norm": 17.10675621032715, + "learning_rate": 1.268463271508514e-06, + "loss": 0.0769, + "num_input_tokens_seen": 5339968, + "step": 5565 + }, + { + "epoch": 0.4726748133061779, + "grad_norm": 0.16105744242668152, + "learning_rate": 1.2670361672890099e-06, + "loss": 0.0083, + "num_input_tokens_seen": 5345216, + "step": 5570 + }, + { + "epoch": 0.4730991174473863, + "grad_norm": 15.116951942443848, + "learning_rate": 1.265608477216834e-06, + "loss": 0.116, + "num_input_tokens_seen": 5350400, + "step": 5575 + }, + { + "epoch": 0.47352342158859473, + "grad_norm": 4.960959434509277, + "learning_rate": 1.2641802044242065e-06, + "loss": 0.0868, + "num_input_tokens_seen": 5354944, + "step": 5580 + }, + { + "epoch": 0.47394772572980315, + "grad_norm": 0.19331686198711395, + "learning_rate": 1.2627513520446252e-06, + "loss": 0.1187, + "num_input_tokens_seen": 5359040, + "step": 5585 + }, + { + "epoch": 0.4743720298710115, + "grad_norm": 4.621866226196289, + "learning_rate": 1.2613219232128608e-06, + "loss": 0.0866, + "num_input_tokens_seen": 5363584, + "step": 5590 + }, + { + "epoch": 0.47479633401221993, + "grad_norm": 12.511336326599121, + "learning_rate": 1.2598919210649475e-06, + "loss": 0.0184, + "num_input_tokens_seen": 5368256, + "step": 5595 + }, + { + "epoch": 0.47522063815342835, + "grad_norm": 0.6315953731536865, + "learning_rate": 1.2584613487381787e-06, + "loss": 0.0563, + "num_input_tokens_seen": 5372800, + "step": 5600 + }, + { + "epoch": 0.4756449422946368, + "grad_norm": 6.170360088348389, + "learning_rate": 1.257030209371097e-06, + "loss": 0.0413, + "num_input_tokens_seen": 5377280, + "step": 5605 + }, + { + "epoch": 0.4760692464358452, + "grad_norm": 6.3370466232299805, + "learning_rate": 1.2555985061034902e-06, + "loss": 0.1219, + "num_input_tokens_seen": 5382208, + "step": 5610 + }, + { + "epoch": 0.4764935505770536, + "grad_norm": 11.882719993591309, + "learning_rate": 1.2541662420763832e-06, + "loss": 0.0935, + "num_input_tokens_seen": 5386816, + "step": 5615 + }, + { + "epoch": 0.47691785471826204, + "grad_norm": 11.76760482788086, + "learning_rate": 1.2527334204320306e-06, + "loss": 0.0457, + "num_input_tokens_seen": 5391360, + "step": 5620 + }, + { + "epoch": 0.47734215885947046, + "grad_norm": 14.905533790588379, + "learning_rate": 1.251300044313911e-06, + "loss": 0.0584, + "num_input_tokens_seen": 5395904, + "step": 5625 + }, + { + "epoch": 0.4777664630006789, + "grad_norm": 0.44021788239479065, + "learning_rate": 1.2498661168667188e-06, + "loss": 0.0555, + "num_input_tokens_seen": 5400448, + "step": 5630 + }, + { + "epoch": 0.4781907671418873, + "grad_norm": 9.252647399902344, + "learning_rate": 1.2484316412363585e-06, + "loss": 0.0431, + "num_input_tokens_seen": 5405824, + "step": 5635 + }, + { + "epoch": 0.4786150712830957, + "grad_norm": 0.9856930375099182, + "learning_rate": 1.246996620569937e-06, + "loss": 0.0407, + "num_input_tokens_seen": 5410688, + "step": 5640 + }, + { + "epoch": 0.47903937542430414, + "grad_norm": 6.884036064147949, + "learning_rate": 1.245561058015757e-06, + "loss": 0.0646, + "num_input_tokens_seen": 5415296, + "step": 5645 + }, + { + "epoch": 0.47946367956551256, + "grad_norm": 7.797816276550293, + "learning_rate": 1.2441249567233098e-06, + "loss": 0.1001, + "num_input_tokens_seen": 5419648, + "step": 5650 + }, + { + "epoch": 0.479887983706721, + "grad_norm": 5.869114875793457, + "learning_rate": 1.2426883198432696e-06, + "loss": 0.0398, + "num_input_tokens_seen": 5424576, + "step": 5655 + }, + { + "epoch": 0.4803122878479294, + "grad_norm": 4.114724636077881, + "learning_rate": 1.2412511505274844e-06, + "loss": 0.033, + "num_input_tokens_seen": 5429184, + "step": 5660 + }, + { + "epoch": 0.4807365919891378, + "grad_norm": 1.3931119441986084, + "learning_rate": 1.2398134519289708e-06, + "loss": 0.0589, + "num_input_tokens_seen": 5433536, + "step": 5665 + }, + { + "epoch": 0.48116089613034624, + "grad_norm": 11.368043899536133, + "learning_rate": 1.2383752272019071e-06, + "loss": 0.1094, + "num_input_tokens_seen": 5438464, + "step": 5670 + }, + { + "epoch": 0.48158520027155466, + "grad_norm": 16.947954177856445, + "learning_rate": 1.2369364795016252e-06, + "loss": 0.0679, + "num_input_tokens_seen": 5443136, + "step": 5675 + }, + { + "epoch": 0.4820095044127631, + "grad_norm": 2.892996072769165, + "learning_rate": 1.2354972119846045e-06, + "loss": 0.0332, + "num_input_tokens_seen": 5447744, + "step": 5680 + }, + { + "epoch": 0.4824338085539715, + "grad_norm": 1.6063815355300903, + "learning_rate": 1.2340574278084648e-06, + "loss": 0.0926, + "num_input_tokens_seen": 5452800, + "step": 5685 + }, + { + "epoch": 0.4828581126951799, + "grad_norm": 8.03439998626709, + "learning_rate": 1.23261713013196e-06, + "loss": 0.0565, + "num_input_tokens_seen": 5457472, + "step": 5690 + }, + { + "epoch": 0.48328241683638834, + "grad_norm": 10.29983901977539, + "learning_rate": 1.2311763221149697e-06, + "loss": 0.0754, + "num_input_tokens_seen": 5462272, + "step": 5695 + }, + { + "epoch": 0.48370672097759676, + "grad_norm": 14.466094017028809, + "learning_rate": 1.2297350069184935e-06, + "loss": 0.0176, + "num_input_tokens_seen": 5466880, + "step": 5700 + }, + { + "epoch": 0.4841310251188052, + "grad_norm": 7.963089942932129, + "learning_rate": 1.228293187704644e-06, + "loss": 0.0447, + "num_input_tokens_seen": 5471616, + "step": 5705 + }, + { + "epoch": 0.4845553292600136, + "grad_norm": 6.54095458984375, + "learning_rate": 1.2268508676366393e-06, + "loss": 0.0748, + "num_input_tokens_seen": 5476160, + "step": 5710 + }, + { + "epoch": 0.484979633401222, + "grad_norm": 19.244571685791016, + "learning_rate": 1.225408049878796e-06, + "loss": 0.1343, + "num_input_tokens_seen": 5480960, + "step": 5715 + }, + { + "epoch": 0.48540393754243044, + "grad_norm": 13.259824752807617, + "learning_rate": 1.223964737596523e-06, + "loss": 0.062, + "num_input_tokens_seen": 5486528, + "step": 5720 + }, + { + "epoch": 0.4858282416836388, + "grad_norm": 9.365538597106934, + "learning_rate": 1.2225209339563143e-06, + "loss": 0.0411, + "num_input_tokens_seen": 5491456, + "step": 5725 + }, + { + "epoch": 0.4862525458248472, + "grad_norm": 1.487982153892517, + "learning_rate": 1.2210766421257419e-06, + "loss": 0.0602, + "num_input_tokens_seen": 5496640, + "step": 5730 + }, + { + "epoch": 0.48667684996605565, + "grad_norm": 0.29796916246414185, + "learning_rate": 1.2196318652734477e-06, + "loss": 0.0293, + "num_input_tokens_seen": 5501376, + "step": 5735 + }, + { + "epoch": 0.48710115410726407, + "grad_norm": 11.434643745422363, + "learning_rate": 1.2181866065691392e-06, + "loss": 0.0521, + "num_input_tokens_seen": 5505856, + "step": 5740 + }, + { + "epoch": 0.4875254582484725, + "grad_norm": 14.557947158813477, + "learning_rate": 1.2167408691835807e-06, + "loss": 0.0564, + "num_input_tokens_seen": 5510720, + "step": 5745 + }, + { + "epoch": 0.4879497623896809, + "grad_norm": 2.0139198303222656, + "learning_rate": 1.2152946562885857e-06, + "loss": 0.0466, + "num_input_tokens_seen": 5514880, + "step": 5750 + }, + { + "epoch": 0.48837406653088933, + "grad_norm": 0.48408669233322144, + "learning_rate": 1.2138479710570123e-06, + "loss": 0.0491, + "num_input_tokens_seen": 5519616, + "step": 5755 + }, + { + "epoch": 0.48879837067209775, + "grad_norm": 17.641517639160156, + "learning_rate": 1.2124008166627535e-06, + "loss": 0.0656, + "num_input_tokens_seen": 5523968, + "step": 5760 + }, + { + "epoch": 0.48922267481330617, + "grad_norm": 0.3832457661628723, + "learning_rate": 1.2109531962807332e-06, + "loss": 0.0673, + "num_input_tokens_seen": 5528960, + "step": 5765 + }, + { + "epoch": 0.4896469789545146, + "grad_norm": 0.15046310424804688, + "learning_rate": 1.2095051130868959e-06, + "loss": 0.0817, + "num_input_tokens_seen": 5535488, + "step": 5770 + }, + { + "epoch": 0.490071283095723, + "grad_norm": 1.9801909923553467, + "learning_rate": 1.2080565702582027e-06, + "loss": 0.1018, + "num_input_tokens_seen": 5540288, + "step": 5775 + }, + { + "epoch": 0.49049558723693143, + "grad_norm": 39.119529724121094, + "learning_rate": 1.2066075709726225e-06, + "loss": 0.0485, + "num_input_tokens_seen": 5545792, + "step": 5780 + }, + { + "epoch": 0.49091989137813985, + "grad_norm": 13.806150436401367, + "learning_rate": 1.2051581184091263e-06, + "loss": 0.0674, + "num_input_tokens_seen": 5550336, + "step": 5785 + }, + { + "epoch": 0.49134419551934827, + "grad_norm": 10.708805084228516, + "learning_rate": 1.2037082157476782e-06, + "loss": 0.0727, + "num_input_tokens_seen": 5555328, + "step": 5790 + }, + { + "epoch": 0.4917684996605567, + "grad_norm": 30.937210083007812, + "learning_rate": 1.2022578661692312e-06, + "loss": 0.0649, + "num_input_tokens_seen": 5560896, + "step": 5795 + }, + { + "epoch": 0.4921928038017651, + "grad_norm": 12.959268569946289, + "learning_rate": 1.2008070728557185e-06, + "loss": 0.0633, + "num_input_tokens_seen": 5565824, + "step": 5800 + }, + { + "epoch": 0.49261710794297353, + "grad_norm": 4.291776180267334, + "learning_rate": 1.1993558389900462e-06, + "loss": 0.0425, + "num_input_tokens_seen": 5570368, + "step": 5805 + }, + { + "epoch": 0.49304141208418195, + "grad_norm": 13.795291900634766, + "learning_rate": 1.197904167756087e-06, + "loss": 0.0633, + "num_input_tokens_seen": 5574848, + "step": 5810 + }, + { + "epoch": 0.49346571622539037, + "grad_norm": 2.7180373668670654, + "learning_rate": 1.1964520623386741e-06, + "loss": 0.0689, + "num_input_tokens_seen": 5579456, + "step": 5815 + }, + { + "epoch": 0.4938900203665988, + "grad_norm": 1.1406170129776, + "learning_rate": 1.1949995259235919e-06, + "loss": 0.0423, + "num_input_tokens_seen": 5584384, + "step": 5820 + }, + { + "epoch": 0.4943143245078072, + "grad_norm": 28.64975357055664, + "learning_rate": 1.1935465616975716e-06, + "loss": 0.123, + "num_input_tokens_seen": 5589632, + "step": 5825 + }, + { + "epoch": 0.49473862864901563, + "grad_norm": 23.525083541870117, + "learning_rate": 1.192093172848282e-06, + "loss": 0.0281, + "num_input_tokens_seen": 5594048, + "step": 5830 + }, + { + "epoch": 0.49516293279022405, + "grad_norm": 27.362192153930664, + "learning_rate": 1.1906393625643242e-06, + "loss": 0.0515, + "num_input_tokens_seen": 5598720, + "step": 5835 + }, + { + "epoch": 0.4955872369314325, + "grad_norm": 10.059873580932617, + "learning_rate": 1.1891851340352235e-06, + "loss": 0.1113, + "num_input_tokens_seen": 5603136, + "step": 5840 + }, + { + "epoch": 0.4960115410726409, + "grad_norm": 0.4605356454849243, + "learning_rate": 1.1877304904514232e-06, + "loss": 0.0388, + "num_input_tokens_seen": 5607872, + "step": 5845 + }, + { + "epoch": 0.4964358452138493, + "grad_norm": 2.450542688369751, + "learning_rate": 1.1862754350042764e-06, + "loss": 0.039, + "num_input_tokens_seen": 5612352, + "step": 5850 + }, + { + "epoch": 0.4968601493550577, + "grad_norm": 0.1743314415216446, + "learning_rate": 1.1848199708860404e-06, + "loss": 0.077, + "num_input_tokens_seen": 5617472, + "step": 5855 + }, + { + "epoch": 0.4972844534962661, + "grad_norm": 10.397873878479004, + "learning_rate": 1.183364101289869e-06, + "loss": 0.0934, + "num_input_tokens_seen": 5621824, + "step": 5860 + }, + { + "epoch": 0.4977087576374745, + "grad_norm": 0.9554150104522705, + "learning_rate": 1.1819078294098057e-06, + "loss": 0.0631, + "num_input_tokens_seen": 5626304, + "step": 5865 + }, + { + "epoch": 0.49813306177868294, + "grad_norm": 18.06666374206543, + "learning_rate": 1.180451158440776e-06, + "loss": 0.0413, + "num_input_tokens_seen": 5631680, + "step": 5870 + }, + { + "epoch": 0.49855736591989136, + "grad_norm": 7.613068580627441, + "learning_rate": 1.1789940915785823e-06, + "loss": 0.115, + "num_input_tokens_seen": 5635904, + "step": 5875 + }, + { + "epoch": 0.4989816700610998, + "grad_norm": 0.9774286150932312, + "learning_rate": 1.177536632019894e-06, + "loss": 0.0422, + "num_input_tokens_seen": 5640512, + "step": 5880 + }, + { + "epoch": 0.4994059742023082, + "grad_norm": 1.4857728481292725, + "learning_rate": 1.1760787829622423e-06, + "loss": 0.0691, + "num_input_tokens_seen": 5646464, + "step": 5885 + }, + { + "epoch": 0.4998302783435166, + "grad_norm": 0.16748501360416412, + "learning_rate": 1.1746205476040137e-06, + "loss": 0.0249, + "num_input_tokens_seen": 5651008, + "step": 5890 + }, + { + "epoch": 0.5002545824847251, + "grad_norm": 17.213905334472656, + "learning_rate": 1.173161929144442e-06, + "loss": 0.0536, + "num_input_tokens_seen": 5655616, + "step": 5895 + }, + { + "epoch": 0.5006788866259335, + "grad_norm": 0.5089601278305054, + "learning_rate": 1.171702930783601e-06, + "loss": 0.0293, + "num_input_tokens_seen": 5660352, + "step": 5900 + }, + { + "epoch": 0.5006788866259335, + "eval_loss": 0.05498848110437393, + "eval_runtime": 15.8725, + "eval_samples_per_second": 659.946, + "eval_steps_per_second": 82.533, + "num_input_tokens_seen": 5660352, + "step": 5900 + }, + { + "epoch": 0.5011031907671419, + "grad_norm": 0.7019211649894714, + "learning_rate": 1.1702435557223986e-06, + "loss": 0.0627, + "num_input_tokens_seen": 5664832, + "step": 5905 + }, + { + "epoch": 0.5015274949083504, + "grad_norm": 1.023059368133545, + "learning_rate": 1.1687838071625684e-06, + "loss": 0.0832, + "num_input_tokens_seen": 5669824, + "step": 5910 + }, + { + "epoch": 0.5019517990495588, + "grad_norm": 0.6381323337554932, + "learning_rate": 1.167323688306664e-06, + "loss": 0.0744, + "num_input_tokens_seen": 5674240, + "step": 5915 + }, + { + "epoch": 0.5023761031907671, + "grad_norm": 12.75153636932373, + "learning_rate": 1.1658632023580515e-06, + "loss": 0.0557, + "num_input_tokens_seen": 5679296, + "step": 5920 + }, + { + "epoch": 0.5028004073319755, + "grad_norm": 25.601726531982422, + "learning_rate": 1.1644023525209014e-06, + "loss": 0.0411, + "num_input_tokens_seen": 5683840, + "step": 5925 + }, + { + "epoch": 0.5032247114731839, + "grad_norm": 15.45758056640625, + "learning_rate": 1.162941142000184e-06, + "loss": 0.0754, + "num_input_tokens_seen": 5688896, + "step": 5930 + }, + { + "epoch": 0.5036490156143923, + "grad_norm": 2.823531150817871, + "learning_rate": 1.1614795740016598e-06, + "loss": 0.0428, + "num_input_tokens_seen": 5693440, + "step": 5935 + }, + { + "epoch": 0.5040733197556008, + "grad_norm": 0.22975222766399384, + "learning_rate": 1.160017651731874e-06, + "loss": 0.073, + "num_input_tokens_seen": 5697920, + "step": 5940 + }, + { + "epoch": 0.5044976238968092, + "grad_norm": 0.3254455327987671, + "learning_rate": 1.1585553783981486e-06, + "loss": 0.0417, + "num_input_tokens_seen": 5702528, + "step": 5945 + }, + { + "epoch": 0.5049219280380176, + "grad_norm": 18.13775062561035, + "learning_rate": 1.1570927572085766e-06, + "loss": 0.0924, + "num_input_tokens_seen": 5707584, + "step": 5950 + }, + { + "epoch": 0.505346232179226, + "grad_norm": 9.583961486816406, + "learning_rate": 1.1556297913720137e-06, + "loss": 0.046, + "num_input_tokens_seen": 5712192, + "step": 5955 + }, + { + "epoch": 0.5057705363204344, + "grad_norm": 32.913612365722656, + "learning_rate": 1.1541664840980715e-06, + "loss": 0.1587, + "num_input_tokens_seen": 5717632, + "step": 5960 + }, + { + "epoch": 0.5061948404616429, + "grad_norm": 4.711894989013672, + "learning_rate": 1.1527028385971107e-06, + "loss": 0.0854, + "num_input_tokens_seen": 5722176, + "step": 5965 + }, + { + "epoch": 0.5066191446028513, + "grad_norm": 12.714658737182617, + "learning_rate": 1.1512388580802348e-06, + "loss": 0.0459, + "num_input_tokens_seen": 5726720, + "step": 5970 + }, + { + "epoch": 0.5070434487440597, + "grad_norm": 10.083311080932617, + "learning_rate": 1.1497745457592815e-06, + "loss": 0.0672, + "num_input_tokens_seen": 5731328, + "step": 5975 + }, + { + "epoch": 0.5074677528852681, + "grad_norm": 1.8064274787902832, + "learning_rate": 1.1483099048468168e-06, + "loss": 0.0614, + "num_input_tokens_seen": 5736256, + "step": 5980 + }, + { + "epoch": 0.5078920570264766, + "grad_norm": 12.606705665588379, + "learning_rate": 1.1468449385561272e-06, + "loss": 0.0764, + "num_input_tokens_seen": 5741248, + "step": 5985 + }, + { + "epoch": 0.508316361167685, + "grad_norm": 5.615732669830322, + "learning_rate": 1.145379650101214e-06, + "loss": 0.0344, + "num_input_tokens_seen": 5746304, + "step": 5990 + }, + { + "epoch": 0.5087406653088934, + "grad_norm": 0.6430853605270386, + "learning_rate": 1.143914042696784e-06, + "loss": 0.097, + "num_input_tokens_seen": 5751552, + "step": 5995 + }, + { + "epoch": 0.5091649694501018, + "grad_norm": 2.67842173576355, + "learning_rate": 1.1424481195582445e-06, + "loss": 0.0568, + "num_input_tokens_seen": 5756032, + "step": 6000 + }, + { + "epoch": 0.5095892735913102, + "grad_norm": 17.653491973876953, + "learning_rate": 1.1409818839016958e-06, + "loss": 0.0818, + "num_input_tokens_seen": 5761600, + "step": 6005 + }, + { + "epoch": 0.5100135777325187, + "grad_norm": 1.1732429265975952, + "learning_rate": 1.1395153389439231e-06, + "loss": 0.0286, + "num_input_tokens_seen": 5766336, + "step": 6010 + }, + { + "epoch": 0.5104378818737271, + "grad_norm": 17.17027473449707, + "learning_rate": 1.1380484879023903e-06, + "loss": 0.0889, + "num_input_tokens_seen": 5771392, + "step": 6015 + }, + { + "epoch": 0.5108621860149355, + "grad_norm": 25.292057037353516, + "learning_rate": 1.1365813339952334e-06, + "loss": 0.0375, + "num_input_tokens_seen": 5775808, + "step": 6020 + }, + { + "epoch": 0.5112864901561439, + "grad_norm": 1.390062928199768, + "learning_rate": 1.1351138804412524e-06, + "loss": 0.0905, + "num_input_tokens_seen": 5780800, + "step": 6025 + }, + { + "epoch": 0.5117107942973523, + "grad_norm": 26.388877868652344, + "learning_rate": 1.1336461304599047e-06, + "loss": 0.031, + "num_input_tokens_seen": 5786304, + "step": 6030 + }, + { + "epoch": 0.5121350984385608, + "grad_norm": 0.2944334149360657, + "learning_rate": 1.1321780872712983e-06, + "loss": 0.0295, + "num_input_tokens_seen": 5791360, + "step": 6035 + }, + { + "epoch": 0.5125594025797692, + "grad_norm": 1.198797583580017, + "learning_rate": 1.1307097540961838e-06, + "loss": 0.019, + "num_input_tokens_seen": 5795840, + "step": 6040 + }, + { + "epoch": 0.5129837067209776, + "grad_norm": 16.838085174560547, + "learning_rate": 1.129241134155949e-06, + "loss": 0.0386, + "num_input_tokens_seen": 5800576, + "step": 6045 + }, + { + "epoch": 0.513408010862186, + "grad_norm": 0.14390717446804047, + "learning_rate": 1.1277722306726103e-06, + "loss": 0.0621, + "num_input_tokens_seen": 5805632, + "step": 6050 + }, + { + "epoch": 0.5138323150033944, + "grad_norm": 0.09065728634595871, + "learning_rate": 1.1263030468688057e-06, + "loss": 0.0254, + "num_input_tokens_seen": 5810688, + "step": 6055 + }, + { + "epoch": 0.5142566191446029, + "grad_norm": 0.3492352366447449, + "learning_rate": 1.1248335859677891e-06, + "loss": 0.0513, + "num_input_tokens_seen": 5815616, + "step": 6060 + }, + { + "epoch": 0.5146809232858113, + "grad_norm": 16.324581146240234, + "learning_rate": 1.1233638511934218e-06, + "loss": 0.0772, + "num_input_tokens_seen": 5820672, + "step": 6065 + }, + { + "epoch": 0.5151052274270197, + "grad_norm": 6.548031806945801, + "learning_rate": 1.121893845770166e-06, + "loss": 0.0595, + "num_input_tokens_seen": 5824896, + "step": 6070 + }, + { + "epoch": 0.5155295315682281, + "grad_norm": 1.5588712692260742, + "learning_rate": 1.120423572923078e-06, + "loss": 0.0458, + "num_input_tokens_seen": 5829632, + "step": 6075 + }, + { + "epoch": 0.5159538357094365, + "grad_norm": 8.870634078979492, + "learning_rate": 1.1189530358778004e-06, + "loss": 0.0292, + "num_input_tokens_seen": 5834240, + "step": 6080 + }, + { + "epoch": 0.516378139850645, + "grad_norm": 0.9702350497245789, + "learning_rate": 1.1174822378605551e-06, + "loss": 0.0951, + "num_input_tokens_seen": 5838784, + "step": 6085 + }, + { + "epoch": 0.5168024439918534, + "grad_norm": 19.710203170776367, + "learning_rate": 1.116011182098138e-06, + "loss": 0.0588, + "num_input_tokens_seen": 5843072, + "step": 6090 + }, + { + "epoch": 0.5172267481330618, + "grad_norm": 15.01016902923584, + "learning_rate": 1.1145398718179085e-06, + "loss": 0.0476, + "num_input_tokens_seen": 5847360, + "step": 6095 + }, + { + "epoch": 0.5176510522742702, + "grad_norm": 0.10758156329393387, + "learning_rate": 1.1130683102477862e-06, + "loss": 0.019, + "num_input_tokens_seen": 5852224, + "step": 6100 + }, + { + "epoch": 0.5180753564154786, + "grad_norm": 5.596349239349365, + "learning_rate": 1.1115965006162405e-06, + "loss": 0.0241, + "num_input_tokens_seen": 5857152, + "step": 6105 + }, + { + "epoch": 0.5184996605566871, + "grad_norm": 0.04970073699951172, + "learning_rate": 1.110124446152286e-06, + "loss": 0.1214, + "num_input_tokens_seen": 5861888, + "step": 6110 + }, + { + "epoch": 0.5189239646978955, + "grad_norm": 10.609586715698242, + "learning_rate": 1.1086521500854744e-06, + "loss": 0.1294, + "num_input_tokens_seen": 5866496, + "step": 6115 + }, + { + "epoch": 0.5193482688391039, + "grad_norm": 13.855015754699707, + "learning_rate": 1.1071796156458868e-06, + "loss": 0.0606, + "num_input_tokens_seen": 5870912, + "step": 6120 + }, + { + "epoch": 0.5197725729803123, + "grad_norm": 0.7808138728141785, + "learning_rate": 1.1057068460641281e-06, + "loss": 0.0881, + "num_input_tokens_seen": 5876672, + "step": 6125 + }, + { + "epoch": 0.5201968771215207, + "grad_norm": 16.916934967041016, + "learning_rate": 1.1042338445713183e-06, + "loss": 0.0278, + "num_input_tokens_seen": 5881024, + "step": 6130 + }, + { + "epoch": 0.5206211812627292, + "grad_norm": 8.180154800415039, + "learning_rate": 1.1027606143990867e-06, + "loss": 0.1703, + "num_input_tokens_seen": 5886080, + "step": 6135 + }, + { + "epoch": 0.5210454854039376, + "grad_norm": 15.013484954833984, + "learning_rate": 1.1012871587795638e-06, + "loss": 0.0505, + "num_input_tokens_seen": 5890880, + "step": 6140 + }, + { + "epoch": 0.521469789545146, + "grad_norm": 1.3397859334945679, + "learning_rate": 1.0998134809453756e-06, + "loss": 0.0369, + "num_input_tokens_seen": 5895424, + "step": 6145 + }, + { + "epoch": 0.5218940936863544, + "grad_norm": 0.7837854623794556, + "learning_rate": 1.0983395841296347e-06, + "loss": 0.0887, + "num_input_tokens_seen": 5900352, + "step": 6150 + }, + { + "epoch": 0.5223183978275628, + "grad_norm": 22.695222854614258, + "learning_rate": 1.0968654715659347e-06, + "loss": 0.0695, + "num_input_tokens_seen": 5904960, + "step": 6155 + }, + { + "epoch": 0.5227427019687713, + "grad_norm": 8.58603572845459, + "learning_rate": 1.095391146488342e-06, + "loss": 0.0687, + "num_input_tokens_seen": 5910016, + "step": 6160 + }, + { + "epoch": 0.5231670061099797, + "grad_norm": 6.1412200927734375, + "learning_rate": 1.09391661213139e-06, + "loss": 0.0779, + "num_input_tokens_seen": 5914944, + "step": 6165 + }, + { + "epoch": 0.5235913102511881, + "grad_norm": 8.769179344177246, + "learning_rate": 1.0924418717300707e-06, + "loss": 0.0389, + "num_input_tokens_seen": 5920448, + "step": 6170 + }, + { + "epoch": 0.5240156143923965, + "grad_norm": 20.312427520751953, + "learning_rate": 1.090966928519828e-06, + "loss": 0.0357, + "num_input_tokens_seen": 5925696, + "step": 6175 + }, + { + "epoch": 0.5244399185336049, + "grad_norm": 5.0691938400268555, + "learning_rate": 1.0894917857365511e-06, + "loss": 0.0356, + "num_input_tokens_seen": 5930624, + "step": 6180 + }, + { + "epoch": 0.5248642226748133, + "grad_norm": 10.31592082977295, + "learning_rate": 1.0880164466165673e-06, + "loss": 0.0751, + "num_input_tokens_seen": 5935168, + "step": 6185 + }, + { + "epoch": 0.5252885268160217, + "grad_norm": 0.4381231665611267, + "learning_rate": 1.0865409143966338e-06, + "loss": 0.0332, + "num_input_tokens_seen": 5939712, + "step": 6190 + }, + { + "epoch": 0.5257128309572301, + "grad_norm": 3.2070534229278564, + "learning_rate": 1.0850651923139317e-06, + "loss": 0.0108, + "num_input_tokens_seen": 5944576, + "step": 6195 + }, + { + "epoch": 0.5261371350984385, + "grad_norm": 27.044889450073242, + "learning_rate": 1.0835892836060598e-06, + "loss": 0.0517, + "num_input_tokens_seen": 5949184, + "step": 6200 + }, + { + "epoch": 0.5265614392396469, + "grad_norm": 1.5127665996551514, + "learning_rate": 1.0821131915110246e-06, + "loss": 0.0215, + "num_input_tokens_seen": 5954176, + "step": 6205 + }, + { + "epoch": 0.5269857433808554, + "grad_norm": 13.82748794555664, + "learning_rate": 1.080636919267236e-06, + "loss": 0.0753, + "num_input_tokens_seen": 5958656, + "step": 6210 + }, + { + "epoch": 0.5274100475220638, + "grad_norm": 13.576017379760742, + "learning_rate": 1.079160470113499e-06, + "loss": 0.0789, + "num_input_tokens_seen": 5963264, + "step": 6215 + }, + { + "epoch": 0.5278343516632722, + "grad_norm": 11.991615295410156, + "learning_rate": 1.0776838472890064e-06, + "loss": 0.0936, + "num_input_tokens_seen": 5968576, + "step": 6220 + }, + { + "epoch": 0.5282586558044806, + "grad_norm": 0.9006063342094421, + "learning_rate": 1.0762070540333322e-06, + "loss": 0.0034, + "num_input_tokens_seen": 5973248, + "step": 6225 + }, + { + "epoch": 0.528682959945689, + "grad_norm": 0.429584264755249, + "learning_rate": 1.0747300935864243e-06, + "loss": 0.0488, + "num_input_tokens_seen": 5977920, + "step": 6230 + }, + { + "epoch": 0.5291072640868975, + "grad_norm": 12.29738998413086, + "learning_rate": 1.0732529691885977e-06, + "loss": 0.0588, + "num_input_tokens_seen": 5982656, + "step": 6235 + }, + { + "epoch": 0.5295315682281059, + "grad_norm": 6.842536926269531, + "learning_rate": 1.0717756840805263e-06, + "loss": 0.0431, + "num_input_tokens_seen": 5987392, + "step": 6240 + }, + { + "epoch": 0.5299558723693143, + "grad_norm": 0.5230674743652344, + "learning_rate": 1.0702982415032378e-06, + "loss": 0.0505, + "num_input_tokens_seen": 5993280, + "step": 6245 + }, + { + "epoch": 0.5303801765105227, + "grad_norm": 23.495798110961914, + "learning_rate": 1.068820644698104e-06, + "loss": 0.0423, + "num_input_tokens_seen": 5998272, + "step": 6250 + }, + { + "epoch": 0.5308044806517311, + "grad_norm": 11.772712707519531, + "learning_rate": 1.0673428969068363e-06, + "loss": 0.023, + "num_input_tokens_seen": 6002816, + "step": 6255 + }, + { + "epoch": 0.5312287847929396, + "grad_norm": 23.144899368286133, + "learning_rate": 1.0658650013714765e-06, + "loss": 0.028, + "num_input_tokens_seen": 6007744, + "step": 6260 + }, + { + "epoch": 0.531653088934148, + "grad_norm": 0.028380636125802994, + "learning_rate": 1.0643869613343906e-06, + "loss": 0.099, + "num_input_tokens_seen": 6011776, + "step": 6265 + }, + { + "epoch": 0.5320773930753564, + "grad_norm": 0.04158276692032814, + "learning_rate": 1.062908780038262e-06, + "loss": 0.0939, + "num_input_tokens_seen": 6017344, + "step": 6270 + }, + { + "epoch": 0.5325016972165648, + "grad_norm": 9.953067779541016, + "learning_rate": 1.0614304607260843e-06, + "loss": 0.0401, + "num_input_tokens_seen": 6022144, + "step": 6275 + }, + { + "epoch": 0.5329260013577732, + "grad_norm": 22.902023315429688, + "learning_rate": 1.0599520066411529e-06, + "loss": 0.0351, + "num_input_tokens_seen": 6027712, + "step": 6280 + }, + { + "epoch": 0.5333503054989817, + "grad_norm": 13.597489356994629, + "learning_rate": 1.0584734210270597e-06, + "loss": 0.0586, + "num_input_tokens_seen": 6032064, + "step": 6285 + }, + { + "epoch": 0.5337746096401901, + "grad_norm": 0.03597741574048996, + "learning_rate": 1.0569947071276845e-06, + "loss": 0.0382, + "num_input_tokens_seen": 6036288, + "step": 6290 + }, + { + "epoch": 0.5341989137813985, + "grad_norm": 17.50570297241211, + "learning_rate": 1.0555158681871897e-06, + "loss": 0.0912, + "num_input_tokens_seen": 6040960, + "step": 6295 + }, + { + "epoch": 0.5346232179226069, + "grad_norm": 15.3289155960083, + "learning_rate": 1.0540369074500103e-06, + "loss": 0.0313, + "num_input_tokens_seen": 6045376, + "step": 6300 + }, + { + "epoch": 0.5350475220638153, + "grad_norm": 10.554159164428711, + "learning_rate": 1.0525578281608503e-06, + "loss": 0.0809, + "num_input_tokens_seen": 6049856, + "step": 6305 + }, + { + "epoch": 0.5354718262050238, + "grad_norm": 0.8359688520431519, + "learning_rate": 1.0510786335646725e-06, + "loss": 0.0089, + "num_input_tokens_seen": 6054144, + "step": 6310 + }, + { + "epoch": 0.5358961303462322, + "grad_norm": 6.878500938415527, + "learning_rate": 1.0495993269066935e-06, + "loss": 0.0477, + "num_input_tokens_seen": 6060032, + "step": 6315 + }, + { + "epoch": 0.5363204344874406, + "grad_norm": 15.124363899230957, + "learning_rate": 1.0481199114323746e-06, + "loss": 0.132, + "num_input_tokens_seen": 6065280, + "step": 6320 + }, + { + "epoch": 0.536744738628649, + "grad_norm": 0.35588526725769043, + "learning_rate": 1.0466403903874175e-06, + "loss": 0.0846, + "num_input_tokens_seen": 6070080, + "step": 6325 + }, + { + "epoch": 0.5371690427698574, + "grad_norm": 6.994991779327393, + "learning_rate": 1.0451607670177543e-06, + "loss": 0.0642, + "num_input_tokens_seen": 6076032, + "step": 6330 + }, + { + "epoch": 0.5375933469110659, + "grad_norm": 11.849706649780273, + "learning_rate": 1.0436810445695421e-06, + "loss": 0.0577, + "num_input_tokens_seen": 6080768, + "step": 6335 + }, + { + "epoch": 0.5380176510522743, + "grad_norm": 10.08267879486084, + "learning_rate": 1.0422012262891548e-06, + "loss": 0.0294, + "num_input_tokens_seen": 6085312, + "step": 6340 + }, + { + "epoch": 0.5384419551934827, + "grad_norm": 19.3542537689209, + "learning_rate": 1.0407213154231774e-06, + "loss": 0.0297, + "num_input_tokens_seen": 6090048, + "step": 6345 + }, + { + "epoch": 0.5388662593346911, + "grad_norm": 56.21127700805664, + "learning_rate": 1.0392413152183973e-06, + "loss": 0.0992, + "num_input_tokens_seen": 6094720, + "step": 6350 + }, + { + "epoch": 0.5392905634758995, + "grad_norm": 18.282859802246094, + "learning_rate": 1.0377612289217982e-06, + "loss": 0.1116, + "num_input_tokens_seen": 6099456, + "step": 6355 + }, + { + "epoch": 0.539714867617108, + "grad_norm": 0.08463834971189499, + "learning_rate": 1.0362810597805524e-06, + "loss": 0.033, + "num_input_tokens_seen": 6104448, + "step": 6360 + }, + { + "epoch": 0.5401391717583164, + "grad_norm": 0.2075161337852478, + "learning_rate": 1.0348008110420149e-06, + "loss": 0.0312, + "num_input_tokens_seen": 6109056, + "step": 6365 + }, + { + "epoch": 0.5405634758995248, + "grad_norm": 26.991762161254883, + "learning_rate": 1.0333204859537142e-06, + "loss": 0.088, + "num_input_tokens_seen": 6114496, + "step": 6370 + }, + { + "epoch": 0.5409877800407332, + "grad_norm": 0.06256894022226334, + "learning_rate": 1.0318400877633466e-06, + "loss": 0.0882, + "num_input_tokens_seen": 6119360, + "step": 6375 + }, + { + "epoch": 0.5414120841819416, + "grad_norm": 9.642011642456055, + "learning_rate": 1.030359619718769e-06, + "loss": 0.0926, + "num_input_tokens_seen": 6124352, + "step": 6380 + }, + { + "epoch": 0.5418363883231501, + "grad_norm": 1.3404682874679565, + "learning_rate": 1.0288790850679916e-06, + "loss": 0.0509, + "num_input_tokens_seen": 6128832, + "step": 6385 + }, + { + "epoch": 0.5422606924643585, + "grad_norm": 21.819734573364258, + "learning_rate": 1.0273984870591706e-06, + "loss": 0.1019, + "num_input_tokens_seen": 6133312, + "step": 6390 + }, + { + "epoch": 0.5426849966055669, + "grad_norm": 28.43328094482422, + "learning_rate": 1.025917828940601e-06, + "loss": 0.0668, + "num_input_tokens_seen": 6137600, + "step": 6395 + }, + { + "epoch": 0.5431093007467753, + "grad_norm": 6.486978054046631, + "learning_rate": 1.02443711396071e-06, + "loss": 0.0544, + "num_input_tokens_seen": 6142464, + "step": 6400 + }, + { + "epoch": 0.5435336048879837, + "grad_norm": 0.3852335512638092, + "learning_rate": 1.0229563453680495e-06, + "loss": 0.0586, + "num_input_tokens_seen": 6147072, + "step": 6405 + }, + { + "epoch": 0.5439579090291922, + "grad_norm": 25.111413955688477, + "learning_rate": 1.021475526411289e-06, + "loss": 0.045, + "num_input_tokens_seen": 6151744, + "step": 6410 + }, + { + "epoch": 0.5443822131704006, + "grad_norm": 9.803653717041016, + "learning_rate": 1.0199946603392078e-06, + "loss": 0.085, + "num_input_tokens_seen": 6156672, + "step": 6415 + }, + { + "epoch": 0.544806517311609, + "grad_norm": 37.152095794677734, + "learning_rate": 1.01851375040069e-06, + "loss": 0.0563, + "num_input_tokens_seen": 6161600, + "step": 6420 + }, + { + "epoch": 0.5452308214528174, + "grad_norm": 0.2186758667230606, + "learning_rate": 1.0170327998447149e-06, + "loss": 0.0312, + "num_input_tokens_seen": 6165760, + "step": 6425 + }, + { + "epoch": 0.5456551255940258, + "grad_norm": 1.1985291242599487, + "learning_rate": 1.015551811920351e-06, + "loss": 0.0114, + "num_input_tokens_seen": 6174912, + "step": 6430 + }, + { + "epoch": 0.5460794297352343, + "grad_norm": 10.91264533996582, + "learning_rate": 1.014070789876749e-06, + "loss": 0.0856, + "num_input_tokens_seen": 6179136, + "step": 6435 + }, + { + "epoch": 0.5465037338764427, + "grad_norm": 1.4603297710418701, + "learning_rate": 1.0125897369631342e-06, + "loss": 0.1228, + "num_input_tokens_seen": 6183680, + "step": 6440 + }, + { + "epoch": 0.546928038017651, + "grad_norm": 32.919151306152344, + "learning_rate": 1.0111086564288003e-06, + "loss": 0.0587, + "num_input_tokens_seen": 6188608, + "step": 6445 + }, + { + "epoch": 0.5473523421588594, + "grad_norm": 2.691481828689575, + "learning_rate": 1.009627551523101e-06, + "loss": 0.0592, + "num_input_tokens_seen": 6193600, + "step": 6450 + }, + { + "epoch": 0.5477766463000678, + "grad_norm": 0.18778395652770996, + "learning_rate": 1.008146425495443e-06, + "loss": 0.0367, + "num_input_tokens_seen": 6198528, + "step": 6455 + }, + { + "epoch": 0.5482009504412763, + "grad_norm": 0.21035797894001007, + "learning_rate": 1.0066652815952805e-06, + "loss": 0.0359, + "num_input_tokens_seen": 6204096, + "step": 6460 + }, + { + "epoch": 0.5486252545824847, + "grad_norm": 20.11526107788086, + "learning_rate": 1.0051841230721063e-06, + "loss": 0.0465, + "num_input_tokens_seen": 6208704, + "step": 6465 + }, + { + "epoch": 0.5490495587236931, + "grad_norm": 7.261089324951172, + "learning_rate": 1.0037029531754453e-06, + "loss": 0.1061, + "num_input_tokens_seen": 6213440, + "step": 6470 + }, + { + "epoch": 0.5494738628649015, + "grad_norm": 0.9366330504417419, + "learning_rate": 1.002221775154847e-06, + "loss": 0.0551, + "num_input_tokens_seen": 6218368, + "step": 6475 + }, + { + "epoch": 0.5498981670061099, + "grad_norm": 2.7935664653778076, + "learning_rate": 1.0007405922598793e-06, + "loss": 0.0618, + "num_input_tokens_seen": 6223616, + "step": 6480 + }, + { + "epoch": 0.5503224711473184, + "grad_norm": 0.42181113362312317, + "learning_rate": 9.992594077401208e-07, + "loss": 0.0938, + "num_input_tokens_seen": 6228480, + "step": 6485 + }, + { + "epoch": 0.5507467752885268, + "grad_norm": 5.8830180168151855, + "learning_rate": 9.977782248451534e-07, + "loss": 0.0534, + "num_input_tokens_seen": 6232896, + "step": 6490 + }, + { + "epoch": 0.5507467752885268, + "eval_loss": 0.05577274411916733, + "eval_runtime": 15.823, + "eval_samples_per_second": 662.009, + "eval_steps_per_second": 82.791, + "num_input_tokens_seen": 6232896, + "step": 6490 + }, + { + "epoch": 0.5511710794297352, + "grad_norm": 37.08064651489258, + "learning_rate": 9.962970468245548e-07, + "loss": 0.0959, + "num_input_tokens_seen": 6237696, + "step": 6495 + }, + { + "epoch": 0.5515953835709436, + "grad_norm": 1.1415743827819824, + "learning_rate": 9.948158769278939e-07, + "loss": 0.0324, + "num_input_tokens_seen": 6242304, + "step": 6500 + }, + { + "epoch": 0.552019687712152, + "grad_norm": 7.484726905822754, + "learning_rate": 9.933347184047194e-07, + "loss": 0.0393, + "num_input_tokens_seen": 6246976, + "step": 6505 + }, + { + "epoch": 0.5524439918533605, + "grad_norm": 18.209239959716797, + "learning_rate": 9.918535745045571e-07, + "loss": 0.0629, + "num_input_tokens_seen": 6251264, + "step": 6510 + }, + { + "epoch": 0.5528682959945689, + "grad_norm": 14.765807151794434, + "learning_rate": 9.903724484768991e-07, + "loss": 0.0104, + "num_input_tokens_seen": 6255872, + "step": 6515 + }, + { + "epoch": 0.5532926001357773, + "grad_norm": 0.719184935092926, + "learning_rate": 9.888913435711996e-07, + "loss": 0.0381, + "num_input_tokens_seen": 6260928, + "step": 6520 + }, + { + "epoch": 0.5537169042769857, + "grad_norm": 17.972461700439453, + "learning_rate": 9.874102630368658e-07, + "loss": 0.04, + "num_input_tokens_seen": 6265600, + "step": 6525 + }, + { + "epoch": 0.5541412084181941, + "grad_norm": 0.6293085813522339, + "learning_rate": 9.859292101232514e-07, + "loss": 0.113, + "num_input_tokens_seen": 6270464, + "step": 6530 + }, + { + "epoch": 0.5545655125594026, + "grad_norm": 24.948253631591797, + "learning_rate": 9.84448188079649e-07, + "loss": 0.0957, + "num_input_tokens_seen": 6274944, + "step": 6535 + }, + { + "epoch": 0.554989816700611, + "grad_norm": 6.423430442810059, + "learning_rate": 9.829672001552853e-07, + "loss": 0.0578, + "num_input_tokens_seen": 6279424, + "step": 6540 + }, + { + "epoch": 0.5554141208418194, + "grad_norm": 4.687989711761475, + "learning_rate": 9.8148624959931e-07, + "loss": 0.1313, + "num_input_tokens_seen": 6284096, + "step": 6545 + }, + { + "epoch": 0.5558384249830278, + "grad_norm": 5.982985973358154, + "learning_rate": 9.80005339660792e-07, + "loss": 0.03, + "num_input_tokens_seen": 6289728, + "step": 6550 + }, + { + "epoch": 0.5562627291242362, + "grad_norm": 43.957584381103516, + "learning_rate": 9.785244735887112e-07, + "loss": 0.0289, + "num_input_tokens_seen": 6294400, + "step": 6555 + }, + { + "epoch": 0.5566870332654447, + "grad_norm": 7.876846790313721, + "learning_rate": 9.770436546319504e-07, + "loss": 0.0557, + "num_input_tokens_seen": 6298880, + "step": 6560 + }, + { + "epoch": 0.5571113374066531, + "grad_norm": 26.09071922302246, + "learning_rate": 9.755628860392901e-07, + "loss": 0.0636, + "num_input_tokens_seen": 6303424, + "step": 6565 + }, + { + "epoch": 0.5575356415478615, + "grad_norm": 32.834678649902344, + "learning_rate": 9.740821710593988e-07, + "loss": 0.026, + "num_input_tokens_seen": 6308032, + "step": 6570 + }, + { + "epoch": 0.5579599456890699, + "grad_norm": 0.24545079469680786, + "learning_rate": 9.726015129408296e-07, + "loss": 0.048, + "num_input_tokens_seen": 6312832, + "step": 6575 + }, + { + "epoch": 0.5583842498302783, + "grad_norm": 6.026395797729492, + "learning_rate": 9.711209149320083e-07, + "loss": 0.035, + "num_input_tokens_seen": 6317312, + "step": 6580 + }, + { + "epoch": 0.5588085539714868, + "grad_norm": 7.616768836975098, + "learning_rate": 9.69640380281231e-07, + "loss": 0.0341, + "num_input_tokens_seen": 6321920, + "step": 6585 + }, + { + "epoch": 0.5592328581126952, + "grad_norm": 1.705358624458313, + "learning_rate": 9.681599122366533e-07, + "loss": 0.0732, + "num_input_tokens_seen": 6326336, + "step": 6590 + }, + { + "epoch": 0.5596571622539036, + "grad_norm": 0.5510636568069458, + "learning_rate": 9.66679514046286e-07, + "loss": 0.0904, + "num_input_tokens_seen": 6331008, + "step": 6595 + }, + { + "epoch": 0.560081466395112, + "grad_norm": 12.033029556274414, + "learning_rate": 9.65199188957985e-07, + "loss": 0.0866, + "num_input_tokens_seen": 6335744, + "step": 6600 + }, + { + "epoch": 0.5605057705363204, + "grad_norm": 25.936338424682617, + "learning_rate": 9.637189402194475e-07, + "loss": 0.0558, + "num_input_tokens_seen": 6340736, + "step": 6605 + }, + { + "epoch": 0.5609300746775289, + "grad_norm": 14.279778480529785, + "learning_rate": 9.622387710782017e-07, + "loss": 0.0955, + "num_input_tokens_seen": 6345216, + "step": 6610 + }, + { + "epoch": 0.5613543788187373, + "grad_norm": 9.077555656433105, + "learning_rate": 9.607586847816029e-07, + "loss": 0.0383, + "num_input_tokens_seen": 6350080, + "step": 6615 + }, + { + "epoch": 0.5617786829599457, + "grad_norm": 38.76018524169922, + "learning_rate": 9.592786845768225e-07, + "loss": 0.0689, + "num_input_tokens_seen": 6354816, + "step": 6620 + }, + { + "epoch": 0.5622029871011541, + "grad_norm": 6.977408409118652, + "learning_rate": 9.577987737108454e-07, + "loss": 0.0538, + "num_input_tokens_seen": 6360000, + "step": 6625 + }, + { + "epoch": 0.5626272912423625, + "grad_norm": 7.492920875549316, + "learning_rate": 9.563189554304578e-07, + "loss": 0.1221, + "num_input_tokens_seen": 6364672, + "step": 6630 + }, + { + "epoch": 0.563051595383571, + "grad_norm": 0.4446965456008911, + "learning_rate": 9.548392329822456e-07, + "loss": 0.0828, + "num_input_tokens_seen": 6369408, + "step": 6635 + }, + { + "epoch": 0.5634758995247794, + "grad_norm": 10.316352844238281, + "learning_rate": 9.533596096125825e-07, + "loss": 0.0482, + "num_input_tokens_seen": 6374080, + "step": 6640 + }, + { + "epoch": 0.5639002036659878, + "grad_norm": 1.064753532409668, + "learning_rate": 9.518800885676256e-07, + "loss": 0.051, + "num_input_tokens_seen": 6379200, + "step": 6645 + }, + { + "epoch": 0.5643245078071962, + "grad_norm": 0.490021675825119, + "learning_rate": 9.504006730933068e-07, + "loss": 0.0577, + "num_input_tokens_seen": 6384576, + "step": 6650 + }, + { + "epoch": 0.5647488119484046, + "grad_norm": 8.95898151397705, + "learning_rate": 9.489213664353276e-07, + "loss": 0.0797, + "num_input_tokens_seen": 6389760, + "step": 6655 + }, + { + "epoch": 0.5651731160896131, + "grad_norm": 27.54326820373535, + "learning_rate": 9.474421718391497e-07, + "loss": 0.1317, + "num_input_tokens_seen": 6394176, + "step": 6660 + }, + { + "epoch": 0.5655974202308215, + "grad_norm": 17.441795349121094, + "learning_rate": 9.459630925499897e-07, + "loss": 0.0693, + "num_input_tokens_seen": 6398976, + "step": 6665 + }, + { + "epoch": 0.5660217243720299, + "grad_norm": 12.825746536254883, + "learning_rate": 9.444841318128103e-07, + "loss": 0.0414, + "num_input_tokens_seen": 6403264, + "step": 6670 + }, + { + "epoch": 0.5664460285132383, + "grad_norm": 11.503122329711914, + "learning_rate": 9.430052928723152e-07, + "loss": 0.0771, + "num_input_tokens_seen": 6408128, + "step": 6675 + }, + { + "epoch": 0.5668703326544468, + "grad_norm": 14.999883651733398, + "learning_rate": 9.415265789729403e-07, + "loss": 0.1131, + "num_input_tokens_seen": 6412672, + "step": 6680 + }, + { + "epoch": 0.5672946367956552, + "grad_norm": 0.7394747734069824, + "learning_rate": 9.400479933588468e-07, + "loss": 0.0724, + "num_input_tokens_seen": 6417088, + "step": 6685 + }, + { + "epoch": 0.5677189409368636, + "grad_norm": 0.6897430419921875, + "learning_rate": 9.385695392739156e-07, + "loss": 0.0707, + "num_input_tokens_seen": 6421824, + "step": 6690 + }, + { + "epoch": 0.568143245078072, + "grad_norm": 2.056105852127075, + "learning_rate": 9.370912199617376e-07, + "loss": 0.0411, + "num_input_tokens_seen": 6426560, + "step": 6695 + }, + { + "epoch": 0.5685675492192804, + "grad_norm": 27.487417221069336, + "learning_rate": 9.356130386656093e-07, + "loss": 0.0867, + "num_input_tokens_seen": 6431040, + "step": 6700 + }, + { + "epoch": 0.5689918533604889, + "grad_norm": 24.241193771362305, + "learning_rate": 9.341349986285234e-07, + "loss": 0.0488, + "num_input_tokens_seen": 6435968, + "step": 6705 + }, + { + "epoch": 0.5694161575016972, + "grad_norm": 9.322391510009766, + "learning_rate": 9.326571030931636e-07, + "loss": 0.1258, + "num_input_tokens_seen": 6440640, + "step": 6710 + }, + { + "epoch": 0.5698404616429056, + "grad_norm": 0.4335402250289917, + "learning_rate": 9.311793553018958e-07, + "loss": 0.0646, + "num_input_tokens_seen": 6445504, + "step": 6715 + }, + { + "epoch": 0.570264765784114, + "grad_norm": 20.35150909423828, + "learning_rate": 9.297017584967624e-07, + "loss": 0.0453, + "num_input_tokens_seen": 6449600, + "step": 6720 + }, + { + "epoch": 0.5706890699253224, + "grad_norm": 2.6463088989257812, + "learning_rate": 9.282243159194734e-07, + "loss": 0.0386, + "num_input_tokens_seen": 6454528, + "step": 6725 + }, + { + "epoch": 0.5711133740665308, + "grad_norm": 0.11555840075016022, + "learning_rate": 9.267470308114025e-07, + "loss": 0.0768, + "num_input_tokens_seen": 6459264, + "step": 6730 + }, + { + "epoch": 0.5715376782077393, + "grad_norm": 20.841169357299805, + "learning_rate": 9.252699064135758e-07, + "loss": 0.078, + "num_input_tokens_seen": 6463552, + "step": 6735 + }, + { + "epoch": 0.5719619823489477, + "grad_norm": 0.19547468423843384, + "learning_rate": 9.23792945966668e-07, + "loss": 0.0072, + "num_input_tokens_seen": 6468608, + "step": 6740 + }, + { + "epoch": 0.5723862864901561, + "grad_norm": 11.617748260498047, + "learning_rate": 9.223161527109936e-07, + "loss": 0.0303, + "num_input_tokens_seen": 6473408, + "step": 6745 + }, + { + "epoch": 0.5728105906313645, + "grad_norm": 0.14943847060203552, + "learning_rate": 9.208395298865014e-07, + "loss": 0.0353, + "num_input_tokens_seen": 6478656, + "step": 6750 + }, + { + "epoch": 0.573234894772573, + "grad_norm": 6.509980201721191, + "learning_rate": 9.19363080732764e-07, + "loss": 0.0986, + "num_input_tokens_seen": 6483328, + "step": 6755 + }, + { + "epoch": 0.5736591989137814, + "grad_norm": 0.04856886342167854, + "learning_rate": 9.178868084889756e-07, + "loss": 0.0211, + "num_input_tokens_seen": 6488064, + "step": 6760 + }, + { + "epoch": 0.5740835030549898, + "grad_norm": 49.04362869262695, + "learning_rate": 9.164107163939401e-07, + "loss": 0.1111, + "num_input_tokens_seen": 6492864, + "step": 6765 + }, + { + "epoch": 0.5745078071961982, + "grad_norm": 0.417856901884079, + "learning_rate": 9.149348076860685e-07, + "loss": 0.038, + "num_input_tokens_seen": 6497216, + "step": 6770 + }, + { + "epoch": 0.5749321113374066, + "grad_norm": 8.41581916809082, + "learning_rate": 9.134590856033664e-07, + "loss": 0.0373, + "num_input_tokens_seen": 6501888, + "step": 6775 + }, + { + "epoch": 0.575356415478615, + "grad_norm": 21.148208618164062, + "learning_rate": 9.11983553383433e-07, + "loss": 0.0708, + "num_input_tokens_seen": 6507200, + "step": 6780 + }, + { + "epoch": 0.5757807196198235, + "grad_norm": 0.28288573026657104, + "learning_rate": 9.105082142634489e-07, + "loss": 0.0143, + "num_input_tokens_seen": 6515840, + "step": 6785 + }, + { + "epoch": 0.5762050237610319, + "grad_norm": 11.684349060058594, + "learning_rate": 9.090330714801723e-07, + "loss": 0.1098, + "num_input_tokens_seen": 6520384, + "step": 6790 + }, + { + "epoch": 0.5766293279022403, + "grad_norm": 4.981570243835449, + "learning_rate": 9.075581282699294e-07, + "loss": 0.0894, + "num_input_tokens_seen": 6524992, + "step": 6795 + }, + { + "epoch": 0.5770536320434487, + "grad_norm": 16.109561920166016, + "learning_rate": 9.060833878686098e-07, + "loss": 0.1289, + "num_input_tokens_seen": 6532160, + "step": 6800 + }, + { + "epoch": 0.5774779361846571, + "grad_norm": 1.6210087537765503, + "learning_rate": 9.046088535116581e-07, + "loss": 0.0264, + "num_input_tokens_seen": 6536384, + "step": 6805 + }, + { + "epoch": 0.5779022403258656, + "grad_norm": 1.7853858470916748, + "learning_rate": 9.031345284340652e-07, + "loss": 0.0253, + "num_input_tokens_seen": 6540800, + "step": 6810 + }, + { + "epoch": 0.578326544467074, + "grad_norm": 15.93824291229248, + "learning_rate": 9.016604158703654e-07, + "loss": 0.1609, + "num_input_tokens_seen": 6545216, + "step": 6815 + }, + { + "epoch": 0.5787508486082824, + "grad_norm": 0.15775707364082336, + "learning_rate": 9.001865190546244e-07, + "loss": 0.0496, + "num_input_tokens_seen": 6550400, + "step": 6820 + }, + { + "epoch": 0.5791751527494908, + "grad_norm": 2.840294599533081, + "learning_rate": 8.987128412204363e-07, + "loss": 0.0241, + "num_input_tokens_seen": 6554752, + "step": 6825 + }, + { + "epoch": 0.5795994568906992, + "grad_norm": 12.399237632751465, + "learning_rate": 8.972393856009132e-07, + "loss": 0.0436, + "num_input_tokens_seen": 6559616, + "step": 6830 + }, + { + "epoch": 0.5800237610319077, + "grad_norm": 18.532922744750977, + "learning_rate": 8.957661554286817e-07, + "loss": 0.0387, + "num_input_tokens_seen": 6564608, + "step": 6835 + }, + { + "epoch": 0.5804480651731161, + "grad_norm": 10.585027694702148, + "learning_rate": 8.942931539358718e-07, + "loss": 0.0477, + "num_input_tokens_seen": 6569024, + "step": 6840 + }, + { + "epoch": 0.5808723693143245, + "grad_norm": 0.14699456095695496, + "learning_rate": 8.928203843541131e-07, + "loss": 0.056, + "num_input_tokens_seen": 6574016, + "step": 6845 + }, + { + "epoch": 0.5812966734555329, + "grad_norm": 3.8574206829071045, + "learning_rate": 8.913478499145254e-07, + "loss": 0.0153, + "num_input_tokens_seen": 6578944, + "step": 6850 + }, + { + "epoch": 0.5817209775967414, + "grad_norm": 17.667245864868164, + "learning_rate": 8.898755538477138e-07, + "loss": 0.0494, + "num_input_tokens_seen": 6584192, + "step": 6855 + }, + { + "epoch": 0.5821452817379498, + "grad_norm": 13.899635314941406, + "learning_rate": 8.884034993837594e-07, + "loss": 0.1016, + "num_input_tokens_seen": 6589056, + "step": 6860 + }, + { + "epoch": 0.5825695858791582, + "grad_norm": 23.75398826599121, + "learning_rate": 8.869316897522141e-07, + "loss": 0.0777, + "num_input_tokens_seen": 6593536, + "step": 6865 + }, + { + "epoch": 0.5829938900203666, + "grad_norm": 17.726505279541016, + "learning_rate": 8.854601281820914e-07, + "loss": 0.0843, + "num_input_tokens_seen": 6600128, + "step": 6870 + }, + { + "epoch": 0.583418194161575, + "grad_norm": 9.943798065185547, + "learning_rate": 8.839888179018621e-07, + "loss": 0.0393, + "num_input_tokens_seen": 6604864, + "step": 6875 + }, + { + "epoch": 0.5838424983027835, + "grad_norm": 17.00997543334961, + "learning_rate": 8.825177621394449e-07, + "loss": 0.0228, + "num_input_tokens_seen": 6609728, + "step": 6880 + }, + { + "epoch": 0.5842668024439919, + "grad_norm": 0.7779857516288757, + "learning_rate": 8.810469641222001e-07, + "loss": 0.0307, + "num_input_tokens_seen": 6615104, + "step": 6885 + }, + { + "epoch": 0.5846911065852003, + "grad_norm": 0.525295078754425, + "learning_rate": 8.795764270769221e-07, + "loss": 0.0439, + "num_input_tokens_seen": 6620096, + "step": 6890 + }, + { + "epoch": 0.5851154107264087, + "grad_norm": 22.693933486938477, + "learning_rate": 8.781061542298341e-07, + "loss": 0.0911, + "num_input_tokens_seen": 6624448, + "step": 6895 + }, + { + "epoch": 0.5855397148676171, + "grad_norm": 26.4716854095459, + "learning_rate": 8.766361488065783e-07, + "loss": 0.1139, + "num_input_tokens_seen": 6628800, + "step": 6900 + }, + { + "epoch": 0.5859640190088256, + "grad_norm": 20.0168399810791, + "learning_rate": 8.751664140322112e-07, + "loss": 0.046, + "num_input_tokens_seen": 6633664, + "step": 6905 + }, + { + "epoch": 0.586388323150034, + "grad_norm": 19.89805030822754, + "learning_rate": 8.736969531311942e-07, + "loss": 0.0924, + "num_input_tokens_seen": 6638720, + "step": 6910 + }, + { + "epoch": 0.5868126272912424, + "grad_norm": 0.6657816171646118, + "learning_rate": 8.7222776932739e-07, + "loss": 0.0468, + "num_input_tokens_seen": 6643008, + "step": 6915 + }, + { + "epoch": 0.5872369314324508, + "grad_norm": 2.8613314628601074, + "learning_rate": 8.70758865844051e-07, + "loss": 0.0501, + "num_input_tokens_seen": 6647360, + "step": 6920 + }, + { + "epoch": 0.5876612355736592, + "grad_norm": 0.22243715822696686, + "learning_rate": 8.69290245903816e-07, + "loss": 0.0371, + "num_input_tokens_seen": 6652032, + "step": 6925 + }, + { + "epoch": 0.5880855397148677, + "grad_norm": 33.91285705566406, + "learning_rate": 8.678219127287018e-07, + "loss": 0.0833, + "num_input_tokens_seen": 6656320, + "step": 6930 + }, + { + "epoch": 0.5885098438560761, + "grad_norm": 2.0737743377685547, + "learning_rate": 8.663538695400951e-07, + "loss": 0.0731, + "num_input_tokens_seen": 6660928, + "step": 6935 + }, + { + "epoch": 0.5889341479972845, + "grad_norm": 0.5914128422737122, + "learning_rate": 8.648861195587475e-07, + "loss": 0.027, + "num_input_tokens_seen": 6665856, + "step": 6940 + }, + { + "epoch": 0.5893584521384929, + "grad_norm": 0.4644359350204468, + "learning_rate": 8.634186660047663e-07, + "loss": 0.0478, + "num_input_tokens_seen": 6670144, + "step": 6945 + }, + { + "epoch": 0.5897827562797013, + "grad_norm": 23.87814712524414, + "learning_rate": 8.619515120976097e-07, + "loss": 0.0928, + "num_input_tokens_seen": 6675264, + "step": 6950 + }, + { + "epoch": 0.5902070604209098, + "grad_norm": 8.000229835510254, + "learning_rate": 8.60484661056077e-07, + "loss": 0.13, + "num_input_tokens_seen": 6679552, + "step": 6955 + }, + { + "epoch": 0.5906313645621182, + "grad_norm": 17.253625869750977, + "learning_rate": 8.590181160983043e-07, + "loss": 0.0307, + "num_input_tokens_seen": 6683904, + "step": 6960 + }, + { + "epoch": 0.5910556687033266, + "grad_norm": 13.027657508850098, + "learning_rate": 8.575518804417552e-07, + "loss": 0.0527, + "num_input_tokens_seen": 6688320, + "step": 6965 + }, + { + "epoch": 0.591479972844535, + "grad_norm": 1.04340398311615, + "learning_rate": 8.560859573032161e-07, + "loss": 0.0419, + "num_input_tokens_seen": 6693696, + "step": 6970 + }, + { + "epoch": 0.5919042769857433, + "grad_norm": 2.2827279567718506, + "learning_rate": 8.546203498987861e-07, + "loss": 0.0631, + "num_input_tokens_seen": 6698496, + "step": 6975 + }, + { + "epoch": 0.5923285811269517, + "grad_norm": 8.269172668457031, + "learning_rate": 8.531550614438729e-07, + "loss": 0.0257, + "num_input_tokens_seen": 6704192, + "step": 6980 + }, + { + "epoch": 0.5927528852681602, + "grad_norm": 7.1176042556762695, + "learning_rate": 8.516900951531832e-07, + "loss": 0.0148, + "num_input_tokens_seen": 6708480, + "step": 6985 + }, + { + "epoch": 0.5931771894093686, + "grad_norm": 0.349401593208313, + "learning_rate": 8.502254542407185e-07, + "loss": 0.0556, + "num_input_tokens_seen": 6713856, + "step": 6990 + }, + { + "epoch": 0.593601493550577, + "grad_norm": 6.542207717895508, + "learning_rate": 8.487611419197653e-07, + "loss": 0.1009, + "num_input_tokens_seen": 6719104, + "step": 6995 + }, + { + "epoch": 0.5940257976917854, + "grad_norm": 23.163578033447266, + "learning_rate": 8.472971614028895e-07, + "loss": 0.0662, + "num_input_tokens_seen": 6723328, + "step": 7000 + }, + { + "epoch": 0.5944501018329938, + "grad_norm": 0.11186230182647705, + "learning_rate": 8.458335159019288e-07, + "loss": 0.0326, + "num_input_tokens_seen": 6728064, + "step": 7005 + }, + { + "epoch": 0.5948744059742023, + "grad_norm": 0.16995789110660553, + "learning_rate": 8.443702086279866e-07, + "loss": 0.0827, + "num_input_tokens_seen": 6732864, + "step": 7010 + }, + { + "epoch": 0.5952987101154107, + "grad_norm": 0.8537437915802002, + "learning_rate": 8.429072427914235e-07, + "loss": 0.0157, + "num_input_tokens_seen": 6737792, + "step": 7015 + }, + { + "epoch": 0.5957230142566191, + "grad_norm": 7.274645805358887, + "learning_rate": 8.414446216018516e-07, + "loss": 0.0443, + "num_input_tokens_seen": 6742848, + "step": 7020 + }, + { + "epoch": 0.5961473183978275, + "grad_norm": 0.6011242866516113, + "learning_rate": 8.399823482681261e-07, + "loss": 0.0561, + "num_input_tokens_seen": 6748160, + "step": 7025 + }, + { + "epoch": 0.596571622539036, + "grad_norm": 0.3317387104034424, + "learning_rate": 8.385204259983403e-07, + "loss": 0.0759, + "num_input_tokens_seen": 6752960, + "step": 7030 + }, + { + "epoch": 0.5969959266802444, + "grad_norm": 7.0838727951049805, + "learning_rate": 8.37058857999816e-07, + "loss": 0.0535, + "num_input_tokens_seen": 6758016, + "step": 7035 + }, + { + "epoch": 0.5974202308214528, + "grad_norm": 13.913290023803711, + "learning_rate": 8.355976474790987e-07, + "loss": 0.104, + "num_input_tokens_seen": 6762688, + "step": 7040 + }, + { + "epoch": 0.5978445349626612, + "grad_norm": 13.374975204467773, + "learning_rate": 8.341367976419485e-07, + "loss": 0.0365, + "num_input_tokens_seen": 6767424, + "step": 7045 + }, + { + "epoch": 0.5982688391038696, + "grad_norm": 8.739448547363281, + "learning_rate": 8.326763116933359e-07, + "loss": 0.0831, + "num_input_tokens_seen": 6771648, + "step": 7050 + }, + { + "epoch": 0.598693143245078, + "grad_norm": 8.212983131408691, + "learning_rate": 8.312161928374317e-07, + "loss": 0.0632, + "num_input_tokens_seen": 6776832, + "step": 7055 + }, + { + "epoch": 0.5991174473862865, + "grad_norm": 0.2802315950393677, + "learning_rate": 8.297564442776012e-07, + "loss": 0.0284, + "num_input_tokens_seen": 6781120, + "step": 7060 + }, + { + "epoch": 0.5995417515274949, + "grad_norm": 0.3204219937324524, + "learning_rate": 8.282970692163988e-07, + "loss": 0.0353, + "num_input_tokens_seen": 6785472, + "step": 7065 + }, + { + "epoch": 0.5999660556687033, + "grad_norm": 13.889245986938477, + "learning_rate": 8.268380708555579e-07, + "loss": 0.0856, + "num_input_tokens_seen": 6791488, + "step": 7070 + }, + { + "epoch": 0.6003903598099117, + "grad_norm": 0.5243129134178162, + "learning_rate": 8.253794523959863e-07, + "loss": 0.081, + "num_input_tokens_seen": 6797120, + "step": 7075 + }, + { + "epoch": 0.6008146639511202, + "grad_norm": 5.982780456542969, + "learning_rate": 8.239212170377576e-07, + "loss": 0.0467, + "num_input_tokens_seen": 6801984, + "step": 7080 + }, + { + "epoch": 0.6008146639511202, + "eval_loss": 0.05977928265929222, + "eval_runtime": 15.8463, + "eval_samples_per_second": 661.039, + "eval_steps_per_second": 82.669, + "num_input_tokens_seen": 6801984, + "step": 7080 + }, + { + "epoch": 0.6012389680923286, + "grad_norm": 7.727343559265137, + "learning_rate": 8.224633679801062e-07, + "loss": 0.0763, + "num_input_tokens_seen": 6806528, + "step": 7085 + }, + { + "epoch": 0.601663272233537, + "grad_norm": 13.581296920776367, + "learning_rate": 8.210059084214176e-07, + "loss": 0.1491, + "num_input_tokens_seen": 6811456, + "step": 7090 + }, + { + "epoch": 0.6020875763747454, + "grad_norm": 0.4599580764770508, + "learning_rate": 8.195488415592237e-07, + "loss": 0.027, + "num_input_tokens_seen": 6815872, + "step": 7095 + }, + { + "epoch": 0.6025118805159538, + "grad_norm": 0.5491631627082825, + "learning_rate": 8.180921705901941e-07, + "loss": 0.0177, + "num_input_tokens_seen": 6821376, + "step": 7100 + }, + { + "epoch": 0.6029361846571623, + "grad_norm": 7.0324506759643555, + "learning_rate": 8.16635898710131e-07, + "loss": 0.0499, + "num_input_tokens_seen": 6826688, + "step": 7105 + }, + { + "epoch": 0.6033604887983707, + "grad_norm": 0.3288812041282654, + "learning_rate": 8.151800291139596e-07, + "loss": 0.0411, + "num_input_tokens_seen": 6831680, + "step": 7110 + }, + { + "epoch": 0.6037847929395791, + "grad_norm": 1.8278636932373047, + "learning_rate": 8.137245649957239e-07, + "loss": 0.0234, + "num_input_tokens_seen": 6836032, + "step": 7115 + }, + { + "epoch": 0.6042090970807875, + "grad_norm": 23.321144104003906, + "learning_rate": 8.122695095485767e-07, + "loss": 0.0511, + "num_input_tokens_seen": 6840576, + "step": 7120 + }, + { + "epoch": 0.6046334012219959, + "grad_norm": 0.11708024889230728, + "learning_rate": 8.108148659647764e-07, + "loss": 0.0629, + "num_input_tokens_seen": 6845696, + "step": 7125 + }, + { + "epoch": 0.6050577053632044, + "grad_norm": 0.16852536797523499, + "learning_rate": 8.093606374356758e-07, + "loss": 0.0581, + "num_input_tokens_seen": 6849984, + "step": 7130 + }, + { + "epoch": 0.6054820095044128, + "grad_norm": 0.33735671639442444, + "learning_rate": 8.079068271517182e-07, + "loss": 0.0429, + "num_input_tokens_seen": 6854592, + "step": 7135 + }, + { + "epoch": 0.6059063136456212, + "grad_norm": 30.76317024230957, + "learning_rate": 8.064534383024284e-07, + "loss": 0.1295, + "num_input_tokens_seen": 6859584, + "step": 7140 + }, + { + "epoch": 0.6063306177868296, + "grad_norm": 0.9654586911201477, + "learning_rate": 8.050004740764082e-07, + "loss": 0.062, + "num_input_tokens_seen": 6864320, + "step": 7145 + }, + { + "epoch": 0.606754921928038, + "grad_norm": 6.901752948760986, + "learning_rate": 8.035479376613261e-07, + "loss": 0.038, + "num_input_tokens_seen": 6868672, + "step": 7150 + }, + { + "epoch": 0.6071792260692465, + "grad_norm": 2.7611167430877686, + "learning_rate": 8.020958322439132e-07, + "loss": 0.0651, + "num_input_tokens_seen": 6873088, + "step": 7155 + }, + { + "epoch": 0.6076035302104549, + "grad_norm": 29.122224807739258, + "learning_rate": 8.006441610099539e-07, + "loss": 0.0519, + "num_input_tokens_seen": 6877568, + "step": 7160 + }, + { + "epoch": 0.6080278343516633, + "grad_norm": 13.105555534362793, + "learning_rate": 7.991929271442817e-07, + "loss": 0.1173, + "num_input_tokens_seen": 6882112, + "step": 7165 + }, + { + "epoch": 0.6084521384928717, + "grad_norm": 14.10412311553955, + "learning_rate": 7.977421338307687e-07, + "loss": 0.1062, + "num_input_tokens_seen": 6886720, + "step": 7170 + }, + { + "epoch": 0.6088764426340801, + "grad_norm": 0.5990397334098816, + "learning_rate": 7.962917842523215e-07, + "loss": 0.0485, + "num_input_tokens_seen": 6891584, + "step": 7175 + }, + { + "epoch": 0.6093007467752886, + "grad_norm": 34.55642318725586, + "learning_rate": 7.94841881590874e-07, + "loss": 0.0222, + "num_input_tokens_seen": 6896320, + "step": 7180 + }, + { + "epoch": 0.609725050916497, + "grad_norm": 11.046833992004395, + "learning_rate": 7.933924290273774e-07, + "loss": 0.1228, + "num_input_tokens_seen": 6900992, + "step": 7185 + }, + { + "epoch": 0.6101493550577054, + "grad_norm": 4.509375095367432, + "learning_rate": 7.919434297417976e-07, + "loss": 0.0742, + "num_input_tokens_seen": 6905600, + "step": 7190 + }, + { + "epoch": 0.6105736591989138, + "grad_norm": 13.198589324951172, + "learning_rate": 7.904948869131039e-07, + "loss": 0.103, + "num_input_tokens_seen": 6910208, + "step": 7195 + }, + { + "epoch": 0.6109979633401222, + "grad_norm": 40.57453918457031, + "learning_rate": 7.89046803719267e-07, + "loss": 0.0421, + "num_input_tokens_seen": 6915264, + "step": 7200 + }, + { + "epoch": 0.6114222674813307, + "grad_norm": 2.951673984527588, + "learning_rate": 7.875991833372463e-07, + "loss": 0.0266, + "num_input_tokens_seen": 6919808, + "step": 7205 + }, + { + "epoch": 0.6118465716225391, + "grad_norm": 5.2823872566223145, + "learning_rate": 7.861520289429879e-07, + "loss": 0.0321, + "num_input_tokens_seen": 6924608, + "step": 7210 + }, + { + "epoch": 0.6122708757637475, + "grad_norm": 7.0157623291015625, + "learning_rate": 7.847053437114141e-07, + "loss": 0.0338, + "num_input_tokens_seen": 6929344, + "step": 7215 + }, + { + "epoch": 0.6126951799049559, + "grad_norm": 10.4129638671875, + "learning_rate": 7.832591308164193e-07, + "loss": 0.1193, + "num_input_tokens_seen": 6934464, + "step": 7220 + }, + { + "epoch": 0.6131194840461643, + "grad_norm": 39.73077392578125, + "learning_rate": 7.818133934308606e-07, + "loss": 0.0717, + "num_input_tokens_seen": 6939456, + "step": 7225 + }, + { + "epoch": 0.6135437881873728, + "grad_norm": 0.44385433197021484, + "learning_rate": 7.803681347265524e-07, + "loss": 0.0072, + "num_input_tokens_seen": 6943808, + "step": 7230 + }, + { + "epoch": 0.6139680923285811, + "grad_norm": 0.2843058407306671, + "learning_rate": 7.789233578742583e-07, + "loss": 0.0254, + "num_input_tokens_seen": 6948736, + "step": 7235 + }, + { + "epoch": 0.6143923964697895, + "grad_norm": 12.6770658493042, + "learning_rate": 7.774790660436857e-07, + "loss": 0.0926, + "num_input_tokens_seen": 6953792, + "step": 7240 + }, + { + "epoch": 0.6148167006109979, + "grad_norm": 0.1394738405942917, + "learning_rate": 7.760352624034769e-07, + "loss": 0.0226, + "num_input_tokens_seen": 6958656, + "step": 7245 + }, + { + "epoch": 0.6152410047522063, + "grad_norm": 39.0544319152832, + "learning_rate": 7.745919501212043e-07, + "loss": 0.0326, + "num_input_tokens_seen": 6963200, + "step": 7250 + }, + { + "epoch": 0.6156653088934148, + "grad_norm": 9.968228340148926, + "learning_rate": 7.731491323633608e-07, + "loss": 0.0307, + "num_input_tokens_seen": 6968448, + "step": 7255 + }, + { + "epoch": 0.6160896130346232, + "grad_norm": 12.081189155578613, + "learning_rate": 7.71706812295356e-07, + "loss": 0.0835, + "num_input_tokens_seen": 6973888, + "step": 7260 + }, + { + "epoch": 0.6165139171758316, + "grad_norm": 9.74222183227539, + "learning_rate": 7.702649930815065e-07, + "loss": 0.0817, + "num_input_tokens_seen": 6978304, + "step": 7265 + }, + { + "epoch": 0.61693822131704, + "grad_norm": 12.646188735961914, + "learning_rate": 7.688236778850306e-07, + "loss": 0.0605, + "num_input_tokens_seen": 6983168, + "step": 7270 + }, + { + "epoch": 0.6173625254582484, + "grad_norm": 1.385225534439087, + "learning_rate": 7.6738286986804e-07, + "loss": 0.0463, + "num_input_tokens_seen": 6988224, + "step": 7275 + }, + { + "epoch": 0.6177868295994569, + "grad_norm": 11.31658935546875, + "learning_rate": 7.659425721915351e-07, + "loss": 0.0742, + "num_input_tokens_seen": 6992448, + "step": 7280 + }, + { + "epoch": 0.6182111337406653, + "grad_norm": 11.948744773864746, + "learning_rate": 7.645027880153956e-07, + "loss": 0.0507, + "num_input_tokens_seen": 6996864, + "step": 7285 + }, + { + "epoch": 0.6186354378818737, + "grad_norm": 6.7950544357299805, + "learning_rate": 7.63063520498375e-07, + "loss": 0.0316, + "num_input_tokens_seen": 7001984, + "step": 7290 + }, + { + "epoch": 0.6190597420230821, + "grad_norm": 16.258878707885742, + "learning_rate": 7.616247727980927e-07, + "loss": 0.0762, + "num_input_tokens_seen": 7007168, + "step": 7295 + }, + { + "epoch": 0.6194840461642905, + "grad_norm": 0.2103135585784912, + "learning_rate": 7.601865480710289e-07, + "loss": 0.0582, + "num_input_tokens_seen": 7012416, + "step": 7300 + }, + { + "epoch": 0.619908350305499, + "grad_norm": 7.516461372375488, + "learning_rate": 7.587488494725156e-07, + "loss": 0.0792, + "num_input_tokens_seen": 7016576, + "step": 7305 + }, + { + "epoch": 0.6203326544467074, + "grad_norm": 12.709039688110352, + "learning_rate": 7.573116801567301e-07, + "loss": 0.0391, + "num_input_tokens_seen": 7021056, + "step": 7310 + }, + { + "epoch": 0.6207569585879158, + "grad_norm": 0.8361039757728577, + "learning_rate": 7.558750432766901e-07, + "loss": 0.091, + "num_input_tokens_seen": 7025984, + "step": 7315 + }, + { + "epoch": 0.6211812627291242, + "grad_norm": 15.576173782348633, + "learning_rate": 7.544389419842429e-07, + "loss": 0.0191, + "num_input_tokens_seen": 7031104, + "step": 7320 + }, + { + "epoch": 0.6216055668703326, + "grad_norm": 1.3385177850723267, + "learning_rate": 7.530033794300631e-07, + "loss": 0.0467, + "num_input_tokens_seen": 7035904, + "step": 7325 + }, + { + "epoch": 0.6220298710115411, + "grad_norm": 8.493648529052734, + "learning_rate": 7.515683587636412e-07, + "loss": 0.1059, + "num_input_tokens_seen": 7040384, + "step": 7330 + }, + { + "epoch": 0.6224541751527495, + "grad_norm": 0.186950221657753, + "learning_rate": 7.501338831332813e-07, + "loss": 0.0149, + "num_input_tokens_seen": 7045312, + "step": 7335 + }, + { + "epoch": 0.6228784792939579, + "grad_norm": 20.064208984375, + "learning_rate": 7.486999556860889e-07, + "loss": 0.034, + "num_input_tokens_seen": 7050048, + "step": 7340 + }, + { + "epoch": 0.6233027834351663, + "grad_norm": 0.10197906941175461, + "learning_rate": 7.472665795679694e-07, + "loss": 0.0281, + "num_input_tokens_seen": 7054592, + "step": 7345 + }, + { + "epoch": 0.6237270875763747, + "grad_norm": 50.49172592163086, + "learning_rate": 7.458337579236168e-07, + "loss": 0.0953, + "num_input_tokens_seen": 7059392, + "step": 7350 + }, + { + "epoch": 0.6241513917175832, + "grad_norm": 10.861714363098145, + "learning_rate": 7.4440149389651e-07, + "loss": 0.113, + "num_input_tokens_seen": 7063552, + "step": 7355 + }, + { + "epoch": 0.6245756958587916, + "grad_norm": 17.287086486816406, + "learning_rate": 7.429697906289029e-07, + "loss": 0.0476, + "num_input_tokens_seen": 7068288, + "step": 7360 + }, + { + "epoch": 0.625, + "grad_norm": 1.1666802167892456, + "learning_rate": 7.415386512618216e-07, + "loss": 0.005, + "num_input_tokens_seen": 7073216, + "step": 7365 + }, + { + "epoch": 0.6254243041412084, + "grad_norm": 11.889900207519531, + "learning_rate": 7.401080789350525e-07, + "loss": 0.0727, + "num_input_tokens_seen": 7077824, + "step": 7370 + }, + { + "epoch": 0.6258486082824168, + "grad_norm": 1.242414116859436, + "learning_rate": 7.386780767871396e-07, + "loss": 0.0238, + "num_input_tokens_seen": 7082240, + "step": 7375 + }, + { + "epoch": 0.6262729124236253, + "grad_norm": 9.294206619262695, + "learning_rate": 7.372486479553748e-07, + "loss": 0.078, + "num_input_tokens_seen": 7087360, + "step": 7380 + }, + { + "epoch": 0.6266972165648337, + "grad_norm": 0.3037230670452118, + "learning_rate": 7.358197955757939e-07, + "loss": 0.0387, + "num_input_tokens_seen": 7092288, + "step": 7385 + }, + { + "epoch": 0.6271215207060421, + "grad_norm": 18.94266700744629, + "learning_rate": 7.343915227831661e-07, + "loss": 0.0334, + "num_input_tokens_seen": 7096768, + "step": 7390 + }, + { + "epoch": 0.6275458248472505, + "grad_norm": 34.89910125732422, + "learning_rate": 7.329638327109902e-07, + "loss": 0.0722, + "num_input_tokens_seen": 7101312, + "step": 7395 + }, + { + "epoch": 0.6279701289884589, + "grad_norm": 6.9105224609375, + "learning_rate": 7.315367284914861e-07, + "loss": 0.0878, + "num_input_tokens_seen": 7105664, + "step": 7400 + }, + { + "epoch": 0.6283944331296674, + "grad_norm": 0.7883709669113159, + "learning_rate": 7.301102132555891e-07, + "loss": 0.0652, + "num_input_tokens_seen": 7110208, + "step": 7405 + }, + { + "epoch": 0.6288187372708758, + "grad_norm": 0.3730391263961792, + "learning_rate": 7.286842901329412e-07, + "loss": 0.004, + "num_input_tokens_seen": 7115136, + "step": 7410 + }, + { + "epoch": 0.6292430414120842, + "grad_norm": 0.42069530487060547, + "learning_rate": 7.272589622518863e-07, + "loss": 0.0337, + "num_input_tokens_seen": 7119552, + "step": 7415 + }, + { + "epoch": 0.6296673455532926, + "grad_norm": 8.277861595153809, + "learning_rate": 7.258342327394616e-07, + "loss": 0.0522, + "num_input_tokens_seen": 7124352, + "step": 7420 + }, + { + "epoch": 0.630091649694501, + "grad_norm": 0.07428093254566193, + "learning_rate": 7.244101047213927e-07, + "loss": 0.0316, + "num_input_tokens_seen": 7128768, + "step": 7425 + }, + { + "epoch": 0.6305159538357095, + "grad_norm": 36.01335144042969, + "learning_rate": 7.229865813220843e-07, + "loss": 0.0971, + "num_input_tokens_seen": 7133568, + "step": 7430 + }, + { + "epoch": 0.6309402579769179, + "grad_norm": 8.02556324005127, + "learning_rate": 7.215636656646151e-07, + "loss": 0.0625, + "num_input_tokens_seen": 7138112, + "step": 7435 + }, + { + "epoch": 0.6313645621181263, + "grad_norm": 8.330338478088379, + "learning_rate": 7.201413608707312e-07, + "loss": 0.0557, + "num_input_tokens_seen": 7142848, + "step": 7440 + }, + { + "epoch": 0.6317888662593347, + "grad_norm": 0.15843556821346283, + "learning_rate": 7.187196700608372e-07, + "loss": 0.0781, + "num_input_tokens_seen": 7147584, + "step": 7445 + }, + { + "epoch": 0.6322131704005431, + "grad_norm": 0.48546895384788513, + "learning_rate": 7.172985963539919e-07, + "loss": 0.0768, + "num_input_tokens_seen": 7152192, + "step": 7450 + }, + { + "epoch": 0.6326374745417516, + "grad_norm": 7.328261375427246, + "learning_rate": 7.158781428678989e-07, + "loss": 0.0604, + "num_input_tokens_seen": 7156672, + "step": 7455 + }, + { + "epoch": 0.63306177868296, + "grad_norm": 2.0213396549224854, + "learning_rate": 7.144583127189028e-07, + "loss": 0.0545, + "num_input_tokens_seen": 7161664, + "step": 7460 + }, + { + "epoch": 0.6334860828241684, + "grad_norm": 19.81895637512207, + "learning_rate": 7.130391090219789e-07, + "loss": 0.0352, + "num_input_tokens_seen": 7166016, + "step": 7465 + }, + { + "epoch": 0.6339103869653768, + "grad_norm": 21.48404312133789, + "learning_rate": 7.116205348907298e-07, + "loss": 0.0671, + "num_input_tokens_seen": 7170752, + "step": 7470 + }, + { + "epoch": 0.6343346911065852, + "grad_norm": 5.94006872177124, + "learning_rate": 7.10202593437375e-07, + "loss": 0.0546, + "num_input_tokens_seen": 7176064, + "step": 7475 + }, + { + "epoch": 0.6347589952477937, + "grad_norm": 29.022289276123047, + "learning_rate": 7.08785287772748e-07, + "loss": 0.0549, + "num_input_tokens_seen": 7181312, + "step": 7480 + }, + { + "epoch": 0.6351832993890021, + "grad_norm": 11.498795509338379, + "learning_rate": 7.073686210062859e-07, + "loss": 0.095, + "num_input_tokens_seen": 7186176, + "step": 7485 + }, + { + "epoch": 0.6356076035302105, + "grad_norm": 1.1727149486541748, + "learning_rate": 7.059525962460248e-07, + "loss": 0.0358, + "num_input_tokens_seen": 7190784, + "step": 7490 + }, + { + "epoch": 0.6360319076714189, + "grad_norm": 6.915524959564209, + "learning_rate": 7.045372165985919e-07, + "loss": 0.0687, + "num_input_tokens_seen": 7195840, + "step": 7495 + }, + { + "epoch": 0.6364562118126272, + "grad_norm": 6.936726093292236, + "learning_rate": 7.031224851691999e-07, + "loss": 0.0347, + "num_input_tokens_seen": 7201024, + "step": 7500 + }, + { + "epoch": 0.6368805159538357, + "grad_norm": 0.39795514941215515, + "learning_rate": 7.017084050616385e-07, + "loss": 0.0709, + "num_input_tokens_seen": 7205760, + "step": 7505 + }, + { + "epoch": 0.6373048200950441, + "grad_norm": 6.167526721954346, + "learning_rate": 7.002949793782686e-07, + "loss": 0.0482, + "num_input_tokens_seen": 7210560, + "step": 7510 + }, + { + "epoch": 0.6377291242362525, + "grad_norm": 27.449459075927734, + "learning_rate": 6.988822112200156e-07, + "loss": 0.0555, + "num_input_tokens_seen": 7215488, + "step": 7515 + }, + { + "epoch": 0.6381534283774609, + "grad_norm": 8.422005653381348, + "learning_rate": 6.974701036863626e-07, + "loss": 0.0843, + "num_input_tokens_seen": 7220608, + "step": 7520 + }, + { + "epoch": 0.6385777325186693, + "grad_norm": 0.35018572211265564, + "learning_rate": 6.960586598753426e-07, + "loss": 0.029, + "num_input_tokens_seen": 7225280, + "step": 7525 + }, + { + "epoch": 0.6390020366598778, + "grad_norm": 0.1585758924484253, + "learning_rate": 6.946478828835331e-07, + "loss": 0.0439, + "num_input_tokens_seen": 7229696, + "step": 7530 + }, + { + "epoch": 0.6394263408010862, + "grad_norm": 14.599209785461426, + "learning_rate": 6.932377758060481e-07, + "loss": 0.0425, + "num_input_tokens_seen": 7233984, + "step": 7535 + }, + { + "epoch": 0.6398506449422946, + "grad_norm": 22.134122848510742, + "learning_rate": 6.91828341736533e-07, + "loss": 0.0495, + "num_input_tokens_seen": 7239936, + "step": 7540 + }, + { + "epoch": 0.640274949083503, + "grad_norm": 45.42800521850586, + "learning_rate": 6.904195837671552e-07, + "loss": 0.0334, + "num_input_tokens_seen": 7244480, + "step": 7545 + }, + { + "epoch": 0.6406992532247114, + "grad_norm": 0.15206590294837952, + "learning_rate": 6.890115049885994e-07, + "loss": 0.0664, + "num_input_tokens_seen": 7248960, + "step": 7550 + }, + { + "epoch": 0.6411235573659199, + "grad_norm": 31.500267028808594, + "learning_rate": 6.87604108490061e-07, + "loss": 0.0579, + "num_input_tokens_seen": 7253888, + "step": 7555 + }, + { + "epoch": 0.6415478615071283, + "grad_norm": 0.42176005244255066, + "learning_rate": 6.861973973592372e-07, + "loss": 0.0378, + "num_input_tokens_seen": 7259200, + "step": 7560 + }, + { + "epoch": 0.6419721656483367, + "grad_norm": 1.1130379438400269, + "learning_rate": 6.847913746823227e-07, + "loss": 0.0178, + "num_input_tokens_seen": 7263808, + "step": 7565 + }, + { + "epoch": 0.6423964697895451, + "grad_norm": 17.046188354492188, + "learning_rate": 6.833860435440006e-07, + "loss": 0.0296, + "num_input_tokens_seen": 7269248, + "step": 7570 + }, + { + "epoch": 0.6428207739307535, + "grad_norm": 0.6816079020500183, + "learning_rate": 6.819814070274384e-07, + "loss": 0.0162, + "num_input_tokens_seen": 7274496, + "step": 7575 + }, + { + "epoch": 0.643245078071962, + "grad_norm": 6.718956470489502, + "learning_rate": 6.805774682142782e-07, + "loss": 0.1676, + "num_input_tokens_seen": 7279552, + "step": 7580 + }, + { + "epoch": 0.6436693822131704, + "grad_norm": 10.924981117248535, + "learning_rate": 6.791742301846325e-07, + "loss": 0.0954, + "num_input_tokens_seen": 7284096, + "step": 7585 + }, + { + "epoch": 0.6440936863543788, + "grad_norm": 0.19647859036922455, + "learning_rate": 6.777716960170752e-07, + "loss": 0.039, + "num_input_tokens_seen": 7289088, + "step": 7590 + }, + { + "epoch": 0.6445179904955872, + "grad_norm": 35.239501953125, + "learning_rate": 6.763698687886372e-07, + "loss": 0.1273, + "num_input_tokens_seen": 7293696, + "step": 7595 + }, + { + "epoch": 0.6449422946367956, + "grad_norm": 0.21344631910324097, + "learning_rate": 6.749687515747977e-07, + "loss": 0.047, + "num_input_tokens_seen": 7298816, + "step": 7600 + }, + { + "epoch": 0.6453665987780041, + "grad_norm": 5.354994297027588, + "learning_rate": 6.735683474494784e-07, + "loss": 0.0821, + "num_input_tokens_seen": 7303232, + "step": 7605 + }, + { + "epoch": 0.6457909029192125, + "grad_norm": 14.325811386108398, + "learning_rate": 6.721686594850362e-07, + "loss": 0.0391, + "num_input_tokens_seen": 7308416, + "step": 7610 + }, + { + "epoch": 0.6462152070604209, + "grad_norm": 0.12330670654773712, + "learning_rate": 6.707696907522577e-07, + "loss": 0.0789, + "num_input_tokens_seen": 7313024, + "step": 7615 + }, + { + "epoch": 0.6466395112016293, + "grad_norm": 12.076754570007324, + "learning_rate": 6.693714443203507e-07, + "loss": 0.0668, + "num_input_tokens_seen": 7317760, + "step": 7620 + }, + { + "epoch": 0.6470638153428377, + "grad_norm": 20.513025283813477, + "learning_rate": 6.679739232569388e-07, + "loss": 0.0315, + "num_input_tokens_seen": 7322624, + "step": 7625 + }, + { + "epoch": 0.6474881194840462, + "grad_norm": 12.568245887756348, + "learning_rate": 6.665771306280537e-07, + "loss": 0.0434, + "num_input_tokens_seen": 7327104, + "step": 7630 + }, + { + "epoch": 0.6479124236252546, + "grad_norm": 15.543357849121094, + "learning_rate": 6.651810694981299e-07, + "loss": 0.0735, + "num_input_tokens_seen": 7331520, + "step": 7635 + }, + { + "epoch": 0.648336727766463, + "grad_norm": 14.047017097473145, + "learning_rate": 6.637857429299958e-07, + "loss": 0.0712, + "num_input_tokens_seen": 7336448, + "step": 7640 + }, + { + "epoch": 0.6487610319076714, + "grad_norm": 0.43313679099082947, + "learning_rate": 6.623911539848697e-07, + "loss": 0.0669, + "num_input_tokens_seen": 7341248, + "step": 7645 + }, + { + "epoch": 0.6491853360488798, + "grad_norm": 13.36621379852295, + "learning_rate": 6.6099730572235e-07, + "loss": 0.0657, + "num_input_tokens_seen": 7345920, + "step": 7650 + }, + { + "epoch": 0.6496096401900883, + "grad_norm": 0.5109625458717346, + "learning_rate": 6.596042012004119e-07, + "loss": 0.0214, + "num_input_tokens_seen": 7350464, + "step": 7655 + }, + { + "epoch": 0.6500339443312967, + "grad_norm": 14.361283302307129, + "learning_rate": 6.582118434753973e-07, + "loss": 0.0931, + "num_input_tokens_seen": 7355008, + "step": 7660 + }, + { + "epoch": 0.6504582484725051, + "grad_norm": 32.11049270629883, + "learning_rate": 6.568202356020108e-07, + "loss": 0.0362, + "num_input_tokens_seen": 7359680, + "step": 7665 + }, + { + "epoch": 0.6508825526137135, + "grad_norm": 2.051922082901001, + "learning_rate": 6.554293806333109e-07, + "loss": 0.0404, + "num_input_tokens_seen": 7363968, + "step": 7670 + }, + { + "epoch": 0.6508825526137135, + "eval_loss": 0.05561085045337677, + "eval_runtime": 15.8384, + "eval_samples_per_second": 661.368, + "eval_steps_per_second": 82.71, + "num_input_tokens_seen": 7363968, + "step": 7670 + }, + { + "epoch": 0.651306856754922, + "grad_norm": 14.956515312194824, + "learning_rate": 6.540392816207054e-07, + "loss": 0.108, + "num_input_tokens_seen": 7368064, + "step": 7675 + }, + { + "epoch": 0.6517311608961304, + "grad_norm": 14.99658489227295, + "learning_rate": 6.52649941613943e-07, + "loss": 0.0423, + "num_input_tokens_seen": 7372736, + "step": 7680 + }, + { + "epoch": 0.6521554650373388, + "grad_norm": 1.0164530277252197, + "learning_rate": 6.512613636611068e-07, + "loss": 0.0361, + "num_input_tokens_seen": 7377600, + "step": 7685 + }, + { + "epoch": 0.6525797691785472, + "grad_norm": 17.327880859375, + "learning_rate": 6.498735508086093e-07, + "loss": 0.0238, + "num_input_tokens_seen": 7381952, + "step": 7690 + }, + { + "epoch": 0.6530040733197556, + "grad_norm": 2.355736494064331, + "learning_rate": 6.484865061011829e-07, + "loss": 0.0258, + "num_input_tokens_seen": 7387264, + "step": 7695 + }, + { + "epoch": 0.653428377460964, + "grad_norm": 1.3785549402236938, + "learning_rate": 6.471002325818761e-07, + "loss": 0.0708, + "num_input_tokens_seen": 7391936, + "step": 7700 + }, + { + "epoch": 0.6538526816021725, + "grad_norm": 3.1388180255889893, + "learning_rate": 6.45714733292044e-07, + "loss": 0.0622, + "num_input_tokens_seen": 7397248, + "step": 7705 + }, + { + "epoch": 0.6542769857433809, + "grad_norm": 12.08487319946289, + "learning_rate": 6.443300112713452e-07, + "loss": 0.0741, + "num_input_tokens_seen": 7401920, + "step": 7710 + }, + { + "epoch": 0.6547012898845893, + "grad_norm": 27.47235107421875, + "learning_rate": 6.429460695577309e-07, + "loss": 0.0523, + "num_input_tokens_seen": 7406912, + "step": 7715 + }, + { + "epoch": 0.6551255940257977, + "grad_norm": 8.73314094543457, + "learning_rate": 6.415629111874418e-07, + "loss": 0.0758, + "num_input_tokens_seen": 7411776, + "step": 7720 + }, + { + "epoch": 0.6555498981670062, + "grad_norm": 0.9363436698913574, + "learning_rate": 6.401805391949989e-07, + "loss": 0.0585, + "num_input_tokens_seen": 7416128, + "step": 7725 + }, + { + "epoch": 0.6559742023082146, + "grad_norm": 0.05604305863380432, + "learning_rate": 6.387989566131996e-07, + "loss": 0.0381, + "num_input_tokens_seen": 7422208, + "step": 7730 + }, + { + "epoch": 0.656398506449423, + "grad_norm": 8.00350570678711, + "learning_rate": 6.374181664731076e-07, + "loss": 0.0647, + "num_input_tokens_seen": 7427008, + "step": 7735 + }, + { + "epoch": 0.6568228105906314, + "grad_norm": 1.7786318063735962, + "learning_rate": 6.360381718040493e-07, + "loss": 0.053, + "num_input_tokens_seen": 7431488, + "step": 7740 + }, + { + "epoch": 0.6572471147318398, + "grad_norm": 1.2591062784194946, + "learning_rate": 6.34658975633605e-07, + "loss": 0.0645, + "num_input_tokens_seen": 7436544, + "step": 7745 + }, + { + "epoch": 0.6576714188730483, + "grad_norm": 0.7253627777099609, + "learning_rate": 6.332805809876041e-07, + "loss": 0.0631, + "num_input_tokens_seen": 7440896, + "step": 7750 + }, + { + "epoch": 0.6580957230142567, + "grad_norm": 30.2974796295166, + "learning_rate": 6.319029908901168e-07, + "loss": 0.1176, + "num_input_tokens_seen": 7445824, + "step": 7755 + }, + { + "epoch": 0.658520027155465, + "grad_norm": 22.773164749145508, + "learning_rate": 6.305262083634487e-07, + "loss": 0.0954, + "num_input_tokens_seen": 7450368, + "step": 7760 + }, + { + "epoch": 0.6589443312966734, + "grad_norm": 31.29222869873047, + "learning_rate": 6.29150236428133e-07, + "loss": 0.0724, + "num_input_tokens_seen": 7454720, + "step": 7765 + }, + { + "epoch": 0.6593686354378818, + "grad_norm": 9.666147232055664, + "learning_rate": 6.277750781029254e-07, + "loss": 0.0929, + "num_input_tokens_seen": 7459264, + "step": 7770 + }, + { + "epoch": 0.6597929395790902, + "grad_norm": 5.074236869812012, + "learning_rate": 6.26400736404796e-07, + "loss": 0.0311, + "num_input_tokens_seen": 7463744, + "step": 7775 + }, + { + "epoch": 0.6602172437202987, + "grad_norm": 27.29247283935547, + "learning_rate": 6.250272143489236e-07, + "loss": 0.0185, + "num_input_tokens_seen": 7468800, + "step": 7780 + }, + { + "epoch": 0.6606415478615071, + "grad_norm": 0.3091581165790558, + "learning_rate": 6.23654514948688e-07, + "loss": 0.0711, + "num_input_tokens_seen": 7473856, + "step": 7785 + }, + { + "epoch": 0.6610658520027155, + "grad_norm": 14.052967071533203, + "learning_rate": 6.222826412156659e-07, + "loss": 0.0637, + "num_input_tokens_seen": 7478144, + "step": 7790 + }, + { + "epoch": 0.6614901561439239, + "grad_norm": 5.32991361618042, + "learning_rate": 6.209115961596207e-07, + "loss": 0.0699, + "num_input_tokens_seen": 7482432, + "step": 7795 + }, + { + "epoch": 0.6619144602851323, + "grad_norm": 7.41123628616333, + "learning_rate": 6.195413827884986e-07, + "loss": 0.0804, + "num_input_tokens_seen": 7487488, + "step": 7800 + }, + { + "epoch": 0.6623387644263408, + "grad_norm": 8.139752388000488, + "learning_rate": 6.181720041084216e-07, + "loss": 0.0639, + "num_input_tokens_seen": 7492032, + "step": 7805 + }, + { + "epoch": 0.6627630685675492, + "grad_norm": 2.0337157249450684, + "learning_rate": 6.168034631236794e-07, + "loss": 0.0315, + "num_input_tokens_seen": 7496576, + "step": 7810 + }, + { + "epoch": 0.6631873727087576, + "grad_norm": 0.41162967681884766, + "learning_rate": 6.154357628367251e-07, + "loss": 0.0285, + "num_input_tokens_seen": 7501056, + "step": 7815 + }, + { + "epoch": 0.663611676849966, + "grad_norm": 25.204240798950195, + "learning_rate": 6.140689062481657e-07, + "loss": 0.0801, + "num_input_tokens_seen": 7505792, + "step": 7820 + }, + { + "epoch": 0.6640359809911744, + "grad_norm": 22.751937866210938, + "learning_rate": 6.127028963567593e-07, + "loss": 0.0378, + "num_input_tokens_seen": 7510528, + "step": 7825 + }, + { + "epoch": 0.6644602851323829, + "grad_norm": 14.025527954101562, + "learning_rate": 6.113377361594048e-07, + "loss": 0.0707, + "num_input_tokens_seen": 7515200, + "step": 7830 + }, + { + "epoch": 0.6648845892735913, + "grad_norm": 13.164021492004395, + "learning_rate": 6.099734286511378e-07, + "loss": 0.0271, + "num_input_tokens_seen": 7520128, + "step": 7835 + }, + { + "epoch": 0.6653088934147997, + "grad_norm": 5.781976699829102, + "learning_rate": 6.086099768251222e-07, + "loss": 0.0496, + "num_input_tokens_seen": 7524544, + "step": 7840 + }, + { + "epoch": 0.6657331975560081, + "grad_norm": 23.99297523498535, + "learning_rate": 6.072473836726461e-07, + "loss": 0.123, + "num_input_tokens_seen": 7529664, + "step": 7845 + }, + { + "epoch": 0.6661575016972165, + "grad_norm": 9.406707763671875, + "learning_rate": 6.058856521831126e-07, + "loss": 0.0509, + "num_input_tokens_seen": 7533760, + "step": 7850 + }, + { + "epoch": 0.666581805838425, + "grad_norm": 0.1467132270336151, + "learning_rate": 6.045247853440349e-07, + "loss": 0.0302, + "num_input_tokens_seen": 7538432, + "step": 7855 + }, + { + "epoch": 0.6670061099796334, + "grad_norm": 9.463153839111328, + "learning_rate": 6.031647861410287e-07, + "loss": 0.0664, + "num_input_tokens_seen": 7543168, + "step": 7860 + }, + { + "epoch": 0.6674304141208418, + "grad_norm": 73.3876724243164, + "learning_rate": 6.018056575578074e-07, + "loss": 0.0448, + "num_input_tokens_seen": 7547840, + "step": 7865 + }, + { + "epoch": 0.6678547182620502, + "grad_norm": 19.8172550201416, + "learning_rate": 6.004474025761723e-07, + "loss": 0.0146, + "num_input_tokens_seen": 7552768, + "step": 7870 + }, + { + "epoch": 0.6682790224032586, + "grad_norm": 12.499360084533691, + "learning_rate": 5.990900241760102e-07, + "loss": 0.1316, + "num_input_tokens_seen": 7557312, + "step": 7875 + }, + { + "epoch": 0.6687033265444671, + "grad_norm": 15.117225646972656, + "learning_rate": 5.977335253352833e-07, + "loss": 0.0859, + "num_input_tokens_seen": 7561920, + "step": 7880 + }, + { + "epoch": 0.6691276306856755, + "grad_norm": 22.873123168945312, + "learning_rate": 5.963779090300254e-07, + "loss": 0.0652, + "num_input_tokens_seen": 7567104, + "step": 7885 + }, + { + "epoch": 0.6695519348268839, + "grad_norm": 6.8520731925964355, + "learning_rate": 5.950231782343326e-07, + "loss": 0.0364, + "num_input_tokens_seen": 7571584, + "step": 7890 + }, + { + "epoch": 0.6699762389680923, + "grad_norm": 23.435100555419922, + "learning_rate": 5.936693359203597e-07, + "loss": 0.0182, + "num_input_tokens_seen": 7576256, + "step": 7895 + }, + { + "epoch": 0.6704005431093008, + "grad_norm": 10.050165176391602, + "learning_rate": 5.923163850583113e-07, + "loss": 0.0678, + "num_input_tokens_seen": 7581248, + "step": 7900 + }, + { + "epoch": 0.6708248472505092, + "grad_norm": 8.643121719360352, + "learning_rate": 5.909643286164367e-07, + "loss": 0.0382, + "num_input_tokens_seen": 7585984, + "step": 7905 + }, + { + "epoch": 0.6712491513917176, + "grad_norm": 15.745407104492188, + "learning_rate": 5.896131695610223e-07, + "loss": 0.0831, + "num_input_tokens_seen": 7590528, + "step": 7910 + }, + { + "epoch": 0.671673455532926, + "grad_norm": 8.164334297180176, + "learning_rate": 5.88262910856387e-07, + "loss": 0.0263, + "num_input_tokens_seen": 7595328, + "step": 7915 + }, + { + "epoch": 0.6720977596741344, + "grad_norm": 6.022217750549316, + "learning_rate": 5.869135554648728e-07, + "loss": 0.0661, + "num_input_tokens_seen": 7600128, + "step": 7920 + }, + { + "epoch": 0.6725220638153429, + "grad_norm": 0.2256646305322647, + "learning_rate": 5.855651063468411e-07, + "loss": 0.0523, + "num_input_tokens_seen": 7604288, + "step": 7925 + }, + { + "epoch": 0.6729463679565513, + "grad_norm": 15.741569519042969, + "learning_rate": 5.84217566460665e-07, + "loss": 0.0387, + "num_input_tokens_seen": 7609280, + "step": 7930 + }, + { + "epoch": 0.6733706720977597, + "grad_norm": 11.693473815917969, + "learning_rate": 5.828709387627217e-07, + "loss": 0.0498, + "num_input_tokens_seen": 7614528, + "step": 7935 + }, + { + "epoch": 0.6737949762389681, + "grad_norm": 17.00261116027832, + "learning_rate": 5.815252262073891e-07, + "loss": 0.112, + "num_input_tokens_seen": 7619264, + "step": 7940 + }, + { + "epoch": 0.6742192803801765, + "grad_norm": 0.4120815396308899, + "learning_rate": 5.801804317470349e-07, + "loss": 0.0041, + "num_input_tokens_seen": 7624192, + "step": 7945 + }, + { + "epoch": 0.674643584521385, + "grad_norm": 9.579654693603516, + "learning_rate": 5.788365583320144e-07, + "loss": 0.0446, + "num_input_tokens_seen": 7629120, + "step": 7950 + }, + { + "epoch": 0.6750678886625934, + "grad_norm": 19.68268394470215, + "learning_rate": 5.774936089106617e-07, + "loss": 0.0664, + "num_input_tokens_seen": 7633984, + "step": 7955 + }, + { + "epoch": 0.6754921928038018, + "grad_norm": 0.8648895621299744, + "learning_rate": 5.761515864292835e-07, + "loss": 0.0222, + "num_input_tokens_seen": 7638976, + "step": 7960 + }, + { + "epoch": 0.6759164969450102, + "grad_norm": 0.464873343706131, + "learning_rate": 5.748104938321534e-07, + "loss": 0.001, + "num_input_tokens_seen": 7643520, + "step": 7965 + }, + { + "epoch": 0.6763408010862186, + "grad_norm": 0.21355387568473816, + "learning_rate": 5.734703340615049e-07, + "loss": 0.0733, + "num_input_tokens_seen": 7648576, + "step": 7970 + }, + { + "epoch": 0.676765105227427, + "grad_norm": 0.31152021884918213, + "learning_rate": 5.721311100575235e-07, + "loss": 0.0419, + "num_input_tokens_seen": 7653632, + "step": 7975 + }, + { + "epoch": 0.6771894093686355, + "grad_norm": 0.17043381929397583, + "learning_rate": 5.707928247583444e-07, + "loss": 0.0034, + "num_input_tokens_seen": 7658176, + "step": 7980 + }, + { + "epoch": 0.6776137135098439, + "grad_norm": 19.964506149291992, + "learning_rate": 5.694554811000407e-07, + "loss": 0.1114, + "num_input_tokens_seen": 7663040, + "step": 7985 + }, + { + "epoch": 0.6780380176510523, + "grad_norm": 21.162376403808594, + "learning_rate": 5.681190820166213e-07, + "loss": 0.0707, + "num_input_tokens_seen": 7668160, + "step": 7990 + }, + { + "epoch": 0.6784623217922607, + "grad_norm": 3.3429012298583984, + "learning_rate": 5.667836304400221e-07, + "loss": 0.0181, + "num_input_tokens_seen": 7673024, + "step": 7995 + }, + { + "epoch": 0.6788866259334692, + "grad_norm": 0.6459212899208069, + "learning_rate": 5.654491293001005e-07, + "loss": 0.0702, + "num_input_tokens_seen": 7677696, + "step": 8000 + }, + { + "epoch": 0.6793109300746776, + "grad_norm": 16.935989379882812, + "learning_rate": 5.641155815246289e-07, + "loss": 0.046, + "num_input_tokens_seen": 7682752, + "step": 8005 + }, + { + "epoch": 0.679735234215886, + "grad_norm": 1.244130253791809, + "learning_rate": 5.62782990039288e-07, + "loss": 0.0835, + "num_input_tokens_seen": 7687872, + "step": 8010 + }, + { + "epoch": 0.6801595383570944, + "grad_norm": 22.5478515625, + "learning_rate": 5.614513577676592e-07, + "loss": 0.0774, + "num_input_tokens_seen": 7692800, + "step": 8015 + }, + { + "epoch": 0.6805838424983028, + "grad_norm": 0.9386072754859924, + "learning_rate": 5.601206876312223e-07, + "loss": 0.0339, + "num_input_tokens_seen": 7697472, + "step": 8020 + }, + { + "epoch": 0.6810081466395111, + "grad_norm": 0.1652708798646927, + "learning_rate": 5.587909825493433e-07, + "loss": 0.0687, + "num_input_tokens_seen": 7702336, + "step": 8025 + }, + { + "epoch": 0.6814324507807196, + "grad_norm": 13.297231674194336, + "learning_rate": 5.57462245439273e-07, + "loss": 0.0828, + "num_input_tokens_seen": 7707136, + "step": 8030 + }, + { + "epoch": 0.681856754921928, + "grad_norm": 19.77155876159668, + "learning_rate": 5.561344792161373e-07, + "loss": 0.1116, + "num_input_tokens_seen": 7711808, + "step": 8035 + }, + { + "epoch": 0.6822810590631364, + "grad_norm": 0.917893648147583, + "learning_rate": 5.54807686792933e-07, + "loss": 0.0646, + "num_input_tokens_seen": 7716352, + "step": 8040 + }, + { + "epoch": 0.6827053632043448, + "grad_norm": 1.264143705368042, + "learning_rate": 5.534818710805198e-07, + "loss": 0.1109, + "num_input_tokens_seen": 7721024, + "step": 8045 + }, + { + "epoch": 0.6831296673455532, + "grad_norm": 5.795388698577881, + "learning_rate": 5.52157034987615e-07, + "loss": 0.0793, + "num_input_tokens_seen": 7726208, + "step": 8050 + }, + { + "epoch": 0.6835539714867617, + "grad_norm": 0.15009626746177673, + "learning_rate": 5.508331814207864e-07, + "loss": 0.0826, + "num_input_tokens_seen": 7731136, + "step": 8055 + }, + { + "epoch": 0.6839782756279701, + "grad_norm": 11.75623607635498, + "learning_rate": 5.495103132844466e-07, + "loss": 0.1033, + "num_input_tokens_seen": 7735552, + "step": 8060 + }, + { + "epoch": 0.6844025797691785, + "grad_norm": 0.1650766134262085, + "learning_rate": 5.481884334808463e-07, + "loss": 0.0839, + "num_input_tokens_seen": 7740032, + "step": 8065 + }, + { + "epoch": 0.6848268839103869, + "grad_norm": 14.68458080291748, + "learning_rate": 5.468675449100664e-07, + "loss": 0.0441, + "num_input_tokens_seen": 7744960, + "step": 8070 + }, + { + "epoch": 0.6852511880515954, + "grad_norm": 5.773752212524414, + "learning_rate": 5.455476504700161e-07, + "loss": 0.1324, + "num_input_tokens_seen": 7749632, + "step": 8075 + }, + { + "epoch": 0.6856754921928038, + "grad_norm": 3.588259220123291, + "learning_rate": 5.442287530564203e-07, + "loss": 0.0765, + "num_input_tokens_seen": 7754176, + "step": 8080 + }, + { + "epoch": 0.6860997963340122, + "grad_norm": 13.416653633117676, + "learning_rate": 5.429108555628186e-07, + "loss": 0.1193, + "num_input_tokens_seen": 7759360, + "step": 8085 + }, + { + "epoch": 0.6865241004752206, + "grad_norm": 3.5806448459625244, + "learning_rate": 5.415939608805564e-07, + "loss": 0.0654, + "num_input_tokens_seen": 7763904, + "step": 8090 + }, + { + "epoch": 0.686948404616429, + "grad_norm": 0.8055393695831299, + "learning_rate": 5.402780718987789e-07, + "loss": 0.0189, + "num_input_tokens_seen": 7768320, + "step": 8095 + }, + { + "epoch": 0.6873727087576375, + "grad_norm": 15.830171585083008, + "learning_rate": 5.389631915044249e-07, + "loss": 0.0697, + "num_input_tokens_seen": 7772928, + "step": 8100 + }, + { + "epoch": 0.6877970128988459, + "grad_norm": 0.9971351027488708, + "learning_rate": 5.376493225822208e-07, + "loss": 0.0421, + "num_input_tokens_seen": 7777280, + "step": 8105 + }, + { + "epoch": 0.6882213170400543, + "grad_norm": 8.671359062194824, + "learning_rate": 5.363364680146725e-07, + "loss": 0.0558, + "num_input_tokens_seen": 7782080, + "step": 8110 + }, + { + "epoch": 0.6886456211812627, + "grad_norm": 17.439279556274414, + "learning_rate": 5.350246306820632e-07, + "loss": 0.0597, + "num_input_tokens_seen": 7786880, + "step": 8115 + }, + { + "epoch": 0.6890699253224711, + "grad_norm": 2.295281410217285, + "learning_rate": 5.337138134624412e-07, + "loss": 0.0316, + "num_input_tokens_seen": 7791232, + "step": 8120 + }, + { + "epoch": 0.6894942294636796, + "grad_norm": 1.799139142036438, + "learning_rate": 5.324040192316193e-07, + "loss": 0.0082, + "num_input_tokens_seen": 7796160, + "step": 8125 + }, + { + "epoch": 0.689918533604888, + "grad_norm": 9.208296775817871, + "learning_rate": 5.310952508631645e-07, + "loss": 0.0278, + "num_input_tokens_seen": 7801024, + "step": 8130 + }, + { + "epoch": 0.6903428377460964, + "grad_norm": 12.797538757324219, + "learning_rate": 5.297875112283941e-07, + "loss": 0.0599, + "num_input_tokens_seen": 7805952, + "step": 8135 + }, + { + "epoch": 0.6907671418873048, + "grad_norm": 13.396888732910156, + "learning_rate": 5.284808031963676e-07, + "loss": 0.0434, + "num_input_tokens_seen": 7811008, + "step": 8140 + }, + { + "epoch": 0.6911914460285132, + "grad_norm": 0.3513202369213104, + "learning_rate": 5.271751296338822e-07, + "loss": 0.0109, + "num_input_tokens_seen": 7815296, + "step": 8145 + }, + { + "epoch": 0.6916157501697217, + "grad_norm": 2.3773586750030518, + "learning_rate": 5.25870493405464e-07, + "loss": 0.0378, + "num_input_tokens_seen": 7820864, + "step": 8150 + }, + { + "epoch": 0.6920400543109301, + "grad_norm": 1.3169046640396118, + "learning_rate": 5.245668973733657e-07, + "loss": 0.0569, + "num_input_tokens_seen": 7825856, + "step": 8155 + }, + { + "epoch": 0.6924643584521385, + "grad_norm": 15.97273063659668, + "learning_rate": 5.232643443975553e-07, + "loss": 0.0184, + "num_input_tokens_seen": 7830400, + "step": 8160 + }, + { + "epoch": 0.6928886625933469, + "grad_norm": 1.616037130355835, + "learning_rate": 5.219628373357142e-07, + "loss": 0.0032, + "num_input_tokens_seen": 7835776, + "step": 8165 + }, + { + "epoch": 0.6933129667345553, + "grad_norm": 0.09990768134593964, + "learning_rate": 5.206623790432285e-07, + "loss": 0.0689, + "num_input_tokens_seen": 7840640, + "step": 8170 + }, + { + "epoch": 0.6937372708757638, + "grad_norm": 31.12531852722168, + "learning_rate": 5.193629723731837e-07, + "loss": 0.0707, + "num_input_tokens_seen": 7845120, + "step": 8175 + }, + { + "epoch": 0.6941615750169722, + "grad_norm": 14.439279556274414, + "learning_rate": 5.180646201763577e-07, + "loss": 0.0528, + "num_input_tokens_seen": 7849664, + "step": 8180 + }, + { + "epoch": 0.6945858791581806, + "grad_norm": 17.159196853637695, + "learning_rate": 5.167673253012152e-07, + "loss": 0.008, + "num_input_tokens_seen": 7854080, + "step": 8185 + }, + { + "epoch": 0.695010183299389, + "grad_norm": 19.91121482849121, + "learning_rate": 5.154710905939015e-07, + "loss": 0.0412, + "num_input_tokens_seen": 7858176, + "step": 8190 + }, + { + "epoch": 0.6954344874405974, + "grad_norm": 3.647684335708618, + "learning_rate": 5.141759188982356e-07, + "loss": 0.0572, + "num_input_tokens_seen": 7862912, + "step": 8195 + }, + { + "epoch": 0.6958587915818059, + "grad_norm": 14.494319915771484, + "learning_rate": 5.12881813055705e-07, + "loss": 0.0708, + "num_input_tokens_seen": 7867648, + "step": 8200 + }, + { + "epoch": 0.6962830957230143, + "grad_norm": 0.490307092666626, + "learning_rate": 5.115887759054571e-07, + "loss": 0.1211, + "num_input_tokens_seen": 7873152, + "step": 8205 + }, + { + "epoch": 0.6967073998642227, + "grad_norm": 25.91111946105957, + "learning_rate": 5.102968102842973e-07, + "loss": 0.0675, + "num_input_tokens_seen": 7877824, + "step": 8210 + }, + { + "epoch": 0.6971317040054311, + "grad_norm": 9.230366706848145, + "learning_rate": 5.090059190266779e-07, + "loss": 0.0948, + "num_input_tokens_seen": 7882688, + "step": 8215 + }, + { + "epoch": 0.6975560081466395, + "grad_norm": 3.7622454166412354, + "learning_rate": 5.077161049646951e-07, + "loss": 0.0272, + "num_input_tokens_seen": 7887424, + "step": 8220 + }, + { + "epoch": 0.697980312287848, + "grad_norm": 0.6465440392494202, + "learning_rate": 5.06427370928082e-07, + "loss": 0.0328, + "num_input_tokens_seen": 7891584, + "step": 8225 + }, + { + "epoch": 0.6984046164290564, + "grad_norm": 35.424503326416016, + "learning_rate": 5.05139719744202e-07, + "loss": 0.1217, + "num_input_tokens_seen": 7896576, + "step": 8230 + }, + { + "epoch": 0.6988289205702648, + "grad_norm": 0.2946743071079254, + "learning_rate": 5.038531542380425e-07, + "loss": 0.0685, + "num_input_tokens_seen": 7901376, + "step": 8235 + }, + { + "epoch": 0.6992532247114732, + "grad_norm": 0.7577657699584961, + "learning_rate": 5.025676772322099e-07, + "loss": 0.0305, + "num_input_tokens_seen": 7906368, + "step": 8240 + }, + { + "epoch": 0.6996775288526816, + "grad_norm": 12.771099090576172, + "learning_rate": 5.012832915469207e-07, + "loss": 0.0503, + "num_input_tokens_seen": 7911232, + "step": 8245 + }, + { + "epoch": 0.7001018329938901, + "grad_norm": 15.256807327270508, + "learning_rate": 5.000000000000002e-07, + "loss": 0.0654, + "num_input_tokens_seen": 7915776, + "step": 8250 + }, + { + "epoch": 0.7005261371350985, + "grad_norm": 18.504268646240234, + "learning_rate": 4.987178054068699e-07, + "loss": 0.0226, + "num_input_tokens_seen": 7920192, + "step": 8255 + }, + { + "epoch": 0.7009504412763069, + "grad_norm": 0.9049757719039917, + "learning_rate": 4.97436710580547e-07, + "loss": 0.0633, + "num_input_tokens_seen": 7924800, + "step": 8260 + }, + { + "epoch": 0.7009504412763069, + "eval_loss": 0.0545908585190773, + "eval_runtime": 15.9241, + "eval_samples_per_second": 657.806, + "eval_steps_per_second": 82.265, + "num_input_tokens_seen": 7924800, + "step": 8260 + }, + { + "epoch": 0.7013747454175153, + "grad_norm": 0.3657372295856476, + "learning_rate": 4.961567183316348e-07, + "loss": 0.0334, + "num_input_tokens_seen": 7929664, + "step": 8265 + }, + { + "epoch": 0.7017990495587237, + "grad_norm": 19.398540496826172, + "learning_rate": 4.948778314683183e-07, + "loss": 0.1168, + "num_input_tokens_seen": 7934464, + "step": 8270 + }, + { + "epoch": 0.7022233536999322, + "grad_norm": 44.40122604370117, + "learning_rate": 4.93600052796357e-07, + "loss": 0.0608, + "num_input_tokens_seen": 7939264, + "step": 8275 + }, + { + "epoch": 0.7026476578411406, + "grad_norm": 28.30301856994629, + "learning_rate": 4.923233851190794e-07, + "loss": 0.0751, + "num_input_tokens_seen": 7943552, + "step": 8280 + }, + { + "epoch": 0.703071961982349, + "grad_norm": 2.000850200653076, + "learning_rate": 4.910478312373756e-07, + "loss": 0.0151, + "num_input_tokens_seen": 7948032, + "step": 8285 + }, + { + "epoch": 0.7034962661235573, + "grad_norm": 0.9129754900932312, + "learning_rate": 4.897733939496942e-07, + "loss": 0.0642, + "num_input_tokens_seen": 7953408, + "step": 8290 + }, + { + "epoch": 0.7039205702647657, + "grad_norm": 0.4381353557109833, + "learning_rate": 4.885000760520317e-07, + "loss": 0.0675, + "num_input_tokens_seen": 7957760, + "step": 8295 + }, + { + "epoch": 0.7043448744059742, + "grad_norm": 15.017487525939941, + "learning_rate": 4.872278803379299e-07, + "loss": 0.035, + "num_input_tokens_seen": 7963712, + "step": 8300 + }, + { + "epoch": 0.7047691785471826, + "grad_norm": 1.7986425161361694, + "learning_rate": 4.8595680959847e-07, + "loss": 0.0596, + "num_input_tokens_seen": 7968576, + "step": 8305 + }, + { + "epoch": 0.705193482688391, + "grad_norm": 8.05062484741211, + "learning_rate": 4.846868666222622e-07, + "loss": 0.0847, + "num_input_tokens_seen": 7973184, + "step": 8310 + }, + { + "epoch": 0.7056177868295994, + "grad_norm": 0.7912288308143616, + "learning_rate": 4.834180541954447e-07, + "loss": 0.0375, + "num_input_tokens_seen": 7977792, + "step": 8315 + }, + { + "epoch": 0.7060420909708078, + "grad_norm": 0.472023606300354, + "learning_rate": 4.821503751016745e-07, + "loss": 0.0239, + "num_input_tokens_seen": 7981824, + "step": 8320 + }, + { + "epoch": 0.7064663951120163, + "grad_norm": 2.4885144233703613, + "learning_rate": 4.808838321221226e-07, + "loss": 0.0527, + "num_input_tokens_seen": 7986112, + "step": 8325 + }, + { + "epoch": 0.7068906992532247, + "grad_norm": 7.7319536209106445, + "learning_rate": 4.79618428035467e-07, + "loss": 0.1099, + "num_input_tokens_seen": 7990592, + "step": 8330 + }, + { + "epoch": 0.7073150033944331, + "grad_norm": 1.5436960458755493, + "learning_rate": 4.78354165617888e-07, + "loss": 0.1138, + "num_input_tokens_seen": 7995136, + "step": 8335 + }, + { + "epoch": 0.7077393075356415, + "grad_norm": 10.343153953552246, + "learning_rate": 4.77091047643059e-07, + "loss": 0.0542, + "num_input_tokens_seen": 7999872, + "step": 8340 + }, + { + "epoch": 0.7081636116768499, + "grad_norm": 40.822261810302734, + "learning_rate": 4.7582907688214593e-07, + "loss": 0.0385, + "num_input_tokens_seen": 8004608, + "step": 8345 + }, + { + "epoch": 0.7085879158180584, + "grad_norm": 28.6870174407959, + "learning_rate": 4.745682561037947e-07, + "loss": 0.0206, + "num_input_tokens_seen": 8009024, + "step": 8350 + }, + { + "epoch": 0.7090122199592668, + "grad_norm": 14.473899841308594, + "learning_rate": 4.733085880741301e-07, + "loss": 0.0727, + "num_input_tokens_seen": 8013504, + "step": 8355 + }, + { + "epoch": 0.7094365241004752, + "grad_norm": 7.182537078857422, + "learning_rate": 4.7205007555674714e-07, + "loss": 0.0952, + "num_input_tokens_seen": 8018432, + "step": 8360 + }, + { + "epoch": 0.7098608282416836, + "grad_norm": 0.05755231902003288, + "learning_rate": 4.707927213127062e-07, + "loss": 0.0025, + "num_input_tokens_seen": 8022848, + "step": 8365 + }, + { + "epoch": 0.710285132382892, + "grad_norm": 6.39005184173584, + "learning_rate": 4.6953652810052615e-07, + "loss": 0.0645, + "num_input_tokens_seen": 8027392, + "step": 8370 + }, + { + "epoch": 0.7107094365241005, + "grad_norm": 12.84408187866211, + "learning_rate": 4.682814986761792e-07, + "loss": 0.154, + "num_input_tokens_seen": 8032640, + "step": 8375 + }, + { + "epoch": 0.7111337406653089, + "grad_norm": 0.5624406337738037, + "learning_rate": 4.670276357930828e-07, + "loss": 0.0511, + "num_input_tokens_seen": 8037312, + "step": 8380 + }, + { + "epoch": 0.7115580448065173, + "grad_norm": 8.612873077392578, + "learning_rate": 4.657749422020979e-07, + "loss": 0.1108, + "num_input_tokens_seen": 8042240, + "step": 8385 + }, + { + "epoch": 0.7119823489477257, + "grad_norm": 0.5993130207061768, + "learning_rate": 4.6452342065151704e-07, + "loss": 0.0449, + "num_input_tokens_seen": 8047360, + "step": 8390 + }, + { + "epoch": 0.7124066530889341, + "grad_norm": 16.80901527404785, + "learning_rate": 4.632730738870634e-07, + "loss": 0.0763, + "num_input_tokens_seen": 8052096, + "step": 8395 + }, + { + "epoch": 0.7128309572301426, + "grad_norm": 20.762821197509766, + "learning_rate": 4.6202390465188225e-07, + "loss": 0.0305, + "num_input_tokens_seen": 8056960, + "step": 8400 + }, + { + "epoch": 0.713255261371351, + "grad_norm": 37.5723876953125, + "learning_rate": 4.6077591568653575e-07, + "loss": 0.0582, + "num_input_tokens_seen": 8061568, + "step": 8405 + }, + { + "epoch": 0.7136795655125594, + "grad_norm": 21.374719619750977, + "learning_rate": 4.595291097289952e-07, + "loss": 0.0432, + "num_input_tokens_seen": 8066624, + "step": 8410 + }, + { + "epoch": 0.7141038696537678, + "grad_norm": 16.342044830322266, + "learning_rate": 4.582834895146391e-07, + "loss": 0.0634, + "num_input_tokens_seen": 8071680, + "step": 8415 + }, + { + "epoch": 0.7145281737949762, + "grad_norm": 7.064672470092773, + "learning_rate": 4.5703905777624184e-07, + "loss": 0.061, + "num_input_tokens_seen": 8076480, + "step": 8420 + }, + { + "epoch": 0.7149524779361847, + "grad_norm": 0.5178303718566895, + "learning_rate": 4.5579581724397255e-07, + "loss": 0.0268, + "num_input_tokens_seen": 8080768, + "step": 8425 + }, + { + "epoch": 0.7153767820773931, + "grad_norm": 1.711916446685791, + "learning_rate": 4.5455377064538603e-07, + "loss": 0.0509, + "num_input_tokens_seen": 8085248, + "step": 8430 + }, + { + "epoch": 0.7158010862186015, + "grad_norm": 17.0489444732666, + "learning_rate": 4.533129207054167e-07, + "loss": 0.0485, + "num_input_tokens_seen": 8090176, + "step": 8435 + }, + { + "epoch": 0.7162253903598099, + "grad_norm": 4.791530132293701, + "learning_rate": 4.520732701463762e-07, + "loss": 0.0764, + "num_input_tokens_seen": 8094464, + "step": 8440 + }, + { + "epoch": 0.7166496945010183, + "grad_norm": 0.5642265677452087, + "learning_rate": 4.508348216879421e-07, + "loss": 0.1021, + "num_input_tokens_seen": 8099008, + "step": 8445 + }, + { + "epoch": 0.7170739986422268, + "grad_norm": 6.994995594024658, + "learning_rate": 4.4959757804715613e-07, + "loss": 0.0745, + "num_input_tokens_seen": 8104256, + "step": 8450 + }, + { + "epoch": 0.7174983027834352, + "grad_norm": 19.228673934936523, + "learning_rate": 4.483615419384167e-07, + "loss": 0.0703, + "num_input_tokens_seen": 8109056, + "step": 8455 + }, + { + "epoch": 0.7179226069246436, + "grad_norm": 0.37509685754776, + "learning_rate": 4.4712671607347307e-07, + "loss": 0.1058, + "num_input_tokens_seen": 8114176, + "step": 8460 + }, + { + "epoch": 0.718346911065852, + "grad_norm": 5.339738845825195, + "learning_rate": 4.458931031614179e-07, + "loss": 0.0812, + "num_input_tokens_seen": 8119232, + "step": 8465 + }, + { + "epoch": 0.7187712152070604, + "grad_norm": 1.2560439109802246, + "learning_rate": 4.4466070590868543e-07, + "loss": 0.0612, + "num_input_tokens_seen": 8123840, + "step": 8470 + }, + { + "epoch": 0.7191955193482689, + "grad_norm": 5.405581474304199, + "learning_rate": 4.434295270190402e-07, + "loss": 0.1036, + "num_input_tokens_seen": 8128256, + "step": 8475 + }, + { + "epoch": 0.7196198234894773, + "grad_norm": 16.936054229736328, + "learning_rate": 4.4219956919357546e-07, + "loss": 0.0321, + "num_input_tokens_seen": 8133504, + "step": 8480 + }, + { + "epoch": 0.7200441276306857, + "grad_norm": 0.13875967264175415, + "learning_rate": 4.409708351307049e-07, + "loss": 0.0123, + "num_input_tokens_seen": 8138176, + "step": 8485 + }, + { + "epoch": 0.7204684317718941, + "grad_norm": 14.88166332244873, + "learning_rate": 4.3974332752615727e-07, + "loss": 0.0502, + "num_input_tokens_seen": 8143104, + "step": 8490 + }, + { + "epoch": 0.7208927359131025, + "grad_norm": 4.445859909057617, + "learning_rate": 4.385170490729712e-07, + "loss": 0.028, + "num_input_tokens_seen": 8147584, + "step": 8495 + }, + { + "epoch": 0.721317040054311, + "grad_norm": 24.310640335083008, + "learning_rate": 4.3729200246148835e-07, + "loss": 0.1012, + "num_input_tokens_seen": 8152896, + "step": 8500 + }, + { + "epoch": 0.7217413441955194, + "grad_norm": 3.4741063117980957, + "learning_rate": 4.3606819037934673e-07, + "loss": 0.0263, + "num_input_tokens_seen": 8158400, + "step": 8505 + }, + { + "epoch": 0.7221656483367278, + "grad_norm": 21.058059692382812, + "learning_rate": 4.348456155114786e-07, + "loss": 0.0805, + "num_input_tokens_seen": 8163520, + "step": 8510 + }, + { + "epoch": 0.7225899524779362, + "grad_norm": 29.004924774169922, + "learning_rate": 4.336242805400989e-07, + "loss": 0.0636, + "num_input_tokens_seen": 8168256, + "step": 8515 + }, + { + "epoch": 0.7230142566191446, + "grad_norm": 1.2382115125656128, + "learning_rate": 4.324041881447041e-07, + "loss": 0.0225, + "num_input_tokens_seen": 8172928, + "step": 8520 + }, + { + "epoch": 0.7234385607603531, + "grad_norm": 14.450663566589355, + "learning_rate": 4.311853410020643e-07, + "loss": 0.033, + "num_input_tokens_seen": 8177728, + "step": 8525 + }, + { + "epoch": 0.7238628649015615, + "grad_norm": 10.671835899353027, + "learning_rate": 4.299677417862173e-07, + "loss": 0.0376, + "num_input_tokens_seen": 8182272, + "step": 8530 + }, + { + "epoch": 0.7242871690427699, + "grad_norm": 26.833974838256836, + "learning_rate": 4.287513931684634e-07, + "loss": 0.142, + "num_input_tokens_seen": 8187776, + "step": 8535 + }, + { + "epoch": 0.7247114731839783, + "grad_norm": 0.49240443110466003, + "learning_rate": 4.2753629781735936e-07, + "loss": 0.0742, + "num_input_tokens_seen": 8192960, + "step": 8540 + }, + { + "epoch": 0.7251357773251867, + "grad_norm": 29.694900512695312, + "learning_rate": 4.2632245839871095e-07, + "loss": 0.1659, + "num_input_tokens_seen": 8197824, + "step": 8545 + }, + { + "epoch": 0.7255600814663951, + "grad_norm": 42.07789611816406, + "learning_rate": 4.251098775755708e-07, + "loss": 0.0614, + "num_input_tokens_seen": 8202880, + "step": 8550 + }, + { + "epoch": 0.7259843856076035, + "grad_norm": 14.277400970458984, + "learning_rate": 4.238985580082293e-07, + "loss": 0.1057, + "num_input_tokens_seen": 8207552, + "step": 8555 + }, + { + "epoch": 0.7264086897488119, + "grad_norm": 7.6538543701171875, + "learning_rate": 4.2268850235420827e-07, + "loss": 0.0801, + "num_input_tokens_seen": 8212672, + "step": 8560 + }, + { + "epoch": 0.7268329938900203, + "grad_norm": 25.851024627685547, + "learning_rate": 4.214797132682596e-07, + "loss": 0.0461, + "num_input_tokens_seen": 8219392, + "step": 8565 + }, + { + "epoch": 0.7272572980312287, + "grad_norm": 13.57695198059082, + "learning_rate": 4.202721934023536e-07, + "loss": 0.0472, + "num_input_tokens_seen": 8224064, + "step": 8570 + }, + { + "epoch": 0.7276816021724372, + "grad_norm": 8.460223197937012, + "learning_rate": 4.19065945405678e-07, + "loss": 0.0636, + "num_input_tokens_seen": 8229376, + "step": 8575 + }, + { + "epoch": 0.7281059063136456, + "grad_norm": 0.40096619725227356, + "learning_rate": 4.17860971924629e-07, + "loss": 0.0224, + "num_input_tokens_seen": 8234816, + "step": 8580 + }, + { + "epoch": 0.728530210454854, + "grad_norm": 13.379546165466309, + "learning_rate": 4.166572756028073e-07, + "loss": 0.0674, + "num_input_tokens_seen": 8239552, + "step": 8585 + }, + { + "epoch": 0.7289545145960624, + "grad_norm": 10.900382995605469, + "learning_rate": 4.154548590810113e-07, + "loss": 0.0569, + "num_input_tokens_seen": 8243840, + "step": 8590 + }, + { + "epoch": 0.7293788187372708, + "grad_norm": 33.06897735595703, + "learning_rate": 4.14253724997232e-07, + "loss": 0.1121, + "num_input_tokens_seen": 8248960, + "step": 8595 + }, + { + "epoch": 0.7298031228784793, + "grad_norm": 5.541492938995361, + "learning_rate": 4.1305387598664567e-07, + "loss": 0.0483, + "num_input_tokens_seen": 8253888, + "step": 8600 + }, + { + "epoch": 0.7302274270196877, + "grad_norm": 24.621667861938477, + "learning_rate": 4.118553146816115e-07, + "loss": 0.0628, + "num_input_tokens_seen": 8258816, + "step": 8605 + }, + { + "epoch": 0.7306517311608961, + "grad_norm": 2.005286931991577, + "learning_rate": 4.1065804371166114e-07, + "loss": 0.0508, + "num_input_tokens_seen": 8263424, + "step": 8610 + }, + { + "epoch": 0.7310760353021045, + "grad_norm": 0.4178374111652374, + "learning_rate": 4.0946206570349685e-07, + "loss": 0.0554, + "num_input_tokens_seen": 8268288, + "step": 8615 + }, + { + "epoch": 0.7315003394433129, + "grad_norm": 19.408565521240234, + "learning_rate": 4.082673832809838e-07, + "loss": 0.0945, + "num_input_tokens_seen": 8273152, + "step": 8620 + }, + { + "epoch": 0.7319246435845214, + "grad_norm": 0.30382806062698364, + "learning_rate": 4.0707399906514483e-07, + "loss": 0.0166, + "num_input_tokens_seen": 8278016, + "step": 8625 + }, + { + "epoch": 0.7323489477257298, + "grad_norm": 6.678869724273682, + "learning_rate": 4.058819156741545e-07, + "loss": 0.0345, + "num_input_tokens_seen": 8283392, + "step": 8630 + }, + { + "epoch": 0.7327732518669382, + "grad_norm": 11.891843795776367, + "learning_rate": 4.0469113572333426e-07, + "loss": 0.0519, + "num_input_tokens_seen": 8288064, + "step": 8635 + }, + { + "epoch": 0.7331975560081466, + "grad_norm": 10.481492042541504, + "learning_rate": 4.03501661825144e-07, + "loss": 0.0699, + "num_input_tokens_seen": 8292672, + "step": 8640 + }, + { + "epoch": 0.733621860149355, + "grad_norm": 7.467172622680664, + "learning_rate": 4.023134965891809e-07, + "loss": 0.09, + "num_input_tokens_seen": 8297344, + "step": 8645 + }, + { + "epoch": 0.7340461642905635, + "grad_norm": 9.949556350708008, + "learning_rate": 4.0112664262216866e-07, + "loss": 0.0495, + "num_input_tokens_seen": 8302080, + "step": 8650 + }, + { + "epoch": 0.7344704684317719, + "grad_norm": 24.923635482788086, + "learning_rate": 3.9994110252795563e-07, + "loss": 0.1092, + "num_input_tokens_seen": 8307008, + "step": 8655 + }, + { + "epoch": 0.7348947725729803, + "grad_norm": 23.27560806274414, + "learning_rate": 3.987568789075072e-07, + "loss": 0.0748, + "num_input_tokens_seen": 8312064, + "step": 8660 + }, + { + "epoch": 0.7353190767141887, + "grad_norm": 0.12548640370368958, + "learning_rate": 3.975739743589004e-07, + "loss": 0.0264, + "num_input_tokens_seen": 8316480, + "step": 8665 + }, + { + "epoch": 0.7357433808553971, + "grad_norm": 8.573442459106445, + "learning_rate": 3.9639239147731864e-07, + "loss": 0.024, + "num_input_tokens_seen": 8321664, + "step": 8670 + }, + { + "epoch": 0.7361676849966056, + "grad_norm": 16.72869300842285, + "learning_rate": 3.952121328550455e-07, + "loss": 0.0706, + "num_input_tokens_seen": 8326016, + "step": 8675 + }, + { + "epoch": 0.736591989137814, + "grad_norm": 0.3787572383880615, + "learning_rate": 3.9403320108145943e-07, + "loss": 0.01, + "num_input_tokens_seen": 8330688, + "step": 8680 + }, + { + "epoch": 0.7370162932790224, + "grad_norm": 0.19815906882286072, + "learning_rate": 3.928555987430275e-07, + "loss": 0.0251, + "num_input_tokens_seen": 8335552, + "step": 8685 + }, + { + "epoch": 0.7374405974202308, + "grad_norm": 1.1737080812454224, + "learning_rate": 3.916793284233011e-07, + "loss": 0.0294, + "num_input_tokens_seen": 8340224, + "step": 8690 + }, + { + "epoch": 0.7378649015614392, + "grad_norm": 6.083102226257324, + "learning_rate": 3.9050439270290733e-07, + "loss": 0.0556, + "num_input_tokens_seen": 8345024, + "step": 8695 + }, + { + "epoch": 0.7382892057026477, + "grad_norm": 0.30222249031066895, + "learning_rate": 3.8933079415954805e-07, + "loss": 0.0438, + "num_input_tokens_seen": 8349632, + "step": 8700 + }, + { + "epoch": 0.7387135098438561, + "grad_norm": 0.32194453477859497, + "learning_rate": 3.8815853536798905e-07, + "loss": 0.0502, + "num_input_tokens_seen": 8354624, + "step": 8705 + }, + { + "epoch": 0.7391378139850645, + "grad_norm": 23.009239196777344, + "learning_rate": 3.8698761890005794e-07, + "loss": 0.0513, + "num_input_tokens_seen": 8359168, + "step": 8710 + }, + { + "epoch": 0.7395621181262729, + "grad_norm": 0.40400999784469604, + "learning_rate": 3.858180473246373e-07, + "loss": 0.0154, + "num_input_tokens_seen": 8363968, + "step": 8715 + }, + { + "epoch": 0.7399864222674813, + "grad_norm": 8.802319526672363, + "learning_rate": 3.8464982320765906e-07, + "loss": 0.0438, + "num_input_tokens_seen": 8369152, + "step": 8720 + }, + { + "epoch": 0.7404107264086898, + "grad_norm": 39.71036148071289, + "learning_rate": 3.834829491120991e-07, + "loss": 0.0391, + "num_input_tokens_seen": 8373952, + "step": 8725 + }, + { + "epoch": 0.7408350305498982, + "grad_norm": 20.570491790771484, + "learning_rate": 3.8231742759797157e-07, + "loss": 0.0883, + "num_input_tokens_seen": 8379456, + "step": 8730 + }, + { + "epoch": 0.7412593346911066, + "grad_norm": 16.419105529785156, + "learning_rate": 3.811532612223219e-07, + "loss": 0.0386, + "num_input_tokens_seen": 8383936, + "step": 8735 + }, + { + "epoch": 0.741683638832315, + "grad_norm": 14.511885643005371, + "learning_rate": 3.7999045253922504e-07, + "loss": 0.0699, + "num_input_tokens_seen": 8388864, + "step": 8740 + }, + { + "epoch": 0.7421079429735234, + "grad_norm": 30.785167694091797, + "learning_rate": 3.788290040997746e-07, + "loss": 0.0511, + "num_input_tokens_seen": 8393920, + "step": 8745 + }, + { + "epoch": 0.7425322471147319, + "grad_norm": 1.0883046388626099, + "learning_rate": 3.776689184520815e-07, + "loss": 0.0927, + "num_input_tokens_seen": 8398848, + "step": 8750 + }, + { + "epoch": 0.7429565512559403, + "grad_norm": 26.308425903320312, + "learning_rate": 3.765101981412665e-07, + "loss": 0.1256, + "num_input_tokens_seen": 8404160, + "step": 8755 + }, + { + "epoch": 0.7433808553971487, + "grad_norm": 10.920520782470703, + "learning_rate": 3.753528457094548e-07, + "loss": 0.0433, + "num_input_tokens_seen": 8408896, + "step": 8760 + }, + { + "epoch": 0.7438051595383571, + "grad_norm": 14.321868896484375, + "learning_rate": 3.7419686369577076e-07, + "loss": 0.1051, + "num_input_tokens_seen": 8413312, + "step": 8765 + }, + { + "epoch": 0.7442294636795656, + "grad_norm": 0.10328897833824158, + "learning_rate": 3.730422546363323e-07, + "loss": 0.0251, + "num_input_tokens_seen": 8417920, + "step": 8770 + }, + { + "epoch": 0.744653767820774, + "grad_norm": 22.444488525390625, + "learning_rate": 3.7188902106424414e-07, + "loss": 0.068, + "num_input_tokens_seen": 8422720, + "step": 8775 + }, + { + "epoch": 0.7450780719619824, + "grad_norm": 0.36431556940078735, + "learning_rate": 3.7073716550959533e-07, + "loss": 0.0595, + "num_input_tokens_seen": 8426944, + "step": 8780 + }, + { + "epoch": 0.7455023761031908, + "grad_norm": 16.112642288208008, + "learning_rate": 3.6958669049944956e-07, + "loss": 0.0491, + "num_input_tokens_seen": 8431104, + "step": 8785 + }, + { + "epoch": 0.7459266802443992, + "grad_norm": 15.997936248779297, + "learning_rate": 3.684375985578431e-07, + "loss": 0.0572, + "num_input_tokens_seen": 8435520, + "step": 8790 + }, + { + "epoch": 0.7463509843856077, + "grad_norm": 2.1923623085021973, + "learning_rate": 3.672898922057773e-07, + "loss": 0.0451, + "num_input_tokens_seen": 8441152, + "step": 8795 + }, + { + "epoch": 0.7467752885268161, + "grad_norm": 21.68796157836914, + "learning_rate": 3.66143573961214e-07, + "loss": 0.0423, + "num_input_tokens_seen": 8445824, + "step": 8800 + }, + { + "epoch": 0.7471995926680245, + "grad_norm": 0.1858411580324173, + "learning_rate": 3.649986463390694e-07, + "loss": 0.0749, + "num_input_tokens_seen": 8450752, + "step": 8805 + }, + { + "epoch": 0.7476238968092329, + "grad_norm": 13.372088432312012, + "learning_rate": 3.6385511185120885e-07, + "loss": 0.0542, + "num_input_tokens_seen": 8455296, + "step": 8810 + }, + { + "epoch": 0.7480482009504412, + "grad_norm": 5.704926013946533, + "learning_rate": 3.6271297300644156e-07, + "loss": 0.0453, + "num_input_tokens_seen": 8460480, + "step": 8815 + }, + { + "epoch": 0.7484725050916496, + "grad_norm": 0.219235360622406, + "learning_rate": 3.6157223231051426e-07, + "loss": 0.0876, + "num_input_tokens_seen": 8465152, + "step": 8820 + }, + { + "epoch": 0.7488968092328581, + "grad_norm": 0.9952836632728577, + "learning_rate": 3.6043289226610717e-07, + "loss": 0.0494, + "num_input_tokens_seen": 8470144, + "step": 8825 + }, + { + "epoch": 0.7493211133740665, + "grad_norm": 21.607990264892578, + "learning_rate": 3.5929495537282596e-07, + "loss": 0.0376, + "num_input_tokens_seen": 8475840, + "step": 8830 + }, + { + "epoch": 0.7497454175152749, + "grad_norm": 0.37881892919540405, + "learning_rate": 3.5815842412720045e-07, + "loss": 0.0778, + "num_input_tokens_seen": 8480256, + "step": 8835 + }, + { + "epoch": 0.7501697216564833, + "grad_norm": 1.4716641902923584, + "learning_rate": 3.57023301022674e-07, + "loss": 0.0507, + "num_input_tokens_seen": 8485568, + "step": 8840 + }, + { + "epoch": 0.7505940257976917, + "grad_norm": 16.42884063720703, + "learning_rate": 3.558895885496023e-07, + "loss": 0.0751, + "num_input_tokens_seen": 8490048, + "step": 8845 + }, + { + "epoch": 0.7510183299389002, + "grad_norm": 0.9709210991859436, + "learning_rate": 3.547572891952456e-07, + "loss": 0.0632, + "num_input_tokens_seen": 8494720, + "step": 8850 + }, + { + "epoch": 0.7510183299389002, + "eval_loss": 0.05398377776145935, + "eval_runtime": 15.752, + "eval_samples_per_second": 664.994, + "eval_steps_per_second": 83.164, + "num_input_tokens_seen": 8494720, + "step": 8850 + }, + { + "epoch": 0.7514426340801086, + "grad_norm": 7.805334091186523, + "learning_rate": 3.536264054437641e-07, + "loss": 0.0387, + "num_input_tokens_seen": 8499392, + "step": 8855 + }, + { + "epoch": 0.751866938221317, + "grad_norm": 20.84987449645996, + "learning_rate": 3.524969397762122e-07, + "loss": 0.0444, + "num_input_tokens_seen": 8504256, + "step": 8860 + }, + { + "epoch": 0.7522912423625254, + "grad_norm": 25.035362243652344, + "learning_rate": 3.5136889467053353e-07, + "loss": 0.074, + "num_input_tokens_seen": 8508864, + "step": 8865 + }, + { + "epoch": 0.7527155465037338, + "grad_norm": 13.157938957214355, + "learning_rate": 3.5024227260155383e-07, + "loss": 0.0805, + "num_input_tokens_seen": 8514048, + "step": 8870 + }, + { + "epoch": 0.7531398506449423, + "grad_norm": 10.362030982971191, + "learning_rate": 3.4911707604097916e-07, + "loss": 0.0212, + "num_input_tokens_seen": 8518656, + "step": 8875 + }, + { + "epoch": 0.7535641547861507, + "grad_norm": 28.8228816986084, + "learning_rate": 3.4799330745738573e-07, + "loss": 0.0455, + "num_input_tokens_seen": 8523520, + "step": 8880 + }, + { + "epoch": 0.7539884589273591, + "grad_norm": 12.480382919311523, + "learning_rate": 3.468709693162183e-07, + "loss": 0.1758, + "num_input_tokens_seen": 8528320, + "step": 8885 + }, + { + "epoch": 0.7544127630685675, + "grad_norm": 15.432955741882324, + "learning_rate": 3.4575006407978304e-07, + "loss": 0.0379, + "num_input_tokens_seen": 8533184, + "step": 8890 + }, + { + "epoch": 0.754837067209776, + "grad_norm": 21.28765106201172, + "learning_rate": 3.446305942072425e-07, + "loss": 0.0292, + "num_input_tokens_seen": 8537536, + "step": 8895 + }, + { + "epoch": 0.7552613713509844, + "grad_norm": 23.919931411743164, + "learning_rate": 3.4351256215461e-07, + "loss": 0.0689, + "num_input_tokens_seen": 8541696, + "step": 8900 + }, + { + "epoch": 0.7556856754921928, + "grad_norm": 10.663202285766602, + "learning_rate": 3.423959703747449e-07, + "loss": 0.0226, + "num_input_tokens_seen": 8546176, + "step": 8905 + }, + { + "epoch": 0.7561099796334012, + "grad_norm": 0.2008209079504013, + "learning_rate": 3.4128082131734535e-07, + "loss": 0.0418, + "num_input_tokens_seen": 8550592, + "step": 8910 + }, + { + "epoch": 0.7565342837746096, + "grad_norm": 0.5570741891860962, + "learning_rate": 3.401671174289469e-07, + "loss": 0.0512, + "num_input_tokens_seen": 8555648, + "step": 8915 + }, + { + "epoch": 0.756958587915818, + "grad_norm": 15.728958129882812, + "learning_rate": 3.390548611529116e-07, + "loss": 0.0859, + "num_input_tokens_seen": 8560640, + "step": 8920 + }, + { + "epoch": 0.7573828920570265, + "grad_norm": 7.811544418334961, + "learning_rate": 3.3794405492942713e-07, + "loss": 0.0721, + "num_input_tokens_seen": 8565376, + "step": 8925 + }, + { + "epoch": 0.7578071961982349, + "grad_norm": 15.441573143005371, + "learning_rate": 3.368347011955006e-07, + "loss": 0.0562, + "num_input_tokens_seen": 8569728, + "step": 8930 + }, + { + "epoch": 0.7582315003394433, + "grad_norm": 42.045528411865234, + "learning_rate": 3.3572680238495064e-07, + "loss": 0.0676, + "num_input_tokens_seen": 8574656, + "step": 8935 + }, + { + "epoch": 0.7586558044806517, + "grad_norm": 0.640474796295166, + "learning_rate": 3.346203609284053e-07, + "loss": 0.0675, + "num_input_tokens_seen": 8579072, + "step": 8940 + }, + { + "epoch": 0.7590801086218602, + "grad_norm": 13.068296432495117, + "learning_rate": 3.335153792532945e-07, + "loss": 0.0307, + "num_input_tokens_seen": 8584000, + "step": 8945 + }, + { + "epoch": 0.7595044127630686, + "grad_norm": 9.666223526000977, + "learning_rate": 3.324118597838463e-07, + "loss": 0.0874, + "num_input_tokens_seen": 8589248, + "step": 8950 + }, + { + "epoch": 0.759928716904277, + "grad_norm": 0.12405683100223541, + "learning_rate": 3.313098049410803e-07, + "loss": 0.0565, + "num_input_tokens_seen": 8593472, + "step": 8955 + }, + { + "epoch": 0.7603530210454854, + "grad_norm": 0.11512839794158936, + "learning_rate": 3.3020921714280325e-07, + "loss": 0.0134, + "num_input_tokens_seen": 8597952, + "step": 8960 + }, + { + "epoch": 0.7607773251866938, + "grad_norm": 2.1277525424957275, + "learning_rate": 3.291100988036022e-07, + "loss": 0.0331, + "num_input_tokens_seen": 8602816, + "step": 8965 + }, + { + "epoch": 0.7612016293279023, + "grad_norm": 0.37769949436187744, + "learning_rate": 3.280124523348423e-07, + "loss": 0.0146, + "num_input_tokens_seen": 8608256, + "step": 8970 + }, + { + "epoch": 0.7616259334691107, + "grad_norm": 31.548748016357422, + "learning_rate": 3.269162801446578e-07, + "loss": 0.0495, + "num_input_tokens_seen": 8612928, + "step": 8975 + }, + { + "epoch": 0.7620502376103191, + "grad_norm": 0.18137019872665405, + "learning_rate": 3.258215846379492e-07, + "loss": 0.1099, + "num_input_tokens_seen": 8617280, + "step": 8980 + }, + { + "epoch": 0.7624745417515275, + "grad_norm": 0.3657650649547577, + "learning_rate": 3.247283682163774e-07, + "loss": 0.0041, + "num_input_tokens_seen": 8621952, + "step": 8985 + }, + { + "epoch": 0.7628988458927359, + "grad_norm": 1.3342311382293701, + "learning_rate": 3.2363663327835855e-07, + "loss": 0.0261, + "num_input_tokens_seen": 8627136, + "step": 8990 + }, + { + "epoch": 0.7633231500339444, + "grad_norm": 0.06417883932590485, + "learning_rate": 3.2254638221905716e-07, + "loss": 0.0975, + "num_input_tokens_seen": 8631872, + "step": 8995 + }, + { + "epoch": 0.7637474541751528, + "grad_norm": 25.41999626159668, + "learning_rate": 3.214576174303846e-07, + "loss": 0.0871, + "num_input_tokens_seen": 8636864, + "step": 9000 + }, + { + "epoch": 0.7641717583163612, + "grad_norm": 1.317375898361206, + "learning_rate": 3.2037034130098905e-07, + "loss": 0.033, + "num_input_tokens_seen": 8641792, + "step": 9005 + }, + { + "epoch": 0.7645960624575696, + "grad_norm": 6.363920211791992, + "learning_rate": 3.192845562162549e-07, + "loss": 0.0412, + "num_input_tokens_seen": 8646400, + "step": 9010 + }, + { + "epoch": 0.765020366598778, + "grad_norm": 6.3293867111206055, + "learning_rate": 3.1820026455829353e-07, + "loss": 0.0528, + "num_input_tokens_seen": 8650944, + "step": 9015 + }, + { + "epoch": 0.7654446707399865, + "grad_norm": 0.2312636822462082, + "learning_rate": 3.171174687059408e-07, + "loss": 0.0714, + "num_input_tokens_seen": 8656704, + "step": 9020 + }, + { + "epoch": 0.7658689748811949, + "grad_norm": 14.677282333374023, + "learning_rate": 3.160361710347508e-07, + "loss": 0.0878, + "num_input_tokens_seen": 8661120, + "step": 9025 + }, + { + "epoch": 0.7662932790224033, + "grad_norm": 0.961157500743866, + "learning_rate": 3.14956373916991e-07, + "loss": 0.0299, + "num_input_tokens_seen": 8665664, + "step": 9030 + }, + { + "epoch": 0.7667175831636117, + "grad_norm": 22.225244522094727, + "learning_rate": 3.138780797216356e-07, + "loss": 0.0576, + "num_input_tokens_seen": 8670592, + "step": 9035 + }, + { + "epoch": 0.7671418873048201, + "grad_norm": 7.560973644256592, + "learning_rate": 3.128012908143636e-07, + "loss": 0.0568, + "num_input_tokens_seen": 8677120, + "step": 9040 + }, + { + "epoch": 0.7675661914460286, + "grad_norm": 1.7268146276474, + "learning_rate": 3.1172600955754935e-07, + "loss": 0.0727, + "num_input_tokens_seen": 8682176, + "step": 9045 + }, + { + "epoch": 0.767990495587237, + "grad_norm": 5.31557559967041, + "learning_rate": 3.1065223831026066e-07, + "loss": 0.0849, + "num_input_tokens_seen": 8686976, + "step": 9050 + }, + { + "epoch": 0.7684147997284454, + "grad_norm": 51.82563781738281, + "learning_rate": 3.095799794282533e-07, + "loss": 0.0669, + "num_input_tokens_seen": 8691904, + "step": 9055 + }, + { + "epoch": 0.7688391038696538, + "grad_norm": 6.468626022338867, + "learning_rate": 3.0850923526396334e-07, + "loss": 0.0465, + "num_input_tokens_seen": 8700928, + "step": 9060 + }, + { + "epoch": 0.7692634080108622, + "grad_norm": 26.497516632080078, + "learning_rate": 3.0744000816650464e-07, + "loss": 0.0227, + "num_input_tokens_seen": 8705344, + "step": 9065 + }, + { + "epoch": 0.7696877121520707, + "grad_norm": 3.8520402908325195, + "learning_rate": 3.0637230048166263e-07, + "loss": 0.0562, + "num_input_tokens_seen": 8710784, + "step": 9070 + }, + { + "epoch": 0.770112016293279, + "grad_norm": 6.1381731033325195, + "learning_rate": 3.0530611455188946e-07, + "loss": 0.0205, + "num_input_tokens_seen": 8716032, + "step": 9075 + }, + { + "epoch": 0.7705363204344874, + "grad_norm": 11.487967491149902, + "learning_rate": 3.0424145271629844e-07, + "loss": 0.0332, + "num_input_tokens_seen": 8721088, + "step": 9080 + }, + { + "epoch": 0.7709606245756958, + "grad_norm": 0.8325733542442322, + "learning_rate": 3.031783173106596e-07, + "loss": 0.0212, + "num_input_tokens_seen": 8725632, + "step": 9085 + }, + { + "epoch": 0.7713849287169042, + "grad_norm": 22.9674072265625, + "learning_rate": 3.0211671066739276e-07, + "loss": 0.0318, + "num_input_tokens_seen": 8730560, + "step": 9090 + }, + { + "epoch": 0.7718092328581126, + "grad_norm": 4.438326835632324, + "learning_rate": 3.01056635115566e-07, + "loss": 0.0695, + "num_input_tokens_seen": 8736640, + "step": 9095 + }, + { + "epoch": 0.7722335369993211, + "grad_norm": 0.9272420406341553, + "learning_rate": 2.999980929808863e-07, + "loss": 0.0152, + "num_input_tokens_seen": 8741248, + "step": 9100 + }, + { + "epoch": 0.7726578411405295, + "grad_norm": 8.21893310546875, + "learning_rate": 2.989410865856975e-07, + "loss": 0.0392, + "num_input_tokens_seen": 8745856, + "step": 9105 + }, + { + "epoch": 0.7730821452817379, + "grad_norm": 18.906034469604492, + "learning_rate": 2.9788561824897397e-07, + "loss": 0.1469, + "num_input_tokens_seen": 8750016, + "step": 9110 + }, + { + "epoch": 0.7735064494229463, + "grad_norm": 31.49846839904785, + "learning_rate": 2.968316902863157e-07, + "loss": 0.0212, + "num_input_tokens_seen": 8754944, + "step": 9115 + }, + { + "epoch": 0.7739307535641547, + "grad_norm": 15.8748779296875, + "learning_rate": 2.957793050099433e-07, + "loss": 0.0967, + "num_input_tokens_seen": 8759488, + "step": 9120 + }, + { + "epoch": 0.7743550577053632, + "grad_norm": 3.842491388320923, + "learning_rate": 2.9472846472869295e-07, + "loss": 0.0815, + "num_input_tokens_seen": 8763840, + "step": 9125 + }, + { + "epoch": 0.7747793618465716, + "grad_norm": 0.13100187480449677, + "learning_rate": 2.936791717480104e-07, + "loss": 0.0108, + "num_input_tokens_seen": 8768320, + "step": 9130 + }, + { + "epoch": 0.77520366598778, + "grad_norm": 8.87035083770752, + "learning_rate": 2.9263142836994845e-07, + "loss": 0.0252, + "num_input_tokens_seen": 8773568, + "step": 9135 + }, + { + "epoch": 0.7756279701289884, + "grad_norm": 7.818394660949707, + "learning_rate": 2.915852368931585e-07, + "loss": 0.0947, + "num_input_tokens_seen": 8778176, + "step": 9140 + }, + { + "epoch": 0.7760522742701969, + "grad_norm": 50.66056823730469, + "learning_rate": 2.905405996128882e-07, + "loss": 0.111, + "num_input_tokens_seen": 8782784, + "step": 9145 + }, + { + "epoch": 0.7764765784114053, + "grad_norm": 0.7679608464241028, + "learning_rate": 2.894975188209754e-07, + "loss": 0.0308, + "num_input_tokens_seen": 8787456, + "step": 9150 + }, + { + "epoch": 0.7769008825526137, + "grad_norm": 18.441892623901367, + "learning_rate": 2.8845599680584265e-07, + "loss": 0.0682, + "num_input_tokens_seen": 8792256, + "step": 9155 + }, + { + "epoch": 0.7773251866938221, + "grad_norm": 15.792590141296387, + "learning_rate": 2.8741603585249306e-07, + "loss": 0.1251, + "num_input_tokens_seen": 8797568, + "step": 9160 + }, + { + "epoch": 0.7777494908350305, + "grad_norm": 0.1300697922706604, + "learning_rate": 2.8637763824250507e-07, + "loss": 0.0504, + "num_input_tokens_seen": 8802112, + "step": 9165 + }, + { + "epoch": 0.778173794976239, + "grad_norm": 0.6240066289901733, + "learning_rate": 2.8534080625402677e-07, + "loss": 0.0153, + "num_input_tokens_seen": 8806784, + "step": 9170 + }, + { + "epoch": 0.7785980991174474, + "grad_norm": 26.65509033203125, + "learning_rate": 2.8430554216177203e-07, + "loss": 0.0108, + "num_input_tokens_seen": 8812096, + "step": 9175 + }, + { + "epoch": 0.7790224032586558, + "grad_norm": 21.93799591064453, + "learning_rate": 2.8327184823701464e-07, + "loss": 0.11, + "num_input_tokens_seen": 8817024, + "step": 9180 + }, + { + "epoch": 0.7794467073998642, + "grad_norm": 35.96677780151367, + "learning_rate": 2.822397267475827e-07, + "loss": 0.1664, + "num_input_tokens_seen": 8821696, + "step": 9185 + }, + { + "epoch": 0.7798710115410726, + "grad_norm": 28.536968231201172, + "learning_rate": 2.812091799578566e-07, + "loss": 0.1155, + "num_input_tokens_seen": 8826560, + "step": 9190 + }, + { + "epoch": 0.780295315682281, + "grad_norm": 0.38851362466812134, + "learning_rate": 2.8018021012875995e-07, + "loss": 0.0301, + "num_input_tokens_seen": 8831552, + "step": 9195 + }, + { + "epoch": 0.7807196198234895, + "grad_norm": 17.1531925201416, + "learning_rate": 2.791528195177576e-07, + "loss": 0.0673, + "num_input_tokens_seen": 8836480, + "step": 9200 + }, + { + "epoch": 0.7811439239646979, + "grad_norm": 13.413601875305176, + "learning_rate": 2.7812701037884964e-07, + "loss": 0.0681, + "num_input_tokens_seen": 8840832, + "step": 9205 + }, + { + "epoch": 0.7815682281059063, + "grad_norm": 24.447235107421875, + "learning_rate": 2.7710278496256665e-07, + "loss": 0.0717, + "num_input_tokens_seen": 8845376, + "step": 9210 + }, + { + "epoch": 0.7819925322471147, + "grad_norm": 0.10010475665330887, + "learning_rate": 2.7608014551596437e-07, + "loss": 0.0424, + "num_input_tokens_seen": 8850176, + "step": 9215 + }, + { + "epoch": 0.7824168363883232, + "grad_norm": 2.201000452041626, + "learning_rate": 2.7505909428261956e-07, + "loss": 0.0697, + "num_input_tokens_seen": 8854976, + "step": 9220 + }, + { + "epoch": 0.7828411405295316, + "grad_norm": 19.673593521118164, + "learning_rate": 2.740396335026234e-07, + "loss": 0.0415, + "num_input_tokens_seen": 8859392, + "step": 9225 + }, + { + "epoch": 0.78326544467074, + "grad_norm": 0.5144276022911072, + "learning_rate": 2.7302176541257984e-07, + "loss": 0.0155, + "num_input_tokens_seen": 8863936, + "step": 9230 + }, + { + "epoch": 0.7836897488119484, + "grad_norm": 0.5424436330795288, + "learning_rate": 2.720054922455964e-07, + "loss": 0.1092, + "num_input_tokens_seen": 8868480, + "step": 9235 + }, + { + "epoch": 0.7841140529531568, + "grad_norm": 53.60285949707031, + "learning_rate": 2.7099081623128294e-07, + "loss": 0.0854, + "num_input_tokens_seen": 8873344, + "step": 9240 + }, + { + "epoch": 0.7845383570943653, + "grad_norm": 30.46038818359375, + "learning_rate": 2.699777395957449e-07, + "loss": 0.0661, + "num_input_tokens_seen": 8878016, + "step": 9245 + }, + { + "epoch": 0.7849626612355737, + "grad_norm": 9.4970703125, + "learning_rate": 2.6896626456157846e-07, + "loss": 0.1429, + "num_input_tokens_seen": 8882816, + "step": 9250 + }, + { + "epoch": 0.7853869653767821, + "grad_norm": 0.3060969412326813, + "learning_rate": 2.679563933478667e-07, + "loss": 0.0025, + "num_input_tokens_seen": 8887936, + "step": 9255 + }, + { + "epoch": 0.7858112695179905, + "grad_norm": 8.793341636657715, + "learning_rate": 2.6694812817017387e-07, + "loss": 0.0219, + "num_input_tokens_seen": 8892672, + "step": 9260 + }, + { + "epoch": 0.7862355736591989, + "grad_norm": 17.539247512817383, + "learning_rate": 2.659414712405398e-07, + "loss": 0.053, + "num_input_tokens_seen": 8897152, + "step": 9265 + }, + { + "epoch": 0.7866598778004074, + "grad_norm": 0.20830750465393066, + "learning_rate": 2.649364247674779e-07, + "loss": 0.0312, + "num_input_tokens_seen": 8901504, + "step": 9270 + }, + { + "epoch": 0.7870841819416158, + "grad_norm": 12.591196060180664, + "learning_rate": 2.639329909559662e-07, + "loss": 0.073, + "num_input_tokens_seen": 8906176, + "step": 9275 + }, + { + "epoch": 0.7875084860828242, + "grad_norm": 0.34583133459091187, + "learning_rate": 2.6293117200744643e-07, + "loss": 0.0315, + "num_input_tokens_seen": 8910720, + "step": 9280 + }, + { + "epoch": 0.7879327902240326, + "grad_norm": 5.635802268981934, + "learning_rate": 2.6193097011981635e-07, + "loss": 0.0515, + "num_input_tokens_seen": 8915200, + "step": 9285 + }, + { + "epoch": 0.788357094365241, + "grad_norm": 15.817290306091309, + "learning_rate": 2.609323874874266e-07, + "loss": 0.0686, + "num_input_tokens_seen": 8920256, + "step": 9290 + }, + { + "epoch": 0.7887813985064495, + "grad_norm": 2.470198392868042, + "learning_rate": 2.5993542630107533e-07, + "loss": 0.0547, + "num_input_tokens_seen": 8924864, + "step": 9295 + }, + { + "epoch": 0.7892057026476579, + "grad_norm": 0.5217587351799011, + "learning_rate": 2.589400887480032e-07, + "loss": 0.0615, + "num_input_tokens_seen": 8929856, + "step": 9300 + }, + { + "epoch": 0.7896300067888663, + "grad_norm": 16.123640060424805, + "learning_rate": 2.579463770118887e-07, + "loss": 0.0191, + "num_input_tokens_seen": 8936640, + "step": 9305 + }, + { + "epoch": 0.7900543109300747, + "grad_norm": 0.8148235082626343, + "learning_rate": 2.569542932728436e-07, + "loss": 0.0344, + "num_input_tokens_seen": 8940992, + "step": 9310 + }, + { + "epoch": 0.7904786150712831, + "grad_norm": 2.461164712905884, + "learning_rate": 2.5596383970740833e-07, + "loss": 0.0789, + "num_input_tokens_seen": 8945664, + "step": 9315 + }, + { + "epoch": 0.7909029192124916, + "grad_norm": 10.289015769958496, + "learning_rate": 2.549750184885454e-07, + "loss": 0.1219, + "num_input_tokens_seen": 8950016, + "step": 9320 + }, + { + "epoch": 0.7913272233537, + "grad_norm": 14.911494255065918, + "learning_rate": 2.5398783178563844e-07, + "loss": 0.0785, + "num_input_tokens_seen": 8954880, + "step": 9325 + }, + { + "epoch": 0.7917515274949084, + "grad_norm": 7.448769569396973, + "learning_rate": 2.5300228176448304e-07, + "loss": 0.058, + "num_input_tokens_seen": 8960128, + "step": 9330 + }, + { + "epoch": 0.7921758316361168, + "grad_norm": 2.861565113067627, + "learning_rate": 2.52018370587285e-07, + "loss": 0.0552, + "num_input_tokens_seen": 8964864, + "step": 9335 + }, + { + "epoch": 0.7926001357773251, + "grad_norm": 6.1687822341918945, + "learning_rate": 2.5103610041265475e-07, + "loss": 0.0933, + "num_input_tokens_seen": 8970112, + "step": 9340 + }, + { + "epoch": 0.7930244399185336, + "grad_norm": 5.1669416427612305, + "learning_rate": 2.5005547339560207e-07, + "loss": 0.0475, + "num_input_tokens_seen": 8975104, + "step": 9345 + }, + { + "epoch": 0.793448744059742, + "grad_norm": 5.684637546539307, + "learning_rate": 2.4907649168753197e-07, + "loss": 0.0478, + "num_input_tokens_seen": 8980160, + "step": 9350 + }, + { + "epoch": 0.7938730482009504, + "grad_norm": 39.38056182861328, + "learning_rate": 2.480991574362403e-07, + "loss": 0.1039, + "num_input_tokens_seen": 8984320, + "step": 9355 + }, + { + "epoch": 0.7942973523421588, + "grad_norm": 14.252079963684082, + "learning_rate": 2.471234727859072e-07, + "loss": 0.0495, + "num_input_tokens_seen": 8989376, + "step": 9360 + }, + { + "epoch": 0.7947216564833672, + "grad_norm": 2.3673818111419678, + "learning_rate": 2.461494398770957e-07, + "loss": 0.026, + "num_input_tokens_seen": 8993920, + "step": 9365 + }, + { + "epoch": 0.7951459606245757, + "grad_norm": 0.3640649914741516, + "learning_rate": 2.4517706084674316e-07, + "loss": 0.0662, + "num_input_tokens_seen": 8998848, + "step": 9370 + }, + { + "epoch": 0.7955702647657841, + "grad_norm": 0.05636943131685257, + "learning_rate": 2.4420633782815945e-07, + "loss": 0.0314, + "num_input_tokens_seen": 9003712, + "step": 9375 + }, + { + "epoch": 0.7959945689069925, + "grad_norm": 0.3024899363517761, + "learning_rate": 2.432372729510214e-07, + "loss": 0.0552, + "num_input_tokens_seen": 9009472, + "step": 9380 + }, + { + "epoch": 0.7964188730482009, + "grad_norm": 4.740152359008789, + "learning_rate": 2.4226986834136763e-07, + "loss": 0.0265, + "num_input_tokens_seen": 9014144, + "step": 9385 + }, + { + "epoch": 0.7968431771894093, + "grad_norm": 18.9387264251709, + "learning_rate": 2.4130412612159445e-07, + "loss": 0.0193, + "num_input_tokens_seen": 9018688, + "step": 9390 + }, + { + "epoch": 0.7972674813306178, + "grad_norm": 9.759135246276855, + "learning_rate": 2.403400484104514e-07, + "loss": 0.0334, + "num_input_tokens_seen": 9023040, + "step": 9395 + }, + { + "epoch": 0.7976917854718262, + "grad_norm": 0.15711554884910583, + "learning_rate": 2.3937763732303504e-07, + "loss": 0.0262, + "num_input_tokens_seen": 9027328, + "step": 9400 + }, + { + "epoch": 0.7981160896130346, + "grad_norm": 1.643560528755188, + "learning_rate": 2.3841689497078742e-07, + "loss": 0.0333, + "num_input_tokens_seen": 9031552, + "step": 9405 + }, + { + "epoch": 0.798540393754243, + "grad_norm": 0.7876635193824768, + "learning_rate": 2.3745782346148756e-07, + "loss": 0.0224, + "num_input_tokens_seen": 9036672, + "step": 9410 + }, + { + "epoch": 0.7989646978954514, + "grad_norm": 4.152390956878662, + "learning_rate": 2.3650042489924992e-07, + "loss": 0.0491, + "num_input_tokens_seen": 9041472, + "step": 9415 + }, + { + "epoch": 0.7993890020366599, + "grad_norm": 31.914854049682617, + "learning_rate": 2.3554470138451909e-07, + "loss": 0.0686, + "num_input_tokens_seen": 9046912, + "step": 9420 + }, + { + "epoch": 0.7998133061778683, + "grad_norm": 8.399618148803711, + "learning_rate": 2.345906550140634e-07, + "loss": 0.0481, + "num_input_tokens_seen": 9051712, + "step": 9425 + }, + { + "epoch": 0.8002376103190767, + "grad_norm": 10.311676025390625, + "learning_rate": 2.3363828788097274e-07, + "loss": 0.0909, + "num_input_tokens_seen": 9056256, + "step": 9430 + }, + { + "epoch": 0.8006619144602851, + "grad_norm": 8.353757858276367, + "learning_rate": 2.3268760207465244e-07, + "loss": 0.0345, + "num_input_tokens_seen": 9061376, + "step": 9435 + }, + { + "epoch": 0.8010862186014935, + "grad_norm": 13.43514633178711, + "learning_rate": 2.3173859968081944e-07, + "loss": 0.1023, + "num_input_tokens_seen": 9066048, + "step": 9440 + }, + { + "epoch": 0.8010862186014935, + "eval_loss": 0.05470386520028114, + "eval_runtime": 15.9466, + "eval_samples_per_second": 656.88, + "eval_steps_per_second": 82.149, + "num_input_tokens_seen": 9066048, + "step": 9440 + }, + { + "epoch": 0.801510522742702, + "grad_norm": 14.77238941192627, + "learning_rate": 2.3079128278149717e-07, + "loss": 0.0286, + "num_input_tokens_seen": 9071232, + "step": 9445 + }, + { + "epoch": 0.8019348268839104, + "grad_norm": 24.130496978759766, + "learning_rate": 2.2984565345501172e-07, + "loss": 0.0474, + "num_input_tokens_seen": 9075520, + "step": 9450 + }, + { + "epoch": 0.8023591310251188, + "grad_norm": 15.134228706359863, + "learning_rate": 2.2890171377598556e-07, + "loss": 0.0605, + "num_input_tokens_seen": 9080192, + "step": 9455 + }, + { + "epoch": 0.8027834351663272, + "grad_norm": 7.502737998962402, + "learning_rate": 2.2795946581533632e-07, + "loss": 0.0041, + "num_input_tokens_seen": 9085696, + "step": 9460 + }, + { + "epoch": 0.8032077393075356, + "grad_norm": 11.876471519470215, + "learning_rate": 2.27018911640268e-07, + "loss": 0.0716, + "num_input_tokens_seen": 9090432, + "step": 9465 + }, + { + "epoch": 0.8036320434487441, + "grad_norm": 7.7270731925964355, + "learning_rate": 2.2608005331426982e-07, + "loss": 0.0832, + "num_input_tokens_seen": 9094976, + "step": 9470 + }, + { + "epoch": 0.8040563475899525, + "grad_norm": 7.613325595855713, + "learning_rate": 2.251428928971102e-07, + "loss": 0.0863, + "num_input_tokens_seen": 9100096, + "step": 9475 + }, + { + "epoch": 0.8044806517311609, + "grad_norm": 6.049130439758301, + "learning_rate": 2.2420743244483253e-07, + "loss": 0.0752, + "num_input_tokens_seen": 9105408, + "step": 9480 + }, + { + "epoch": 0.8049049558723693, + "grad_norm": 6.191554546356201, + "learning_rate": 2.2327367400975051e-07, + "loss": 0.0346, + "num_input_tokens_seen": 9110144, + "step": 9485 + }, + { + "epoch": 0.8053292600135777, + "grad_norm": 0.3262642025947571, + "learning_rate": 2.2234161964044417e-07, + "loss": 0.0509, + "num_input_tokens_seen": 9115520, + "step": 9490 + }, + { + "epoch": 0.8057535641547862, + "grad_norm": 0.36145687103271484, + "learning_rate": 2.2141127138175386e-07, + "loss": 0.0249, + "num_input_tokens_seen": 9119808, + "step": 9495 + }, + { + "epoch": 0.8061778682959946, + "grad_norm": 11.450455665588379, + "learning_rate": 2.2048263127477861e-07, + "loss": 0.0707, + "num_input_tokens_seen": 9124672, + "step": 9500 + }, + { + "epoch": 0.806602172437203, + "grad_norm": 9.32411003112793, + "learning_rate": 2.195557013568684e-07, + "loss": 0.0903, + "num_input_tokens_seen": 9129216, + "step": 9505 + }, + { + "epoch": 0.8070264765784114, + "grad_norm": 3.2268972396850586, + "learning_rate": 2.1863048366162207e-07, + "loss": 0.0704, + "num_input_tokens_seen": 9133952, + "step": 9510 + }, + { + "epoch": 0.8074507807196198, + "grad_norm": 26.277555465698242, + "learning_rate": 2.1770698021888145e-07, + "loss": 0.0464, + "num_input_tokens_seen": 9138240, + "step": 9515 + }, + { + "epoch": 0.8078750848608283, + "grad_norm": 25.081214904785156, + "learning_rate": 2.167851930547283e-07, + "loss": 0.1019, + "num_input_tokens_seen": 9142656, + "step": 9520 + }, + { + "epoch": 0.8082993890020367, + "grad_norm": 0.8892016410827637, + "learning_rate": 2.1586512419147763e-07, + "loss": 0.0595, + "num_input_tokens_seen": 9147456, + "step": 9525 + }, + { + "epoch": 0.8087236931432451, + "grad_norm": 16.272262573242188, + "learning_rate": 2.149467756476765e-07, + "loss": 0.0314, + "num_input_tokens_seen": 9152064, + "step": 9530 + }, + { + "epoch": 0.8091479972844535, + "grad_norm": 12.82519245147705, + "learning_rate": 2.140301494380956e-07, + "loss": 0.0694, + "num_input_tokens_seen": 9156544, + "step": 9535 + }, + { + "epoch": 0.8095723014256619, + "grad_norm": 17.81451988220215, + "learning_rate": 2.1311524757372901e-07, + "loss": 0.0385, + "num_input_tokens_seen": 9161088, + "step": 9540 + }, + { + "epoch": 0.8099966055668704, + "grad_norm": 21.11798667907715, + "learning_rate": 2.1220207206178685e-07, + "loss": 0.05, + "num_input_tokens_seen": 9165440, + "step": 9545 + }, + { + "epoch": 0.8104209097080788, + "grad_norm": 2.2403523921966553, + "learning_rate": 2.1129062490569106e-07, + "loss": 0.0143, + "num_input_tokens_seen": 9170496, + "step": 9550 + }, + { + "epoch": 0.8108452138492872, + "grad_norm": 1.4986127614974976, + "learning_rate": 2.1038090810507348e-07, + "loss": 0.034, + "num_input_tokens_seen": 9175360, + "step": 9555 + }, + { + "epoch": 0.8112695179904956, + "grad_norm": 16.13140296936035, + "learning_rate": 2.0947292365576785e-07, + "loss": 0.057, + "num_input_tokens_seen": 9179776, + "step": 9560 + }, + { + "epoch": 0.811693822131704, + "grad_norm": 0.2588607966899872, + "learning_rate": 2.085666735498085e-07, + "loss": 0.0305, + "num_input_tokens_seen": 9185536, + "step": 9565 + }, + { + "epoch": 0.8121181262729125, + "grad_norm": 11.537827491760254, + "learning_rate": 2.0766215977542435e-07, + "loss": 0.0328, + "num_input_tokens_seen": 9190528, + "step": 9570 + }, + { + "epoch": 0.8125424304141209, + "grad_norm": 29.518110275268555, + "learning_rate": 2.0675938431703532e-07, + "loss": 0.0937, + "num_input_tokens_seen": 9195264, + "step": 9575 + }, + { + "epoch": 0.8129667345553293, + "grad_norm": 16.484352111816406, + "learning_rate": 2.0585834915524646e-07, + "loss": 0.065, + "num_input_tokens_seen": 9200192, + "step": 9580 + }, + { + "epoch": 0.8133910386965377, + "grad_norm": 0.5188624858856201, + "learning_rate": 2.0495905626684674e-07, + "loss": 0.0184, + "num_input_tokens_seen": 9205056, + "step": 9585 + }, + { + "epoch": 0.8138153428377461, + "grad_norm": 15.126639366149902, + "learning_rate": 2.0406150762480089e-07, + "loss": 0.0328, + "num_input_tokens_seen": 9209856, + "step": 9590 + }, + { + "epoch": 0.8142396469789546, + "grad_norm": 14.457038879394531, + "learning_rate": 2.0316570519824806e-07, + "loss": 0.033, + "num_input_tokens_seen": 9214464, + "step": 9595 + }, + { + "epoch": 0.814663951120163, + "grad_norm": 7.134429931640625, + "learning_rate": 2.0227165095249564e-07, + "loss": 0.0432, + "num_input_tokens_seen": 9219072, + "step": 9600 + }, + { + "epoch": 0.8150882552613713, + "grad_norm": 0.702487587928772, + "learning_rate": 2.0137934684901636e-07, + "loss": 0.0675, + "num_input_tokens_seen": 9224768, + "step": 9605 + }, + { + "epoch": 0.8155125594025797, + "grad_norm": 41.96034240722656, + "learning_rate": 2.0048879484544279e-07, + "loss": 0.0464, + "num_input_tokens_seen": 9229696, + "step": 9610 + }, + { + "epoch": 0.8159368635437881, + "grad_norm": 0.04394965618848801, + "learning_rate": 1.9959999689556407e-07, + "loss": 0.0971, + "num_input_tokens_seen": 9235072, + "step": 9615 + }, + { + "epoch": 0.8163611676849966, + "grad_norm": 0.4195549488067627, + "learning_rate": 1.9871295494931994e-07, + "loss": 0.0947, + "num_input_tokens_seen": 9240320, + "step": 9620 + }, + { + "epoch": 0.816785471826205, + "grad_norm": 0.4872162640094757, + "learning_rate": 1.978276709527994e-07, + "loss": 0.0287, + "num_input_tokens_seen": 9244928, + "step": 9625 + }, + { + "epoch": 0.8172097759674134, + "grad_norm": 12.054028511047363, + "learning_rate": 1.9694414684823313e-07, + "loss": 0.1166, + "num_input_tokens_seen": 9249792, + "step": 9630 + }, + { + "epoch": 0.8176340801086218, + "grad_norm": 0.684785008430481, + "learning_rate": 1.960623845739914e-07, + "loss": 0.0569, + "num_input_tokens_seen": 9254848, + "step": 9635 + }, + { + "epoch": 0.8180583842498302, + "grad_norm": 27.895057678222656, + "learning_rate": 1.9518238606457925e-07, + "loss": 0.0463, + "num_input_tokens_seen": 9259392, + "step": 9640 + }, + { + "epoch": 0.8184826883910387, + "grad_norm": 0.3559216260910034, + "learning_rate": 1.943041532506322e-07, + "loss": 0.066, + "num_input_tokens_seen": 9263872, + "step": 9645 + }, + { + "epoch": 0.8189069925322471, + "grad_norm": 19.72517204284668, + "learning_rate": 1.9342768805891173e-07, + "loss": 0.0389, + "num_input_tokens_seen": 9268800, + "step": 9650 + }, + { + "epoch": 0.8193312966734555, + "grad_norm": 6.721203804016113, + "learning_rate": 1.9255299241230182e-07, + "loss": 0.1183, + "num_input_tokens_seen": 9273408, + "step": 9655 + }, + { + "epoch": 0.8197556008146639, + "grad_norm": 22.650775909423828, + "learning_rate": 1.91680068229803e-07, + "loss": 0.0679, + "num_input_tokens_seen": 9278208, + "step": 9660 + }, + { + "epoch": 0.8201799049558723, + "grad_norm": 0.2406800389289856, + "learning_rate": 1.9080891742653105e-07, + "loss": 0.0643, + "num_input_tokens_seen": 9282944, + "step": 9665 + }, + { + "epoch": 0.8206042090970808, + "grad_norm": 0.293648898601532, + "learning_rate": 1.8993954191371042e-07, + "loss": 0.0074, + "num_input_tokens_seen": 9288064, + "step": 9670 + }, + { + "epoch": 0.8210285132382892, + "grad_norm": 0.11446045339107513, + "learning_rate": 1.8907194359866986e-07, + "loss": 0.0289, + "num_input_tokens_seen": 9293120, + "step": 9675 + }, + { + "epoch": 0.8214528173794976, + "grad_norm": 19.012449264526367, + "learning_rate": 1.8820612438484075e-07, + "loss": 0.0717, + "num_input_tokens_seen": 9297472, + "step": 9680 + }, + { + "epoch": 0.821877121520706, + "grad_norm": 7.200514793395996, + "learning_rate": 1.8734208617174986e-07, + "loss": 0.0743, + "num_input_tokens_seen": 9302144, + "step": 9685 + }, + { + "epoch": 0.8223014256619144, + "grad_norm": 0.48357534408569336, + "learning_rate": 1.864798308550173e-07, + "loss": 0.0671, + "num_input_tokens_seen": 9307200, + "step": 9690 + }, + { + "epoch": 0.8227257298031229, + "grad_norm": 15.533746719360352, + "learning_rate": 1.856193603263515e-07, + "loss": 0.0212, + "num_input_tokens_seen": 9312384, + "step": 9695 + }, + { + "epoch": 0.8231500339443313, + "grad_norm": 7.348632335662842, + "learning_rate": 1.8476067647354553e-07, + "loss": 0.0079, + "num_input_tokens_seen": 9317120, + "step": 9700 + }, + { + "epoch": 0.8235743380855397, + "grad_norm": 26.3117733001709, + "learning_rate": 1.8390378118047213e-07, + "loss": 0.0715, + "num_input_tokens_seen": 9321664, + "step": 9705 + }, + { + "epoch": 0.8239986422267481, + "grad_norm": 20.507158279418945, + "learning_rate": 1.8304867632708077e-07, + "loss": 0.0367, + "num_input_tokens_seen": 9326208, + "step": 9710 + }, + { + "epoch": 0.8244229463679565, + "grad_norm": 0.20351463556289673, + "learning_rate": 1.821953637893917e-07, + "loss": 0.0819, + "num_input_tokens_seen": 9331264, + "step": 9715 + }, + { + "epoch": 0.824847250509165, + "grad_norm": 9.972869873046875, + "learning_rate": 1.8134384543949478e-07, + "loss": 0.0681, + "num_input_tokens_seen": 9336704, + "step": 9720 + }, + { + "epoch": 0.8252715546503734, + "grad_norm": 8.309649467468262, + "learning_rate": 1.804941231455417e-07, + "loss": 0.0809, + "num_input_tokens_seen": 9342016, + "step": 9725 + }, + { + "epoch": 0.8256958587915818, + "grad_norm": 10.31495475769043, + "learning_rate": 1.7964619877174513e-07, + "loss": 0.0543, + "num_input_tokens_seen": 9346752, + "step": 9730 + }, + { + "epoch": 0.8261201629327902, + "grad_norm": 0.1515226662158966, + "learning_rate": 1.788000741783725e-07, + "loss": 0.045, + "num_input_tokens_seen": 9351296, + "step": 9735 + }, + { + "epoch": 0.8265444670739986, + "grad_norm": 8.2435302734375, + "learning_rate": 1.7795575122174323e-07, + "loss": 0.078, + "num_input_tokens_seen": 9355712, + "step": 9740 + }, + { + "epoch": 0.8269687712152071, + "grad_norm": 9.670989990234375, + "learning_rate": 1.7711323175422376e-07, + "loss": 0.0929, + "num_input_tokens_seen": 9360384, + "step": 9745 + }, + { + "epoch": 0.8273930753564155, + "grad_norm": 6.574410915374756, + "learning_rate": 1.7627251762422413e-07, + "loss": 0.0472, + "num_input_tokens_seen": 9364608, + "step": 9750 + }, + { + "epoch": 0.8278173794976239, + "grad_norm": 16.814891815185547, + "learning_rate": 1.7543361067619267e-07, + "loss": 0.0203, + "num_input_tokens_seen": 9369728, + "step": 9755 + }, + { + "epoch": 0.8282416836388323, + "grad_norm": 6.871399402618408, + "learning_rate": 1.7459651275061483e-07, + "loss": 0.0865, + "num_input_tokens_seen": 9374592, + "step": 9760 + }, + { + "epoch": 0.8286659877800407, + "grad_norm": 8.160877227783203, + "learning_rate": 1.737612256840053e-07, + "loss": 0.0433, + "num_input_tokens_seen": 9380160, + "step": 9765 + }, + { + "epoch": 0.8290902919212492, + "grad_norm": 2.8912947177886963, + "learning_rate": 1.729277513089068e-07, + "loss": 0.0528, + "num_input_tokens_seen": 9385216, + "step": 9770 + }, + { + "epoch": 0.8295145960624576, + "grad_norm": 8.160943984985352, + "learning_rate": 1.7209609145388538e-07, + "loss": 0.0437, + "num_input_tokens_seen": 9390080, + "step": 9775 + }, + { + "epoch": 0.829938900203666, + "grad_norm": 18.256690979003906, + "learning_rate": 1.7126624794352563e-07, + "loss": 0.0108, + "num_input_tokens_seen": 9394304, + "step": 9780 + }, + { + "epoch": 0.8303632043448744, + "grad_norm": 0.5559505224227905, + "learning_rate": 1.7043822259842766e-07, + "loss": 0.0646, + "num_input_tokens_seen": 9399360, + "step": 9785 + }, + { + "epoch": 0.8307875084860828, + "grad_norm": 0.38654825091362, + "learning_rate": 1.6961201723520247e-07, + "loss": 0.0338, + "num_input_tokens_seen": 9404352, + "step": 9790 + }, + { + "epoch": 0.8312118126272913, + "grad_norm": 12.815828323364258, + "learning_rate": 1.6878763366646832e-07, + "loss": 0.0434, + "num_input_tokens_seen": 9409152, + "step": 9795 + }, + { + "epoch": 0.8316361167684997, + "grad_norm": 8.283032417297363, + "learning_rate": 1.6796507370084656e-07, + "loss": 0.0449, + "num_input_tokens_seen": 9414208, + "step": 9800 + }, + { + "epoch": 0.8320604209097081, + "grad_norm": 0.2858330309391022, + "learning_rate": 1.671443391429581e-07, + "loss": 0.0646, + "num_input_tokens_seen": 9419008, + "step": 9805 + }, + { + "epoch": 0.8324847250509165, + "grad_norm": 0.6068195104598999, + "learning_rate": 1.6632543179341772e-07, + "loss": 0.0368, + "num_input_tokens_seen": 9424192, + "step": 9810 + }, + { + "epoch": 0.832909029192125, + "grad_norm": 21.136734008789062, + "learning_rate": 1.6550835344883364e-07, + "loss": 0.0235, + "num_input_tokens_seen": 9429056, + "step": 9815 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 11.528197288513184, + "learning_rate": 1.646931059017994e-07, + "loss": 0.053, + "num_input_tokens_seen": 9433024, + "step": 9820 + }, + { + "epoch": 0.8337576374745418, + "grad_norm": 21.11509895324707, + "learning_rate": 1.6387969094089317e-07, + "loss": 0.0435, + "num_input_tokens_seen": 9437696, + "step": 9825 + }, + { + "epoch": 0.8341819416157502, + "grad_norm": 1.8037995100021362, + "learning_rate": 1.6306811035067203e-07, + "loss": 0.0472, + "num_input_tokens_seen": 9442944, + "step": 9830 + }, + { + "epoch": 0.8346062457569586, + "grad_norm": 0.501308798789978, + "learning_rate": 1.6225836591166886e-07, + "loss": 0.0394, + "num_input_tokens_seen": 9447680, + "step": 9835 + }, + { + "epoch": 0.835030549898167, + "grad_norm": 27.36667823791504, + "learning_rate": 1.6145045940038803e-07, + "loss": 0.0774, + "num_input_tokens_seen": 9452288, + "step": 9840 + }, + { + "epoch": 0.8354548540393755, + "grad_norm": 18.382902145385742, + "learning_rate": 1.6064439258930217e-07, + "loss": 0.063, + "num_input_tokens_seen": 9456896, + "step": 9845 + }, + { + "epoch": 0.8358791581805839, + "grad_norm": 19.6127872467041, + "learning_rate": 1.5984016724684658e-07, + "loss": 0.0847, + "num_input_tokens_seen": 9461632, + "step": 9850 + }, + { + "epoch": 0.8363034623217923, + "grad_norm": 0.09241819381713867, + "learning_rate": 1.5903778513741816e-07, + "loss": 0.077, + "num_input_tokens_seen": 9466560, + "step": 9855 + }, + { + "epoch": 0.8367277664630007, + "grad_norm": 0.4436960816383362, + "learning_rate": 1.5823724802136862e-07, + "loss": 0.0369, + "num_input_tokens_seen": 9471168, + "step": 9860 + }, + { + "epoch": 0.837152070604209, + "grad_norm": 11.254528999328613, + "learning_rate": 1.5743855765500258e-07, + "loss": 0.105, + "num_input_tokens_seen": 9475968, + "step": 9865 + }, + { + "epoch": 0.8375763747454175, + "grad_norm": 18.34065818786621, + "learning_rate": 1.5664171579057273e-07, + "loss": 0.0345, + "num_input_tokens_seen": 9481280, + "step": 9870 + }, + { + "epoch": 0.8380006788866259, + "grad_norm": 0.9362542033195496, + "learning_rate": 1.5584672417627665e-07, + "loss": 0.0235, + "num_input_tokens_seen": 9485952, + "step": 9875 + }, + { + "epoch": 0.8384249830278343, + "grad_norm": 1.7877192497253418, + "learning_rate": 1.5505358455625229e-07, + "loss": 0.0437, + "num_input_tokens_seen": 9490048, + "step": 9880 + }, + { + "epoch": 0.8388492871690427, + "grad_norm": 22.90734100341797, + "learning_rate": 1.5426229867057516e-07, + "loss": 0.0855, + "num_input_tokens_seen": 9495360, + "step": 9885 + }, + { + "epoch": 0.8392735913102511, + "grad_norm": 10.30651569366455, + "learning_rate": 1.5347286825525252e-07, + "loss": 0.0929, + "num_input_tokens_seen": 9499968, + "step": 9890 + }, + { + "epoch": 0.8396978954514596, + "grad_norm": 12.345735549926758, + "learning_rate": 1.526852950422226e-07, + "loss": 0.0571, + "num_input_tokens_seen": 9504704, + "step": 9895 + }, + { + "epoch": 0.840122199592668, + "grad_norm": 0.13042160868644714, + "learning_rate": 1.5189958075934771e-07, + "loss": 0.0243, + "num_input_tokens_seen": 9509184, + "step": 9900 + }, + { + "epoch": 0.8405465037338764, + "grad_norm": 16.40341567993164, + "learning_rate": 1.5111572713041253e-07, + "loss": 0.047, + "num_input_tokens_seen": 9514048, + "step": 9905 + }, + { + "epoch": 0.8409708078750848, + "grad_norm": 6.82795524597168, + "learning_rate": 1.5033373587511944e-07, + "loss": 0.0214, + "num_input_tokens_seen": 9519104, + "step": 9910 + }, + { + "epoch": 0.8413951120162932, + "grad_norm": 0.75281822681427, + "learning_rate": 1.4955360870908505e-07, + "loss": 0.0717, + "num_input_tokens_seen": 9523840, + "step": 9915 + }, + { + "epoch": 0.8418194161575017, + "grad_norm": 7.721748352050781, + "learning_rate": 1.4877534734383624e-07, + "loss": 0.0594, + "num_input_tokens_seen": 9528384, + "step": 9920 + }, + { + "epoch": 0.8422437202987101, + "grad_norm": 17.48251724243164, + "learning_rate": 1.4799895348680647e-07, + "loss": 0.0608, + "num_input_tokens_seen": 9533184, + "step": 9925 + }, + { + "epoch": 0.8426680244399185, + "grad_norm": 0.6833990812301636, + "learning_rate": 1.4722442884133214e-07, + "loss": 0.0261, + "num_input_tokens_seen": 9538944, + "step": 9930 + }, + { + "epoch": 0.8430923285811269, + "grad_norm": 1.3696354627609253, + "learning_rate": 1.4645177510664886e-07, + "loss": 0.0125, + "num_input_tokens_seen": 9543296, + "step": 9935 + }, + { + "epoch": 0.8435166327223353, + "grad_norm": 0.21253393590450287, + "learning_rate": 1.4568099397788746e-07, + "loss": 0.0706, + "num_input_tokens_seen": 9547840, + "step": 9940 + }, + { + "epoch": 0.8439409368635438, + "grad_norm": 1.1190301179885864, + "learning_rate": 1.4491208714607016e-07, + "loss": 0.0964, + "num_input_tokens_seen": 9552704, + "step": 9945 + }, + { + "epoch": 0.8443652410047522, + "grad_norm": 6.390957355499268, + "learning_rate": 1.4414505629810813e-07, + "loss": 0.0629, + "num_input_tokens_seen": 9558272, + "step": 9950 + }, + { + "epoch": 0.8447895451459606, + "grad_norm": 19.948999404907227, + "learning_rate": 1.433799031167957e-07, + "loss": 0.0944, + "num_input_tokens_seen": 9563264, + "step": 9955 + }, + { + "epoch": 0.845213849287169, + "grad_norm": 0.25374409556388855, + "learning_rate": 1.426166292808083e-07, + "loss": 0.0298, + "num_input_tokens_seen": 9567680, + "step": 9960 + }, + { + "epoch": 0.8456381534283774, + "grad_norm": 0.17964182794094086, + "learning_rate": 1.4185523646469821e-07, + "loss": 0.0373, + "num_input_tokens_seen": 9572608, + "step": 9965 + }, + { + "epoch": 0.8460624575695859, + "grad_norm": 13.58020305633545, + "learning_rate": 1.410957263388909e-07, + "loss": 0.0676, + "num_input_tokens_seen": 9577088, + "step": 9970 + }, + { + "epoch": 0.8464867617107943, + "grad_norm": 0.4978608191013336, + "learning_rate": 1.4033810056968155e-07, + "loss": 0.0421, + "num_input_tokens_seen": 9581952, + "step": 9975 + }, + { + "epoch": 0.8469110658520027, + "grad_norm": 0.18817739188671112, + "learning_rate": 1.3958236081923102e-07, + "loss": 0.096, + "num_input_tokens_seen": 9586496, + "step": 9980 + }, + { + "epoch": 0.8473353699932111, + "grad_norm": 0.07502438873052597, + "learning_rate": 1.3882850874556207e-07, + "loss": 0.0315, + "num_input_tokens_seen": 9591296, + "step": 9985 + }, + { + "epoch": 0.8477596741344195, + "grad_norm": 0.8373299241065979, + "learning_rate": 1.3807654600255713e-07, + "loss": 0.0639, + "num_input_tokens_seen": 9595904, + "step": 9990 + }, + { + "epoch": 0.848183978275628, + "grad_norm": 42.67544174194336, + "learning_rate": 1.373264742399526e-07, + "loss": 0.1177, + "num_input_tokens_seen": 9600640, + "step": 9995 + }, + { + "epoch": 0.8486082824168364, + "grad_norm": 2.967900514602661, + "learning_rate": 1.3657829510333652e-07, + "loss": 0.0569, + "num_input_tokens_seen": 9605056, + "step": 10000 + }, + { + "epoch": 0.8490325865580448, + "grad_norm": 9.756741523742676, + "learning_rate": 1.3583201023414493e-07, + "loss": 0.0102, + "num_input_tokens_seen": 9610112, + "step": 10005 + }, + { + "epoch": 0.8494568906992532, + "grad_norm": 0.3621211349964142, + "learning_rate": 1.350876212696579e-07, + "loss": 0.0276, + "num_input_tokens_seen": 9615744, + "step": 10010 + }, + { + "epoch": 0.8498811948404617, + "grad_norm": 12.596657752990723, + "learning_rate": 1.3434512984299596e-07, + "loss": 0.0089, + "num_input_tokens_seen": 9620288, + "step": 10015 + }, + { + "epoch": 0.8503054989816701, + "grad_norm": 0.3857429325580597, + "learning_rate": 1.3360453758311686e-07, + "loss": 0.0775, + "num_input_tokens_seen": 9625024, + "step": 10020 + }, + { + "epoch": 0.8507298031228785, + "grad_norm": 0.5006208419799805, + "learning_rate": 1.32865846114811e-07, + "loss": 0.0471, + "num_input_tokens_seen": 9630144, + "step": 10025 + }, + { + "epoch": 0.8511541072640869, + "grad_norm": 13.02332878112793, + "learning_rate": 1.321290570586999e-07, + "loss": 0.0665, + "num_input_tokens_seen": 9634624, + "step": 10030 + }, + { + "epoch": 0.8511541072640869, + "eval_loss": 0.05264058709144592, + "eval_runtime": 15.9998, + "eval_samples_per_second": 654.694, + "eval_steps_per_second": 81.876, + "num_input_tokens_seen": 9634624, + "step": 10030 + }, + { + "epoch": 0.8515784114052953, + "grad_norm": 0.15222354233264923, + "learning_rate": 1.3139417203123027e-07, + "loss": 0.0181, + "num_input_tokens_seen": 9639744, + "step": 10035 + }, + { + "epoch": 0.8520027155465038, + "grad_norm": 0.2849067747592926, + "learning_rate": 1.306611926446718e-07, + "loss": 0.009, + "num_input_tokens_seen": 9644480, + "step": 10040 + }, + { + "epoch": 0.8524270196877122, + "grad_norm": 0.13256412744522095, + "learning_rate": 1.2993012050711406e-07, + "loss": 0.0478, + "num_input_tokens_seen": 9649408, + "step": 10045 + }, + { + "epoch": 0.8528513238289206, + "grad_norm": 7.352311134338379, + "learning_rate": 1.292009572224614e-07, + "loss": 0.0887, + "num_input_tokens_seen": 9653440, + "step": 10050 + }, + { + "epoch": 0.853275627970129, + "grad_norm": 1.3892513513565063, + "learning_rate": 1.284737043904306e-07, + "loss": 0.0318, + "num_input_tokens_seen": 9658176, + "step": 10055 + }, + { + "epoch": 0.8536999321113374, + "grad_norm": 0.09574834257364273, + "learning_rate": 1.2774836360654717e-07, + "loss": 0.0806, + "num_input_tokens_seen": 9662848, + "step": 10060 + }, + { + "epoch": 0.8541242362525459, + "grad_norm": 17.671186447143555, + "learning_rate": 1.2702493646214207e-07, + "loss": 0.0707, + "num_input_tokens_seen": 9667392, + "step": 10065 + }, + { + "epoch": 0.8545485403937543, + "grad_norm": 12.697011947631836, + "learning_rate": 1.2630342454434728e-07, + "loss": 0.1539, + "num_input_tokens_seen": 9672384, + "step": 10070 + }, + { + "epoch": 0.8549728445349627, + "grad_norm": 10.890848159790039, + "learning_rate": 1.2558382943609357e-07, + "loss": 0.0521, + "num_input_tokens_seen": 9677248, + "step": 10075 + }, + { + "epoch": 0.8553971486761711, + "grad_norm": 12.55119514465332, + "learning_rate": 1.2486615271610558e-07, + "loss": 0.0558, + "num_input_tokens_seen": 9681536, + "step": 10080 + }, + { + "epoch": 0.8558214528173795, + "grad_norm": 35.36617660522461, + "learning_rate": 1.241503959589003e-07, + "loss": 0.081, + "num_input_tokens_seen": 9686592, + "step": 10085 + }, + { + "epoch": 0.856245756958588, + "grad_norm": 25.30384635925293, + "learning_rate": 1.234365607347816e-07, + "loss": 0.0423, + "num_input_tokens_seen": 9691648, + "step": 10090 + }, + { + "epoch": 0.8566700610997964, + "grad_norm": 26.083160400390625, + "learning_rate": 1.22724648609838e-07, + "loss": 0.0433, + "num_input_tokens_seen": 9696768, + "step": 10095 + }, + { + "epoch": 0.8570943652410048, + "grad_norm": 11.980801582336426, + "learning_rate": 1.2201466114593884e-07, + "loss": 0.0689, + "num_input_tokens_seen": 9701376, + "step": 10100 + }, + { + "epoch": 0.8575186693822132, + "grad_norm": 0.25428134202957153, + "learning_rate": 1.2130659990073144e-07, + "loss": 0.0659, + "num_input_tokens_seen": 9705408, + "step": 10105 + }, + { + "epoch": 0.8579429735234216, + "grad_norm": 25.351072311401367, + "learning_rate": 1.206004664276359e-07, + "loss": 0.064, + "num_input_tokens_seen": 9709824, + "step": 10110 + }, + { + "epoch": 0.8583672776646301, + "grad_norm": 0.5601566433906555, + "learning_rate": 1.198962622758447e-07, + "loss": 0.0826, + "num_input_tokens_seen": 9715072, + "step": 10115 + }, + { + "epoch": 0.8587915818058385, + "grad_norm": 2.15639591217041, + "learning_rate": 1.1919398899031585e-07, + "loss": 0.0706, + "num_input_tokens_seen": 9720000, + "step": 10120 + }, + { + "epoch": 0.8592158859470469, + "grad_norm": 0.6312253475189209, + "learning_rate": 1.1849364811177288e-07, + "loss": 0.0041, + "num_input_tokens_seen": 9724288, + "step": 10125 + }, + { + "epoch": 0.8596401900882552, + "grad_norm": 0.6518107056617737, + "learning_rate": 1.1779524117669837e-07, + "loss": 0.0681, + "num_input_tokens_seen": 9729280, + "step": 10130 + }, + { + "epoch": 0.8600644942294636, + "grad_norm": 8.462985038757324, + "learning_rate": 1.1709876971733269e-07, + "loss": 0.0764, + "num_input_tokens_seen": 9733696, + "step": 10135 + }, + { + "epoch": 0.860488798370672, + "grad_norm": 0.15076375007629395, + "learning_rate": 1.1640423526166987e-07, + "loss": 0.016, + "num_input_tokens_seen": 9738624, + "step": 10140 + }, + { + "epoch": 0.8609131025118805, + "grad_norm": 0.44147834181785583, + "learning_rate": 1.1571163933345462e-07, + "loss": 0.0557, + "num_input_tokens_seen": 9743488, + "step": 10145 + }, + { + "epoch": 0.8613374066530889, + "grad_norm": 2.310833215713501, + "learning_rate": 1.150209834521777e-07, + "loss": 0.0133, + "num_input_tokens_seen": 9749632, + "step": 10150 + }, + { + "epoch": 0.8617617107942973, + "grad_norm": 11.502927780151367, + "learning_rate": 1.1433226913307514e-07, + "loss": 0.0291, + "num_input_tokens_seen": 9754432, + "step": 10155 + }, + { + "epoch": 0.8621860149355057, + "grad_norm": 1.099219560623169, + "learning_rate": 1.1364549788712185e-07, + "loss": 0.0293, + "num_input_tokens_seen": 9759168, + "step": 10160 + }, + { + "epoch": 0.8626103190767141, + "grad_norm": 0.827223002910614, + "learning_rate": 1.1296067122103059e-07, + "loss": 0.012, + "num_input_tokens_seen": 9764096, + "step": 10165 + }, + { + "epoch": 0.8630346232179226, + "grad_norm": 0.387584924697876, + "learning_rate": 1.1227779063724818e-07, + "loss": 0.0581, + "num_input_tokens_seen": 9768768, + "step": 10170 + }, + { + "epoch": 0.863458927359131, + "grad_norm": 15.812118530273438, + "learning_rate": 1.115968576339511e-07, + "loss": 0.0188, + "num_input_tokens_seen": 9773184, + "step": 10175 + }, + { + "epoch": 0.8638832315003394, + "grad_norm": 7.606304168701172, + "learning_rate": 1.1091787370504347e-07, + "loss": 0.0713, + "num_input_tokens_seen": 9778688, + "step": 10180 + }, + { + "epoch": 0.8643075356415478, + "grad_norm": 20.49475860595703, + "learning_rate": 1.1024084034015347e-07, + "loss": 0.0388, + "num_input_tokens_seen": 9783168, + "step": 10185 + }, + { + "epoch": 0.8647318397827563, + "grad_norm": 14.119841575622559, + "learning_rate": 1.095657590246295e-07, + "loss": 0.0622, + "num_input_tokens_seen": 9787712, + "step": 10190 + }, + { + "epoch": 0.8651561439239647, + "grad_norm": 17.043668746948242, + "learning_rate": 1.0889263123953773e-07, + "loss": 0.0461, + "num_input_tokens_seen": 9792384, + "step": 10195 + }, + { + "epoch": 0.8655804480651731, + "grad_norm": 6.3596625328063965, + "learning_rate": 1.0822145846165853e-07, + "loss": 0.1011, + "num_input_tokens_seen": 9797824, + "step": 10200 + }, + { + "epoch": 0.8660047522063815, + "grad_norm": 2.849475145339966, + "learning_rate": 1.0755224216348235e-07, + "loss": 0.1421, + "num_input_tokens_seen": 9802880, + "step": 10205 + }, + { + "epoch": 0.8664290563475899, + "grad_norm": 10.186467170715332, + "learning_rate": 1.0688498381320854e-07, + "loss": 0.088, + "num_input_tokens_seen": 9807424, + "step": 10210 + }, + { + "epoch": 0.8668533604887984, + "grad_norm": 15.472275733947754, + "learning_rate": 1.0621968487473975e-07, + "loss": 0.0545, + "num_input_tokens_seen": 9812480, + "step": 10215 + }, + { + "epoch": 0.8672776646300068, + "grad_norm": 72.3904037475586, + "learning_rate": 1.0555634680768066e-07, + "loss": 0.0356, + "num_input_tokens_seen": 9816960, + "step": 10220 + }, + { + "epoch": 0.8677019687712152, + "grad_norm": 0.23482580482959747, + "learning_rate": 1.0489497106733347e-07, + "loss": 0.0259, + "num_input_tokens_seen": 9821568, + "step": 10225 + }, + { + "epoch": 0.8681262729124236, + "grad_norm": 2.643237829208374, + "learning_rate": 1.0423555910469561e-07, + "loss": 0.0805, + "num_input_tokens_seen": 9826048, + "step": 10230 + }, + { + "epoch": 0.868550577053632, + "grad_norm": 7.882426738739014, + "learning_rate": 1.0357811236645597e-07, + "loss": 0.0412, + "num_input_tokens_seen": 9830720, + "step": 10235 + }, + { + "epoch": 0.8689748811948405, + "grad_norm": 7.657166481018066, + "learning_rate": 1.0292263229499209e-07, + "loss": 0.0653, + "num_input_tokens_seen": 9835648, + "step": 10240 + }, + { + "epoch": 0.8693991853360489, + "grad_norm": 9.319342613220215, + "learning_rate": 1.022691203283661e-07, + "loss": 0.1175, + "num_input_tokens_seen": 9839936, + "step": 10245 + }, + { + "epoch": 0.8698234894772573, + "grad_norm": 0.1383010298013687, + "learning_rate": 1.0161757790032355e-07, + "loss": 0.0568, + "num_input_tokens_seen": 9844608, + "step": 10250 + }, + { + "epoch": 0.8702477936184657, + "grad_norm": 1.5263642072677612, + "learning_rate": 1.0096800644028791e-07, + "loss": 0.0191, + "num_input_tokens_seen": 9848896, + "step": 10255 + }, + { + "epoch": 0.8706720977596741, + "grad_norm": 6.711588382720947, + "learning_rate": 1.003204073733589e-07, + "loss": 0.0822, + "num_input_tokens_seen": 9853184, + "step": 10260 + }, + { + "epoch": 0.8710964019008826, + "grad_norm": 0.34313127398490906, + "learning_rate": 9.967478212030923e-08, + "loss": 0.0627, + "num_input_tokens_seen": 9857856, + "step": 10265 + }, + { + "epoch": 0.871520706042091, + "grad_norm": 37.75361251831055, + "learning_rate": 9.903113209758096e-08, + "loss": 0.0556, + "num_input_tokens_seen": 9862592, + "step": 10270 + }, + { + "epoch": 0.8719450101832994, + "grad_norm": 12.846491813659668, + "learning_rate": 9.838945871728266e-08, + "loss": 0.0582, + "num_input_tokens_seen": 9867584, + "step": 10275 + }, + { + "epoch": 0.8723693143245078, + "grad_norm": 4.416686534881592, + "learning_rate": 9.774976338718677e-08, + "loss": 0.0089, + "num_input_tokens_seen": 9872384, + "step": 10280 + }, + { + "epoch": 0.8727936184657162, + "grad_norm": 27.68250846862793, + "learning_rate": 9.711204751072499e-08, + "loss": 0.0969, + "num_input_tokens_seen": 9876672, + "step": 10285 + }, + { + "epoch": 0.8732179226069247, + "grad_norm": 31.03038215637207, + "learning_rate": 9.647631248698773e-08, + "loss": 0.0259, + "num_input_tokens_seen": 9881792, + "step": 10290 + }, + { + "epoch": 0.8736422267481331, + "grad_norm": 13.698047637939453, + "learning_rate": 9.584255971071886e-08, + "loss": 0.1201, + "num_input_tokens_seen": 9886464, + "step": 10295 + }, + { + "epoch": 0.8740665308893415, + "grad_norm": 19.63283348083496, + "learning_rate": 9.521079057231274e-08, + "loss": 0.0121, + "num_input_tokens_seen": 9891264, + "step": 10300 + }, + { + "epoch": 0.8744908350305499, + "grad_norm": 5.031111240386963, + "learning_rate": 9.45810064578133e-08, + "loss": 0.0298, + "num_input_tokens_seen": 9896320, + "step": 10305 + }, + { + "epoch": 0.8749151391717583, + "grad_norm": 16.013206481933594, + "learning_rate": 9.39532087489081e-08, + "loss": 0.0864, + "num_input_tokens_seen": 9901504, + "step": 10310 + }, + { + "epoch": 0.8753394433129668, + "grad_norm": 10.649767875671387, + "learning_rate": 9.33273988229275e-08, + "loss": 0.0504, + "num_input_tokens_seen": 9907008, + "step": 10315 + }, + { + "epoch": 0.8757637474541752, + "grad_norm": 7.6332197189331055, + "learning_rate": 9.270357805284057e-08, + "loss": 0.0989, + "num_input_tokens_seen": 9911744, + "step": 10320 + }, + { + "epoch": 0.8761880515953836, + "grad_norm": 12.951970100402832, + "learning_rate": 9.208174780725253e-08, + "loss": 0.1047, + "num_input_tokens_seen": 9916096, + "step": 10325 + }, + { + "epoch": 0.876612355736592, + "grad_norm": 1.121921181678772, + "learning_rate": 9.146190945040145e-08, + "loss": 0.0295, + "num_input_tokens_seen": 9920448, + "step": 10330 + }, + { + "epoch": 0.8770366598778004, + "grad_norm": 16.190587997436523, + "learning_rate": 9.084406434215553e-08, + "loss": 0.0886, + "num_input_tokens_seen": 9925312, + "step": 10335 + }, + { + "epoch": 0.8774609640190089, + "grad_norm": 18.056102752685547, + "learning_rate": 9.022821383800926e-08, + "loss": 0.0846, + "num_input_tokens_seen": 9929920, + "step": 10340 + }, + { + "epoch": 0.8778852681602173, + "grad_norm": 1.8114804029464722, + "learning_rate": 8.961435928908267e-08, + "loss": 0.0035, + "num_input_tokens_seen": 9934912, + "step": 10345 + }, + { + "epoch": 0.8783095723014257, + "grad_norm": 19.24297332763672, + "learning_rate": 8.900250204211513e-08, + "loss": 0.0809, + "num_input_tokens_seen": 9939520, + "step": 10350 + }, + { + "epoch": 0.8787338764426341, + "grad_norm": 28.829084396362305, + "learning_rate": 8.839264343946506e-08, + "loss": 0.0441, + "num_input_tokens_seen": 9944384, + "step": 10355 + }, + { + "epoch": 0.8791581805838425, + "grad_norm": 9.343367576599121, + "learning_rate": 8.778478481910611e-08, + "loss": 0.0557, + "num_input_tokens_seen": 9949056, + "step": 10360 + }, + { + "epoch": 0.879582484725051, + "grad_norm": 15.344569206237793, + "learning_rate": 8.717892751462363e-08, + "loss": 0.0435, + "num_input_tokens_seen": 9954176, + "step": 10365 + }, + { + "epoch": 0.8800067888662594, + "grad_norm": 15.17747974395752, + "learning_rate": 8.657507285521281e-08, + "loss": 0.0449, + "num_input_tokens_seen": 9958912, + "step": 10370 + }, + { + "epoch": 0.8804310930074678, + "grad_norm": 12.388559341430664, + "learning_rate": 8.597322216567493e-08, + "loss": 0.0793, + "num_input_tokens_seen": 9963648, + "step": 10375 + }, + { + "epoch": 0.8808553971486762, + "grad_norm": 13.636301040649414, + "learning_rate": 8.537337676641442e-08, + "loss": 0.0145, + "num_input_tokens_seen": 9968256, + "step": 10380 + }, + { + "epoch": 0.8812797012898846, + "grad_norm": 0.26373496651649475, + "learning_rate": 8.477553797343728e-08, + "loss": 0.0439, + "num_input_tokens_seen": 9973376, + "step": 10385 + }, + { + "epoch": 0.881704005431093, + "grad_norm": 20.62845230102539, + "learning_rate": 8.41797070983461e-08, + "loss": 0.0804, + "num_input_tokens_seen": 9978240, + "step": 10390 + }, + { + "epoch": 0.8821283095723014, + "grad_norm": 0.17017631232738495, + "learning_rate": 8.358588544833877e-08, + "loss": 0.003, + "num_input_tokens_seen": 9982784, + "step": 10395 + }, + { + "epoch": 0.8825526137135098, + "grad_norm": 5.08088493347168, + "learning_rate": 8.29940743262052e-08, + "loss": 0.0894, + "num_input_tokens_seen": 9987008, + "step": 10400 + }, + { + "epoch": 0.8829769178547182, + "grad_norm": 11.488354682922363, + "learning_rate": 8.240427503032443e-08, + "loss": 0.0803, + "num_input_tokens_seen": 9992640, + "step": 10405 + }, + { + "epoch": 0.8834012219959266, + "grad_norm": 0.7701570987701416, + "learning_rate": 8.181648885466141e-08, + "loss": 0.0592, + "num_input_tokens_seen": 9996672, + "step": 10410 + }, + { + "epoch": 0.883825526137135, + "grad_norm": 5.4983601570129395, + "learning_rate": 8.123071708876473e-08, + "loss": 0.07, + "num_input_tokens_seen": 10001216, + "step": 10415 + }, + { + "epoch": 0.8842498302783435, + "grad_norm": 2.3859031200408936, + "learning_rate": 8.064696101776358e-08, + "loss": 0.0299, + "num_input_tokens_seen": 10006144, + "step": 10420 + }, + { + "epoch": 0.8846741344195519, + "grad_norm": 23.999298095703125, + "learning_rate": 8.006522192236487e-08, + "loss": 0.0395, + "num_input_tokens_seen": 10011520, + "step": 10425 + }, + { + "epoch": 0.8850984385607603, + "grad_norm": 1.3761314153671265, + "learning_rate": 7.948550107885043e-08, + "loss": 0.0071, + "num_input_tokens_seen": 10016512, + "step": 10430 + }, + { + "epoch": 0.8855227427019687, + "grad_norm": 33.45118713378906, + "learning_rate": 7.89077997590738e-08, + "loss": 0.0866, + "num_input_tokens_seen": 10020928, + "step": 10435 + }, + { + "epoch": 0.8859470468431772, + "grad_norm": 1.5158158540725708, + "learning_rate": 7.833211923045891e-08, + "loss": 0.0255, + "num_input_tokens_seen": 10025920, + "step": 10440 + }, + { + "epoch": 0.8863713509843856, + "grad_norm": 14.920109748840332, + "learning_rate": 7.775846075599524e-08, + "loss": 0.0434, + "num_input_tokens_seen": 10030464, + "step": 10445 + }, + { + "epoch": 0.886795655125594, + "grad_norm": 4.014338970184326, + "learning_rate": 7.718682559423651e-08, + "loss": 0.0498, + "num_input_tokens_seen": 10035328, + "step": 10450 + }, + { + "epoch": 0.8872199592668024, + "grad_norm": 30.575336456298828, + "learning_rate": 7.661721499929752e-08, + "loss": 0.0692, + "num_input_tokens_seen": 10040384, + "step": 10455 + }, + { + "epoch": 0.8876442634080108, + "grad_norm": 0.6011759042739868, + "learning_rate": 7.60496302208512e-08, + "loss": 0.0334, + "num_input_tokens_seen": 10045440, + "step": 10460 + }, + { + "epoch": 0.8880685675492193, + "grad_norm": 5.954895973205566, + "learning_rate": 7.548407250412614e-08, + "loss": 0.0272, + "num_input_tokens_seen": 10050432, + "step": 10465 + }, + { + "epoch": 0.8884928716904277, + "grad_norm": 23.370336532592773, + "learning_rate": 7.492054308990381e-08, + "loss": 0.0403, + "num_input_tokens_seen": 10055296, + "step": 10470 + }, + { + "epoch": 0.8889171758316361, + "grad_norm": 15.24754810333252, + "learning_rate": 7.435904321451524e-08, + "loss": 0.0513, + "num_input_tokens_seen": 10060416, + "step": 10475 + }, + { + "epoch": 0.8893414799728445, + "grad_norm": 0.8229270577430725, + "learning_rate": 7.379957410983995e-08, + "loss": 0.0397, + "num_input_tokens_seen": 10065472, + "step": 10480 + }, + { + "epoch": 0.8897657841140529, + "grad_norm": 9.5431489944458, + "learning_rate": 7.324213700330095e-08, + "loss": 0.0267, + "num_input_tokens_seen": 10070784, + "step": 10485 + }, + { + "epoch": 0.8901900882552614, + "grad_norm": 0.07072459161281586, + "learning_rate": 7.268673311786378e-08, + "loss": 0.0202, + "num_input_tokens_seen": 10075904, + "step": 10490 + }, + { + "epoch": 0.8906143923964698, + "grad_norm": 10.91126823425293, + "learning_rate": 7.213336367203338e-08, + "loss": 0.0229, + "num_input_tokens_seen": 10080768, + "step": 10495 + }, + { + "epoch": 0.8910386965376782, + "grad_norm": 0.6212610006332397, + "learning_rate": 7.158202987985106e-08, + "loss": 0.0579, + "num_input_tokens_seen": 10085312, + "step": 10500 + }, + { + "epoch": 0.8914630006788866, + "grad_norm": 0.43236419558525085, + "learning_rate": 7.10327329508923e-08, + "loss": 0.1098, + "num_input_tokens_seen": 10089792, + "step": 10505 + }, + { + "epoch": 0.891887304820095, + "grad_norm": 7.02418851852417, + "learning_rate": 7.048547409026384e-08, + "loss": 0.0712, + "num_input_tokens_seen": 10094976, + "step": 10510 + }, + { + "epoch": 0.8923116089613035, + "grad_norm": 0.24239858984947205, + "learning_rate": 6.994025449860064e-08, + "loss": 0.0331, + "num_input_tokens_seen": 10099200, + "step": 10515 + }, + { + "epoch": 0.8927359131025119, + "grad_norm": 23.110071182250977, + "learning_rate": 6.939707537206485e-08, + "loss": 0.075, + "num_input_tokens_seen": 10104320, + "step": 10520 + }, + { + "epoch": 0.8931602172437203, + "grad_norm": 0.2738831043243408, + "learning_rate": 6.885593790234056e-08, + "loss": 0.0464, + "num_input_tokens_seen": 10109312, + "step": 10525 + }, + { + "epoch": 0.8935845213849287, + "grad_norm": 0.8342077136039734, + "learning_rate": 6.831684327663367e-08, + "loss": 0.0293, + "num_input_tokens_seen": 10113600, + "step": 10530 + }, + { + "epoch": 0.8940088255261371, + "grad_norm": 6.28220272064209, + "learning_rate": 6.777979267766786e-08, + "loss": 0.0048, + "num_input_tokens_seen": 10118272, + "step": 10535 + }, + { + "epoch": 0.8944331296673456, + "grad_norm": 28.221162796020508, + "learning_rate": 6.724478728368277e-08, + "loss": 0.059, + "num_input_tokens_seen": 10122688, + "step": 10540 + }, + { + "epoch": 0.894857433808554, + "grad_norm": 5.227849960327148, + "learning_rate": 6.671182826843047e-08, + "loss": 0.1179, + "num_input_tokens_seen": 10128000, + "step": 10545 + }, + { + "epoch": 0.8952817379497624, + "grad_norm": 6.202547073364258, + "learning_rate": 6.618091680117399e-08, + "loss": 0.0508, + "num_input_tokens_seen": 10132544, + "step": 10550 + }, + { + "epoch": 0.8957060420909708, + "grad_norm": 7.8940629959106445, + "learning_rate": 6.565205404668395e-08, + "loss": 0.0739, + "num_input_tokens_seen": 10138496, + "step": 10555 + }, + { + "epoch": 0.8961303462321792, + "grad_norm": 29.633386611938477, + "learning_rate": 6.512524116523633e-08, + "loss": 0.0383, + "num_input_tokens_seen": 10143424, + "step": 10560 + }, + { + "epoch": 0.8965546503733877, + "grad_norm": 23.16342544555664, + "learning_rate": 6.460047931261003e-08, + "loss": 0.0861, + "num_input_tokens_seen": 10148672, + "step": 10565 + }, + { + "epoch": 0.8969789545145961, + "grad_norm": 7.808042526245117, + "learning_rate": 6.407776964008383e-08, + "loss": 0.0124, + "num_input_tokens_seen": 10153408, + "step": 10570 + }, + { + "epoch": 0.8974032586558045, + "grad_norm": 0.7497434020042419, + "learning_rate": 6.355711329443481e-08, + "loss": 0.055, + "num_input_tokens_seen": 10157952, + "step": 10575 + }, + { + "epoch": 0.8978275627970129, + "grad_norm": 0.13198748230934143, + "learning_rate": 6.303851141793437e-08, + "loss": 0.0355, + "num_input_tokens_seen": 10163712, + "step": 10580 + }, + { + "epoch": 0.8982518669382213, + "grad_norm": 18.96082305908203, + "learning_rate": 6.252196514834751e-08, + "loss": 0.0312, + "num_input_tokens_seen": 10168512, + "step": 10585 + }, + { + "epoch": 0.8986761710794298, + "grad_norm": 33.99904251098633, + "learning_rate": 6.200747561892882e-08, + "loss": 0.0404, + "num_input_tokens_seen": 10173120, + "step": 10590 + }, + { + "epoch": 0.8991004752206382, + "grad_norm": 5.6009368896484375, + "learning_rate": 6.149504395842087e-08, + "loss": 0.0617, + "num_input_tokens_seen": 10177856, + "step": 10595 + }, + { + "epoch": 0.8995247793618466, + "grad_norm": 21.8309383392334, + "learning_rate": 6.098467129105123e-08, + "loss": 0.0328, + "num_input_tokens_seen": 10182080, + "step": 10600 + }, + { + "epoch": 0.899949083503055, + "grad_norm": 27.717575073242188, + "learning_rate": 6.047635873653068e-08, + "loss": 0.0675, + "num_input_tokens_seen": 10186496, + "step": 10605 + }, + { + "epoch": 0.9003733876442634, + "grad_norm": 20.762332916259766, + "learning_rate": 5.997010741004949e-08, + "loss": 0.0368, + "num_input_tokens_seen": 10190912, + "step": 10610 + }, + { + "epoch": 0.9007976917854719, + "grad_norm": 27.49617576599121, + "learning_rate": 5.946591842227677e-08, + "loss": 0.0525, + "num_input_tokens_seen": 10195008, + "step": 10615 + }, + { + "epoch": 0.9012219959266803, + "grad_norm": 9.339949607849121, + "learning_rate": 5.8963792879356265e-08, + "loss": 0.0855, + "num_input_tokens_seen": 10199424, + "step": 10620 + }, + { + "epoch": 0.9012219959266803, + "eval_loss": 0.05229973420500755, + "eval_runtime": 15.7294, + "eval_samples_per_second": 665.95, + "eval_steps_per_second": 83.284, + "num_input_tokens_seen": 10199424, + "step": 10620 + }, + { + "epoch": 0.9016463000678887, + "grad_norm": 14.488176345825195, + "learning_rate": 5.84637318829051e-08, + "loss": 0.1033, + "num_input_tokens_seen": 10203968, + "step": 10625 + }, + { + "epoch": 0.9020706042090971, + "grad_norm": 37.42259216308594, + "learning_rate": 5.796573653001091e-08, + "loss": 0.0844, + "num_input_tokens_seen": 10208640, + "step": 10630 + }, + { + "epoch": 0.9024949083503055, + "grad_norm": 0.3053247332572937, + "learning_rate": 5.746980791322942e-08, + "loss": 0.0136, + "num_input_tokens_seen": 10213504, + "step": 10635 + }, + { + "epoch": 0.902919212491514, + "grad_norm": 9.757519721984863, + "learning_rate": 5.697594712058218e-08, + "loss": 0.0535, + "num_input_tokens_seen": 10218432, + "step": 10640 + }, + { + "epoch": 0.9033435166327224, + "grad_norm": 0.3384649157524109, + "learning_rate": 5.6484155235554275e-08, + "loss": 0.09, + "num_input_tokens_seen": 10223296, + "step": 10645 + }, + { + "epoch": 0.9037678207739308, + "grad_norm": 9.248594284057617, + "learning_rate": 5.599443333709131e-08, + "loss": 0.1227, + "num_input_tokens_seen": 10227904, + "step": 10650 + }, + { + "epoch": 0.9041921249151391, + "grad_norm": 0.7518671154975891, + "learning_rate": 5.5506782499598394e-08, + "loss": 0.0481, + "num_input_tokens_seen": 10232640, + "step": 10655 + }, + { + "epoch": 0.9046164290563475, + "grad_norm": 22.829261779785156, + "learning_rate": 5.502120379293585e-08, + "loss": 0.0891, + "num_input_tokens_seen": 10236864, + "step": 10660 + }, + { + "epoch": 0.905040733197556, + "grad_norm": 0.5002491474151611, + "learning_rate": 5.453769828241872e-08, + "loss": 0.0525, + "num_input_tokens_seen": 10241216, + "step": 10665 + }, + { + "epoch": 0.9054650373387644, + "grad_norm": 13.650254249572754, + "learning_rate": 5.4056267028813606e-08, + "loss": 0.0544, + "num_input_tokens_seen": 10245952, + "step": 10670 + }, + { + "epoch": 0.9058893414799728, + "grad_norm": 41.74579620361328, + "learning_rate": 5.357691108833584e-08, + "loss": 0.0321, + "num_input_tokens_seen": 10251392, + "step": 10675 + }, + { + "epoch": 0.9063136456211812, + "grad_norm": 5.183800220489502, + "learning_rate": 5.309963151264829e-08, + "loss": 0.1047, + "num_input_tokens_seen": 10256128, + "step": 10680 + }, + { + "epoch": 0.9067379497623896, + "grad_norm": 3.764021396636963, + "learning_rate": 5.262442934885813e-08, + "loss": 0.043, + "num_input_tokens_seen": 10260352, + "step": 10685 + }, + { + "epoch": 0.9071622539035981, + "grad_norm": 0.09938930720090866, + "learning_rate": 5.21513056395152e-08, + "loss": 0.0157, + "num_input_tokens_seen": 10265344, + "step": 10690 + }, + { + "epoch": 0.9075865580448065, + "grad_norm": 2.3056552410125732, + "learning_rate": 5.168026142260862e-08, + "loss": 0.0575, + "num_input_tokens_seen": 10269632, + "step": 10695 + }, + { + "epoch": 0.9080108621860149, + "grad_norm": 13.903623580932617, + "learning_rate": 5.121129773156663e-08, + "loss": 0.0206, + "num_input_tokens_seen": 10274560, + "step": 10700 + }, + { + "epoch": 0.9084351663272233, + "grad_norm": 0.24791185557842255, + "learning_rate": 5.074441559525167e-08, + "loss": 0.0327, + "num_input_tokens_seen": 10279552, + "step": 10705 + }, + { + "epoch": 0.9088594704684317, + "grad_norm": 0.47743096947669983, + "learning_rate": 5.027961603796027e-08, + "loss": 0.0499, + "num_input_tokens_seen": 10284288, + "step": 10710 + }, + { + "epoch": 0.9092837746096402, + "grad_norm": 15.714171409606934, + "learning_rate": 4.981690007941952e-08, + "loss": 0.0414, + "num_input_tokens_seen": 10289600, + "step": 10715 + }, + { + "epoch": 0.9097080787508486, + "grad_norm": 4.764758586883545, + "learning_rate": 4.93562687347856e-08, + "loss": 0.0168, + "num_input_tokens_seen": 10294144, + "step": 10720 + }, + { + "epoch": 0.910132382892057, + "grad_norm": 8.239587783813477, + "learning_rate": 4.889772301464112e-08, + "loss": 0.0495, + "num_input_tokens_seen": 10298752, + "step": 10725 + }, + { + "epoch": 0.9105566870332654, + "grad_norm": 13.610047340393066, + "learning_rate": 4.844126392499304e-08, + "loss": 0.0437, + "num_input_tokens_seen": 10303424, + "step": 10730 + }, + { + "epoch": 0.9109809911744738, + "grad_norm": 23.149311065673828, + "learning_rate": 4.7986892467270057e-08, + "loss": 0.0589, + "num_input_tokens_seen": 10308096, + "step": 10735 + }, + { + "epoch": 0.9114052953156823, + "grad_norm": 6.693690299987793, + "learning_rate": 4.7534609638321785e-08, + "loss": 0.0508, + "num_input_tokens_seen": 10313152, + "step": 10740 + }, + { + "epoch": 0.9118295994568907, + "grad_norm": 11.246514320373535, + "learning_rate": 4.70844164304145e-08, + "loss": 0.0348, + "num_input_tokens_seen": 10318016, + "step": 10745 + }, + { + "epoch": 0.9122539035980991, + "grad_norm": 0.46989282965660095, + "learning_rate": 4.663631383123057e-08, + "loss": 0.0304, + "num_input_tokens_seen": 10322432, + "step": 10750 + }, + { + "epoch": 0.9126782077393075, + "grad_norm": 0.2042643278837204, + "learning_rate": 4.61903028238656e-08, + "loss": 0.0115, + "num_input_tokens_seen": 10327168, + "step": 10755 + }, + { + "epoch": 0.9131025118805159, + "grad_norm": 1.085499882698059, + "learning_rate": 4.5746384386826767e-08, + "loss": 0.0781, + "num_input_tokens_seen": 10331584, + "step": 10760 + }, + { + "epoch": 0.9135268160217244, + "grad_norm": 0.2616358697414398, + "learning_rate": 4.5304559494030004e-08, + "loss": 0.0269, + "num_input_tokens_seen": 10336640, + "step": 10765 + }, + { + "epoch": 0.9139511201629328, + "grad_norm": 9.98695182800293, + "learning_rate": 4.486482911479839e-08, + "loss": 0.0617, + "num_input_tokens_seen": 10341440, + "step": 10770 + }, + { + "epoch": 0.9143754243041412, + "grad_norm": 2.2652738094329834, + "learning_rate": 4.442719421385921e-08, + "loss": 0.0104, + "num_input_tokens_seen": 10346624, + "step": 10775 + }, + { + "epoch": 0.9147997284453496, + "grad_norm": 0.2195603996515274, + "learning_rate": 4.399165575134378e-08, + "loss": 0.0092, + "num_input_tokens_seen": 10351552, + "step": 10780 + }, + { + "epoch": 0.915224032586558, + "grad_norm": 9.171103477478027, + "learning_rate": 4.3558214682782645e-08, + "loss": 0.0601, + "num_input_tokens_seen": 10356352, + "step": 10785 + }, + { + "epoch": 0.9156483367277665, + "grad_norm": 0.11924657225608826, + "learning_rate": 4.312687195910558e-08, + "loss": 0.0717, + "num_input_tokens_seen": 10361920, + "step": 10790 + }, + { + "epoch": 0.9160726408689749, + "grad_norm": 5.790630340576172, + "learning_rate": 4.269762852663894e-08, + "loss": 0.0724, + "num_input_tokens_seen": 10366272, + "step": 10795 + }, + { + "epoch": 0.9164969450101833, + "grad_norm": 18.95665168762207, + "learning_rate": 4.227048532710287e-08, + "loss": 0.0167, + "num_input_tokens_seen": 10371328, + "step": 10800 + }, + { + "epoch": 0.9169212491513917, + "grad_norm": 24.164165496826172, + "learning_rate": 4.184544329761008e-08, + "loss": 0.0604, + "num_input_tokens_seen": 10376384, + "step": 10805 + }, + { + "epoch": 0.9173455532926001, + "grad_norm": 6.414123058319092, + "learning_rate": 4.1422503370663553e-08, + "loss": 0.0687, + "num_input_tokens_seen": 10381120, + "step": 10810 + }, + { + "epoch": 0.9177698574338086, + "grad_norm": 0.15381702780723572, + "learning_rate": 4.100166647415437e-08, + "loss": 0.0225, + "num_input_tokens_seen": 10385536, + "step": 10815 + }, + { + "epoch": 0.918194161575017, + "grad_norm": 1.0370328426361084, + "learning_rate": 4.058293353135988e-08, + "loss": 0.0188, + "num_input_tokens_seen": 10390208, + "step": 10820 + }, + { + "epoch": 0.9186184657162254, + "grad_norm": 0.18410193920135498, + "learning_rate": 4.016630546094158e-08, + "loss": 0.051, + "num_input_tokens_seen": 10394560, + "step": 10825 + }, + { + "epoch": 0.9190427698574338, + "grad_norm": 1.0141817331314087, + "learning_rate": 3.975178317694239e-08, + "loss": 0.0707, + "num_input_tokens_seen": 10398848, + "step": 10830 + }, + { + "epoch": 0.9194670739986422, + "grad_norm": 11.05111026763916, + "learning_rate": 3.9339367588786644e-08, + "loss": 0.036, + "num_input_tokens_seen": 10404160, + "step": 10835 + }, + { + "epoch": 0.9198913781398507, + "grad_norm": 6.632215976715088, + "learning_rate": 3.892905960127546e-08, + "loss": 0.051, + "num_input_tokens_seen": 10408704, + "step": 10840 + }, + { + "epoch": 0.9203156822810591, + "grad_norm": 11.019655227661133, + "learning_rate": 3.852086011458688e-08, + "loss": 0.0547, + "num_input_tokens_seen": 10413312, + "step": 10845 + }, + { + "epoch": 0.9207399864222675, + "grad_norm": 26.46409797668457, + "learning_rate": 3.811477002427288e-08, + "loss": 0.048, + "num_input_tokens_seen": 10418048, + "step": 10850 + }, + { + "epoch": 0.9211642905634759, + "grad_norm": 0.4152030348777771, + "learning_rate": 3.771079022125745e-08, + "loss": 0.0239, + "num_input_tokens_seen": 10422464, + "step": 10855 + }, + { + "epoch": 0.9215885947046843, + "grad_norm": 33.51541519165039, + "learning_rate": 3.7308921591835074e-08, + "loss": 0.1148, + "num_input_tokens_seen": 10426880, + "step": 10860 + }, + { + "epoch": 0.9220128988458928, + "grad_norm": 16.225305557250977, + "learning_rate": 3.6909165017668385e-08, + "loss": 0.1021, + "num_input_tokens_seen": 10431232, + "step": 10865 + }, + { + "epoch": 0.9224372029871012, + "grad_norm": 26.616647720336914, + "learning_rate": 3.651152137578617e-08, + "loss": 0.1128, + "num_input_tokens_seen": 10437120, + "step": 10870 + }, + { + "epoch": 0.9228615071283096, + "grad_norm": 12.144709587097168, + "learning_rate": 3.611599153858214e-08, + "loss": 0.1289, + "num_input_tokens_seen": 10443456, + "step": 10875 + }, + { + "epoch": 0.923285811269518, + "grad_norm": 6.237645149230957, + "learning_rate": 3.572257637381182e-08, + "loss": 0.0611, + "num_input_tokens_seen": 10448576, + "step": 10880 + }, + { + "epoch": 0.9237101154107265, + "grad_norm": 2.817307472229004, + "learning_rate": 3.533127674459202e-08, + "loss": 0.0389, + "num_input_tokens_seen": 10453440, + "step": 10885 + }, + { + "epoch": 0.9241344195519349, + "grad_norm": 0.3340953290462494, + "learning_rate": 3.494209350939792e-08, + "loss": 0.0268, + "num_input_tokens_seen": 10458176, + "step": 10890 + }, + { + "epoch": 0.9245587236931433, + "grad_norm": 10.947976112365723, + "learning_rate": 3.455502752206152e-08, + "loss": 0.0594, + "num_input_tokens_seen": 10463232, + "step": 10895 + }, + { + "epoch": 0.9249830278343517, + "grad_norm": 0.28482815623283386, + "learning_rate": 3.4170079631769764e-08, + "loss": 0.0892, + "num_input_tokens_seen": 10468032, + "step": 10900 + }, + { + "epoch": 0.9254073319755601, + "grad_norm": 4.82904052734375, + "learning_rate": 3.378725068306298e-08, + "loss": 0.0135, + "num_input_tokens_seen": 10472832, + "step": 10905 + }, + { + "epoch": 0.9258316361167686, + "grad_norm": 0.40435436367988586, + "learning_rate": 3.3406541515832e-08, + "loss": 0.0199, + "num_input_tokens_seen": 10477248, + "step": 10910 + }, + { + "epoch": 0.926255940257977, + "grad_norm": 19.899160385131836, + "learning_rate": 3.302795296531813e-08, + "loss": 0.057, + "num_input_tokens_seen": 10481920, + "step": 10915 + }, + { + "epoch": 0.9266802443991853, + "grad_norm": 6.992005825042725, + "learning_rate": 3.265148586210942e-08, + "loss": 0.0741, + "num_input_tokens_seen": 10486976, + "step": 10920 + }, + { + "epoch": 0.9271045485403937, + "grad_norm": 31.746776580810547, + "learning_rate": 3.2277141032139746e-08, + "loss": 0.0575, + "num_input_tokens_seen": 10491712, + "step": 10925 + }, + { + "epoch": 0.9275288526816021, + "grad_norm": 35.81395721435547, + "learning_rate": 3.190491929668748e-08, + "loss": 0.0889, + "num_input_tokens_seen": 10496960, + "step": 10930 + }, + { + "epoch": 0.9279531568228105, + "grad_norm": 13.375768661499023, + "learning_rate": 3.15348214723723e-08, + "loss": 0.0602, + "num_input_tokens_seen": 10501120, + "step": 10935 + }, + { + "epoch": 0.928377460964019, + "grad_norm": 0.4103608727455139, + "learning_rate": 3.11668483711548e-08, + "loss": 0.0228, + "num_input_tokens_seen": 10507008, + "step": 10940 + }, + { + "epoch": 0.9288017651052274, + "grad_norm": 18.951581954956055, + "learning_rate": 3.0801000800333876e-08, + "loss": 0.0284, + "num_input_tokens_seen": 10511424, + "step": 10945 + }, + { + "epoch": 0.9292260692464358, + "grad_norm": 38.00094985961914, + "learning_rate": 3.043727956254538e-08, + "loss": 0.0394, + "num_input_tokens_seen": 10515968, + "step": 10950 + }, + { + "epoch": 0.9296503733876442, + "grad_norm": 37.419803619384766, + "learning_rate": 3.007568545576011e-08, + "loss": 0.0882, + "num_input_tokens_seen": 10520576, + "step": 10955 + }, + { + "epoch": 0.9300746775288526, + "grad_norm": 1.1467417478561401, + "learning_rate": 2.971621927328216e-08, + "loss": 0.0491, + "num_input_tokens_seen": 10525504, + "step": 10960 + }, + { + "epoch": 0.9304989816700611, + "grad_norm": 0.40472060441970825, + "learning_rate": 2.9358881803746794e-08, + "loss": 0.1112, + "num_input_tokens_seen": 10530112, + "step": 10965 + }, + { + "epoch": 0.9309232858112695, + "grad_norm": 0.4380423426628113, + "learning_rate": 2.900367383111979e-08, + "loss": 0.0864, + "num_input_tokens_seen": 10535232, + "step": 10970 + }, + { + "epoch": 0.9313475899524779, + "grad_norm": 18.734893798828125, + "learning_rate": 2.865059613469434e-08, + "loss": 0.0258, + "num_input_tokens_seen": 10539712, + "step": 10975 + }, + { + "epoch": 0.9317718940936863, + "grad_norm": 2.202915906906128, + "learning_rate": 2.829964948909047e-08, + "loss": 0.057, + "num_input_tokens_seen": 10544128, + "step": 10980 + }, + { + "epoch": 0.9321961982348947, + "grad_norm": 9.770487785339355, + "learning_rate": 2.795083466425252e-08, + "loss": 0.0174, + "num_input_tokens_seen": 10548288, + "step": 10985 + }, + { + "epoch": 0.9326205023761032, + "grad_norm": 29.964080810546875, + "learning_rate": 2.760415242544811e-08, + "loss": 0.0176, + "num_input_tokens_seen": 10552512, + "step": 10990 + }, + { + "epoch": 0.9330448065173116, + "grad_norm": 1.10525381565094, + "learning_rate": 2.7259603533266063e-08, + "loss": 0.0617, + "num_input_tokens_seen": 10557952, + "step": 10995 + }, + { + "epoch": 0.93346911065852, + "grad_norm": 0.8512893319129944, + "learning_rate": 2.6917188743614704e-08, + "loss": 0.0529, + "num_input_tokens_seen": 10562240, + "step": 11000 + }, + { + "epoch": 0.9338934147997284, + "grad_norm": 3.778372049331665, + "learning_rate": 2.6576908807720233e-08, + "loss": 0.0832, + "num_input_tokens_seen": 10566976, + "step": 11005 + }, + { + "epoch": 0.9343177189409368, + "grad_norm": 22.075092315673828, + "learning_rate": 2.623876447212592e-08, + "loss": 0.0393, + "num_input_tokens_seen": 10571584, + "step": 11010 + }, + { + "epoch": 0.9347420230821453, + "grad_norm": 16.559036254882812, + "learning_rate": 2.590275647868867e-08, + "loss": 0.082, + "num_input_tokens_seen": 10576832, + "step": 11015 + }, + { + "epoch": 0.9351663272233537, + "grad_norm": 30.172012329101562, + "learning_rate": 2.5568885564579258e-08, + "loss": 0.0305, + "num_input_tokens_seen": 10581184, + "step": 11020 + }, + { + "epoch": 0.9355906313645621, + "grad_norm": 3.05375337600708, + "learning_rate": 2.5237152462279532e-08, + "loss": 0.0419, + "num_input_tokens_seen": 10585792, + "step": 11025 + }, + { + "epoch": 0.9360149355057705, + "grad_norm": 0.48022231459617615, + "learning_rate": 2.4907557899581212e-08, + "loss": 0.0216, + "num_input_tokens_seen": 10590016, + "step": 11030 + }, + { + "epoch": 0.936439239646979, + "grad_norm": 1.483380913734436, + "learning_rate": 2.4580102599584317e-08, + "loss": 0.0183, + "num_input_tokens_seen": 10594624, + "step": 11035 + }, + { + "epoch": 0.9368635437881874, + "grad_norm": 13.737578392028809, + "learning_rate": 2.425478728069552e-08, + "loss": 0.1089, + "num_input_tokens_seen": 10598912, + "step": 11040 + }, + { + "epoch": 0.9372878479293958, + "grad_norm": 1.424195647239685, + "learning_rate": 2.3931612656626688e-08, + "loss": 0.0255, + "num_input_tokens_seen": 10603648, + "step": 11045 + }, + { + "epoch": 0.9377121520706042, + "grad_norm": 4.536886692047119, + "learning_rate": 2.3610579436392996e-08, + "loss": 0.0221, + "num_input_tokens_seen": 10608640, + "step": 11050 + }, + { + "epoch": 0.9381364562118126, + "grad_norm": 18.450544357299805, + "learning_rate": 2.329168832431161e-08, + "loss": 0.1013, + "num_input_tokens_seen": 10613312, + "step": 11055 + }, + { + "epoch": 0.938560760353021, + "grad_norm": 0.3562639057636261, + "learning_rate": 2.2974940020000112e-08, + "loss": 0.0113, + "num_input_tokens_seen": 10617600, + "step": 11060 + }, + { + "epoch": 0.9389850644942295, + "grad_norm": 27.98299789428711, + "learning_rate": 2.266033521837529e-08, + "loss": 0.109, + "num_input_tokens_seen": 10622144, + "step": 11065 + }, + { + "epoch": 0.9394093686354379, + "grad_norm": 1.067746877670288, + "learning_rate": 2.2347874609650596e-08, + "loss": 0.0026, + "num_input_tokens_seen": 10626880, + "step": 11070 + }, + { + "epoch": 0.9398336727766463, + "grad_norm": 27.640390396118164, + "learning_rate": 2.2037558879336004e-08, + "loss": 0.0656, + "num_input_tokens_seen": 10632128, + "step": 11075 + }, + { + "epoch": 0.9402579769178547, + "grad_norm": 4.924507141113281, + "learning_rate": 2.1729388708235485e-08, + "loss": 0.0192, + "num_input_tokens_seen": 10636800, + "step": 11080 + }, + { + "epoch": 0.9406822810590632, + "grad_norm": 0.15182463824748993, + "learning_rate": 2.1423364772445886e-08, + "loss": 0.0352, + "num_input_tokens_seen": 10641408, + "step": 11085 + }, + { + "epoch": 0.9411065852002716, + "grad_norm": 5.914322853088379, + "learning_rate": 2.111948774335548e-08, + "loss": 0.0058, + "num_input_tokens_seen": 10646400, + "step": 11090 + }, + { + "epoch": 0.94153088934148, + "grad_norm": 31.913299560546875, + "learning_rate": 2.081775828764254e-08, + "loss": 0.1124, + "num_input_tokens_seen": 10650816, + "step": 11095 + }, + { + "epoch": 0.9419551934826884, + "grad_norm": 0.4387105703353882, + "learning_rate": 2.0518177067273103e-08, + "loss": 0.0133, + "num_input_tokens_seen": 10655424, + "step": 11100 + }, + { + "epoch": 0.9423794976238968, + "grad_norm": 1.6567003726959229, + "learning_rate": 2.0220744739501305e-08, + "loss": 0.0908, + "num_input_tokens_seen": 10660416, + "step": 11105 + }, + { + "epoch": 0.9428038017651053, + "grad_norm": 6.256274700164795, + "learning_rate": 1.992546195686573e-08, + "loss": 0.0695, + "num_input_tokens_seen": 10665088, + "step": 11110 + }, + { + "epoch": 0.9432281059063137, + "grad_norm": 0.11509720981121063, + "learning_rate": 1.9632329367189725e-08, + "loss": 0.0392, + "num_input_tokens_seen": 10669632, + "step": 11115 + }, + { + "epoch": 0.9436524100475221, + "grad_norm": 14.866227149963379, + "learning_rate": 1.9341347613579086e-08, + "loss": 0.0275, + "num_input_tokens_seen": 10674752, + "step": 11120 + }, + { + "epoch": 0.9440767141887305, + "grad_norm": 0.6725902557373047, + "learning_rate": 1.9052517334420704e-08, + "loss": 0.0512, + "num_input_tokens_seen": 10679296, + "step": 11125 + }, + { + "epoch": 0.9445010183299389, + "grad_norm": 25.810312271118164, + "learning_rate": 1.8765839163381815e-08, + "loss": 0.0959, + "num_input_tokens_seen": 10683968, + "step": 11130 + }, + { + "epoch": 0.9449253224711474, + "grad_norm": 0.7116135954856873, + "learning_rate": 1.8481313729407645e-08, + "loss": 0.0589, + "num_input_tokens_seen": 10688512, + "step": 11135 + }, + { + "epoch": 0.9453496266123558, + "grad_norm": 7.129089832305908, + "learning_rate": 1.8198941656720646e-08, + "loss": 0.1141, + "num_input_tokens_seen": 10693312, + "step": 11140 + }, + { + "epoch": 0.9457739307535642, + "grad_norm": 28.044822692871094, + "learning_rate": 1.7918723564819272e-08, + "loss": 0.105, + "num_input_tokens_seen": 10698688, + "step": 11145 + }, + { + "epoch": 0.9461982348947726, + "grad_norm": 14.497418403625488, + "learning_rate": 1.7640660068475976e-08, + "loss": 0.1199, + "num_input_tokens_seen": 10704448, + "step": 11150 + }, + { + "epoch": 0.946622539035981, + "grad_norm": 0.1703585833311081, + "learning_rate": 1.7364751777736332e-08, + "loss": 0.0696, + "num_input_tokens_seen": 10709312, + "step": 11155 + }, + { + "epoch": 0.9470468431771895, + "grad_norm": 10.870919227600098, + "learning_rate": 1.7090999297917684e-08, + "loss": 0.0331, + "num_input_tokens_seen": 10714368, + "step": 11160 + }, + { + "epoch": 0.9474711473183979, + "grad_norm": 61.19886779785156, + "learning_rate": 1.6819403229607732e-08, + "loss": 0.0659, + "num_input_tokens_seen": 10718848, + "step": 11165 + }, + { + "epoch": 0.9478954514596063, + "grad_norm": 0.1849125176668167, + "learning_rate": 1.6549964168663054e-08, + "loss": 0.0355, + "num_input_tokens_seen": 10723712, + "step": 11170 + }, + { + "epoch": 0.9483197556008147, + "grad_norm": 7.6935601234436035, + "learning_rate": 1.6282682706208028e-08, + "loss": 0.0551, + "num_input_tokens_seen": 10728384, + "step": 11175 + }, + { + "epoch": 0.948744059742023, + "grad_norm": 0.25422102212905884, + "learning_rate": 1.6017559428633588e-08, + "loss": 0.0233, + "num_input_tokens_seen": 10733632, + "step": 11180 + }, + { + "epoch": 0.9491683638832314, + "grad_norm": 0.24908825755119324, + "learning_rate": 1.5754594917595564e-08, + "loss": 0.0556, + "num_input_tokens_seen": 10738880, + "step": 11185 + }, + { + "epoch": 0.9495926680244399, + "grad_norm": 19.16463851928711, + "learning_rate": 1.549378975001403e-08, + "loss": 0.0574, + "num_input_tokens_seen": 10744384, + "step": 11190 + }, + { + "epoch": 0.9500169721656483, + "grad_norm": 16.46047592163086, + "learning_rate": 1.5235144498071172e-08, + "loss": 0.0319, + "num_input_tokens_seen": 10749632, + "step": 11195 + }, + { + "epoch": 0.9504412763068567, + "grad_norm": 17.636735916137695, + "learning_rate": 1.4978659729210974e-08, + "loss": 0.0301, + "num_input_tokens_seen": 10754624, + "step": 11200 + }, + { + "epoch": 0.9508655804480651, + "grad_norm": 27.047237396240234, + "learning_rate": 1.4724336006137094e-08, + "loss": 0.0761, + "num_input_tokens_seen": 10759488, + "step": 11205 + }, + { + "epoch": 0.9512898845892735, + "grad_norm": 0.19785869121551514, + "learning_rate": 1.4472173886812433e-08, + "loss": 0.004, + "num_input_tokens_seen": 10764096, + "step": 11210 + }, + { + "epoch": 0.9512898845892735, + "eval_loss": 0.05228454992175102, + "eval_runtime": 15.8433, + "eval_samples_per_second": 661.164, + "eval_steps_per_second": 82.685, + "num_input_tokens_seen": 10764096, + "step": 11210 + }, + { + "epoch": 0.951714188730482, + "grad_norm": 10.602087020874023, + "learning_rate": 1.4222173924457348e-08, + "loss": 0.0577, + "num_input_tokens_seen": 10768640, + "step": 11215 + }, + { + "epoch": 0.9521384928716904, + "grad_norm": 0.10275991261005402, + "learning_rate": 1.3974336667548659e-08, + "loss": 0.0544, + "num_input_tokens_seen": 10774016, + "step": 11220 + }, + { + "epoch": 0.9525627970128988, + "grad_norm": 1.062309741973877, + "learning_rate": 1.3728662659818201e-08, + "loss": 0.0036, + "num_input_tokens_seen": 10779072, + "step": 11225 + }, + { + "epoch": 0.9529871011541072, + "grad_norm": 18.538475036621094, + "learning_rate": 1.3485152440252389e-08, + "loss": 0.0487, + "num_input_tokens_seen": 10784000, + "step": 11230 + }, + { + "epoch": 0.9534114052953157, + "grad_norm": 7.267714500427246, + "learning_rate": 1.3243806543089875e-08, + "loss": 0.0644, + "num_input_tokens_seen": 10788928, + "step": 11235 + }, + { + "epoch": 0.9538357094365241, + "grad_norm": 7.134186267852783, + "learning_rate": 1.3004625497821553e-08, + "loss": 0.0275, + "num_input_tokens_seen": 10794304, + "step": 11240 + }, + { + "epoch": 0.9542600135777325, + "grad_norm": 9.111855506896973, + "learning_rate": 1.276760982918812e-08, + "loss": 0.0298, + "num_input_tokens_seen": 10798528, + "step": 11245 + }, + { + "epoch": 0.9546843177189409, + "grad_norm": 0.16917872428894043, + "learning_rate": 1.2532760057180291e-08, + "loss": 0.0547, + "num_input_tokens_seen": 10803200, + "step": 11250 + }, + { + "epoch": 0.9551086218601493, + "grad_norm": 0.395969033241272, + "learning_rate": 1.230007669703681e-08, + "loss": 0.0283, + "num_input_tokens_seen": 10807680, + "step": 11255 + }, + { + "epoch": 0.9555329260013578, + "grad_norm": 0.17550839483737946, + "learning_rate": 1.2069560259243328e-08, + "loss": 0.0351, + "num_input_tokens_seen": 10812224, + "step": 11260 + }, + { + "epoch": 0.9559572301425662, + "grad_norm": 7.8040385246276855, + "learning_rate": 1.1841211249531636e-08, + "loss": 0.045, + "num_input_tokens_seen": 10816640, + "step": 11265 + }, + { + "epoch": 0.9563815342837746, + "grad_norm": 0.16808491945266724, + "learning_rate": 1.1615030168878438e-08, + "loss": 0.0182, + "num_input_tokens_seen": 10821440, + "step": 11270 + }, + { + "epoch": 0.956805838424983, + "grad_norm": 30.00806999206543, + "learning_rate": 1.139101751350402e-08, + "loss": 0.0752, + "num_input_tokens_seen": 10826560, + "step": 11275 + }, + { + "epoch": 0.9572301425661914, + "grad_norm": 21.37195587158203, + "learning_rate": 1.1169173774871477e-08, + "loss": 0.0729, + "num_input_tokens_seen": 10831552, + "step": 11280 + }, + { + "epoch": 0.9576544467073999, + "grad_norm": 0.5756484270095825, + "learning_rate": 1.0949499439685483e-08, + "loss": 0.027, + "num_input_tokens_seen": 10835968, + "step": 11285 + }, + { + "epoch": 0.9580787508486083, + "grad_norm": 11.867304801940918, + "learning_rate": 1.0731994989891302e-08, + "loss": 0.0548, + "num_input_tokens_seen": 10840064, + "step": 11290 + }, + { + "epoch": 0.9585030549898167, + "grad_norm": 7.94635534286499, + "learning_rate": 1.0516660902673446e-08, + "loss": 0.0353, + "num_input_tokens_seen": 10844672, + "step": 11295 + }, + { + "epoch": 0.9589273591310251, + "grad_norm": 0.13016831874847412, + "learning_rate": 1.0303497650455128e-08, + "loss": 0.0655, + "num_input_tokens_seen": 10850304, + "step": 11300 + }, + { + "epoch": 0.9593516632722335, + "grad_norm": 0.8015629649162292, + "learning_rate": 1.0092505700896703e-08, + "loss": 0.0381, + "num_input_tokens_seen": 10854912, + "step": 11305 + }, + { + "epoch": 0.959775967413442, + "grad_norm": 20.08193588256836, + "learning_rate": 9.883685516895113e-09, + "loss": 0.0385, + "num_input_tokens_seen": 10859904, + "step": 11310 + }, + { + "epoch": 0.9602002715546504, + "grad_norm": 9.255372047424316, + "learning_rate": 9.677037556582557e-09, + "loss": 0.0542, + "num_input_tokens_seen": 10864704, + "step": 11315 + }, + { + "epoch": 0.9606245756958588, + "grad_norm": 0.4058516025543213, + "learning_rate": 9.47256227332538e-09, + "loss": 0.0082, + "num_input_tokens_seen": 10869248, + "step": 11320 + }, + { + "epoch": 0.9610488798370672, + "grad_norm": 0.5226470828056335, + "learning_rate": 9.270260115723739e-09, + "loss": 0.0511, + "num_input_tokens_seen": 10873984, + "step": 11325 + }, + { + "epoch": 0.9614731839782756, + "grad_norm": 0.08098774403333664, + "learning_rate": 9.070131527609603e-09, + "loss": 0.0736, + "num_input_tokens_seen": 10878592, + "step": 11330 + }, + { + "epoch": 0.9618974881194841, + "grad_norm": 9.366537094116211, + "learning_rate": 8.872176948046761e-09, + "loss": 0.0627, + "num_input_tokens_seen": 10883264, + "step": 11335 + }, + { + "epoch": 0.9623217922606925, + "grad_norm": 0.4009857475757599, + "learning_rate": 8.676396811329145e-09, + "loss": 0.0327, + "num_input_tokens_seen": 10887552, + "step": 11340 + }, + { + "epoch": 0.9627460964019009, + "grad_norm": 7.252389907836914, + "learning_rate": 8.482791546980506e-09, + "loss": 0.0727, + "num_input_tokens_seen": 10892800, + "step": 11345 + }, + { + "epoch": 0.9631704005431093, + "grad_norm": 0.7880826592445374, + "learning_rate": 8.291361579752631e-09, + "loss": 0.0624, + "num_input_tokens_seen": 10897280, + "step": 11350 + }, + { + "epoch": 0.9635947046843177, + "grad_norm": 9.33914566040039, + "learning_rate": 8.102107329625351e-09, + "loss": 0.0544, + "num_input_tokens_seen": 10902144, + "step": 11355 + }, + { + "epoch": 0.9640190088255262, + "grad_norm": 15.225980758666992, + "learning_rate": 7.91502921180487e-09, + "loss": 0.0805, + "num_input_tokens_seen": 10907264, + "step": 11360 + }, + { + "epoch": 0.9644433129667346, + "grad_norm": 0.42005324363708496, + "learning_rate": 7.730127636723538e-09, + "loss": 0.0146, + "num_input_tokens_seen": 10911808, + "step": 11365 + }, + { + "epoch": 0.964867617107943, + "grad_norm": 18.510848999023438, + "learning_rate": 7.547403010037978e-09, + "loss": 0.0853, + "num_input_tokens_seen": 10916544, + "step": 11370 + }, + { + "epoch": 0.9652919212491514, + "grad_norm": 23.744672775268555, + "learning_rate": 7.366855732629407e-09, + "loss": 0.0814, + "num_input_tokens_seen": 10921024, + "step": 11375 + }, + { + "epoch": 0.9657162253903598, + "grad_norm": 7.666995525360107, + "learning_rate": 7.1884862006017514e-09, + "loss": 0.0551, + "num_input_tokens_seen": 10926016, + "step": 11380 + }, + { + "epoch": 0.9661405295315683, + "grad_norm": 6.401066780090332, + "learning_rate": 7.012294805281205e-09, + "loss": 0.0426, + "num_input_tokens_seen": 10931200, + "step": 11385 + }, + { + "epoch": 0.9665648336727767, + "grad_norm": 8.19086742401123, + "learning_rate": 6.838281933215562e-09, + "loss": 0.0681, + "num_input_tokens_seen": 10936384, + "step": 11390 + }, + { + "epoch": 0.9669891378139851, + "grad_norm": 27.355649948120117, + "learning_rate": 6.6664479661729944e-09, + "loss": 0.0278, + "num_input_tokens_seen": 10941184, + "step": 11395 + }, + { + "epoch": 0.9674134419551935, + "grad_norm": 8.20887565612793, + "learning_rate": 6.496793281141055e-09, + "loss": 0.0654, + "num_input_tokens_seen": 10945408, + "step": 11400 + }, + { + "epoch": 0.9678377460964019, + "grad_norm": 22.68846893310547, + "learning_rate": 6.329318250326898e-09, + "loss": 0.0877, + "num_input_tokens_seen": 10950144, + "step": 11405 + }, + { + "epoch": 0.9682620502376104, + "grad_norm": 5.500332832336426, + "learning_rate": 6.164023241154837e-09, + "loss": 0.1094, + "num_input_tokens_seen": 10955264, + "step": 11410 + }, + { + "epoch": 0.9686863543788188, + "grad_norm": 4.054877758026123, + "learning_rate": 6.000908616267009e-09, + "loss": 0.0078, + "num_input_tokens_seen": 10960256, + "step": 11415 + }, + { + "epoch": 0.9691106585200272, + "grad_norm": 24.178909301757812, + "learning_rate": 5.839974733522046e-09, + "loss": 0.0628, + "num_input_tokens_seen": 10964672, + "step": 11420 + }, + { + "epoch": 0.9695349626612356, + "grad_norm": 0.5533793568611145, + "learning_rate": 5.68122194599363e-09, + "loss": 0.0369, + "num_input_tokens_seen": 10969728, + "step": 11425 + }, + { + "epoch": 0.969959266802444, + "grad_norm": 0.2897357940673828, + "learning_rate": 5.5246506019709374e-09, + "loss": 0.0914, + "num_input_tokens_seen": 10974720, + "step": 11430 + }, + { + "epoch": 0.9703835709436525, + "grad_norm": 4.2502264976501465, + "learning_rate": 5.370261044956969e-09, + "loss": 0.0407, + "num_input_tokens_seen": 10979008, + "step": 11435 + }, + { + "epoch": 0.9708078750848609, + "grad_norm": 4.7640604972839355, + "learning_rate": 5.218053613668116e-09, + "loss": 0.0368, + "num_input_tokens_seen": 10984128, + "step": 11440 + }, + { + "epoch": 0.9712321792260692, + "grad_norm": 1.1905510425567627, + "learning_rate": 5.068028642033595e-09, + "loss": 0.0629, + "num_input_tokens_seen": 10988480, + "step": 11445 + }, + { + "epoch": 0.9716564833672776, + "grad_norm": 37.74256134033203, + "learning_rate": 4.92018645919412e-09, + "loss": 0.0341, + "num_input_tokens_seen": 10992896, + "step": 11450 + }, + { + "epoch": 0.972080787508486, + "grad_norm": 24.807628631591797, + "learning_rate": 4.774527389501681e-09, + "loss": 0.078, + "num_input_tokens_seen": 10997696, + "step": 11455 + }, + { + "epoch": 0.9725050916496945, + "grad_norm": 1.42875337600708, + "learning_rate": 4.63105175251921e-09, + "loss": 0.0699, + "num_input_tokens_seen": 11002944, + "step": 11460 + }, + { + "epoch": 0.9729293957909029, + "grad_norm": 20.29813003540039, + "learning_rate": 4.489759863018583e-09, + "loss": 0.085, + "num_input_tokens_seen": 11008384, + "step": 11465 + }, + { + "epoch": 0.9733536999321113, + "grad_norm": 37.211143493652344, + "learning_rate": 4.350652030981394e-09, + "loss": 0.0997, + "num_input_tokens_seen": 11012800, + "step": 11470 + }, + { + "epoch": 0.9737780040733197, + "grad_norm": 14.482527732849121, + "learning_rate": 4.213728561597296e-09, + "loss": 0.1097, + "num_input_tokens_seen": 11017216, + "step": 11475 + }, + { + "epoch": 0.9742023082145281, + "grad_norm": 5.938981533050537, + "learning_rate": 4.0789897552637735e-09, + "loss": 0.0678, + "num_input_tokens_seen": 11021504, + "step": 11480 + }, + { + "epoch": 0.9746266123557366, + "grad_norm": 12.1581392288208, + "learning_rate": 3.946435907585255e-09, + "loss": 0.0978, + "num_input_tokens_seen": 11026816, + "step": 11485 + }, + { + "epoch": 0.975050916496945, + "grad_norm": 9.135724067687988, + "learning_rate": 3.816067309372673e-09, + "loss": 0.0679, + "num_input_tokens_seen": 11032448, + "step": 11490 + }, + { + "epoch": 0.9754752206381534, + "grad_norm": 13.876908302307129, + "learning_rate": 3.68788424664257e-09, + "loss": 0.1102, + "num_input_tokens_seen": 11037056, + "step": 11495 + }, + { + "epoch": 0.9758995247793618, + "grad_norm": 0.29706308245658875, + "learning_rate": 3.561887000616881e-09, + "loss": 0.0901, + "num_input_tokens_seen": 11041152, + "step": 11500 + }, + { + "epoch": 0.9763238289205702, + "grad_norm": 19.376083374023438, + "learning_rate": 3.438075847721933e-09, + "loss": 0.0358, + "num_input_tokens_seen": 11045504, + "step": 11505 + }, + { + "epoch": 0.9767481330617787, + "grad_norm": 7.894840240478516, + "learning_rate": 3.316451059587777e-09, + "loss": 0.0784, + "num_input_tokens_seen": 11049792, + "step": 11510 + }, + { + "epoch": 0.9771724372029871, + "grad_norm": 0.4556736946105957, + "learning_rate": 3.1970129030481907e-09, + "loss": 0.0353, + "num_input_tokens_seen": 11053952, + "step": 11515 + }, + { + "epoch": 0.9775967413441955, + "grad_norm": 0.27915769815444946, + "learning_rate": 3.0797616401392335e-09, + "loss": 0.0642, + "num_input_tokens_seen": 11058240, + "step": 11520 + }, + { + "epoch": 0.9780210454854039, + "grad_norm": 1.1327648162841797, + "learning_rate": 2.964697528099469e-09, + "loss": 0.0538, + "num_input_tokens_seen": 11063488, + "step": 11525 + }, + { + "epoch": 0.9784453496266123, + "grad_norm": 0.9393741488456726, + "learning_rate": 2.8518208193689664e-09, + "loss": 0.0462, + "num_input_tokens_seen": 11068096, + "step": 11530 + }, + { + "epoch": 0.9788696537678208, + "grad_norm": 16.2606258392334, + "learning_rate": 2.741131761588522e-09, + "loss": 0.1083, + "num_input_tokens_seen": 11073728, + "step": 11535 + }, + { + "epoch": 0.9792939579090292, + "grad_norm": 8.848075866699219, + "learning_rate": 2.632630597600105e-09, + "loss": 0.0648, + "num_input_tokens_seen": 11078144, + "step": 11540 + }, + { + "epoch": 0.9797182620502376, + "grad_norm": 5.798458576202393, + "learning_rate": 2.526317565444969e-09, + "loss": 0.0129, + "num_input_tokens_seen": 11083328, + "step": 11545 + }, + { + "epoch": 0.980142566191446, + "grad_norm": 29.144174575805664, + "learning_rate": 2.422192898364095e-09, + "loss": 0.089, + "num_input_tokens_seen": 11088064, + "step": 11550 + }, + { + "epoch": 0.9805668703326544, + "grad_norm": 11.775348663330078, + "learning_rate": 2.3202568247974175e-09, + "loss": 0.0186, + "num_input_tokens_seen": 11092736, + "step": 11555 + }, + { + "epoch": 0.9809911744738629, + "grad_norm": 18.84742546081543, + "learning_rate": 2.2205095683833774e-09, + "loss": 0.0534, + "num_input_tokens_seen": 11098176, + "step": 11560 + }, + { + "epoch": 0.9814154786150713, + "grad_norm": 35.98866653442383, + "learning_rate": 2.122951347958035e-09, + "loss": 0.0566, + "num_input_tokens_seen": 11102912, + "step": 11565 + }, + { + "epoch": 0.9818397827562797, + "grad_norm": 7.811520099639893, + "learning_rate": 2.0275823775551817e-09, + "loss": 0.0898, + "num_input_tokens_seen": 11107392, + "step": 11570 + }, + { + "epoch": 0.9822640868974881, + "grad_norm": 44.668968200683594, + "learning_rate": 1.934402866405671e-09, + "loss": 0.0508, + "num_input_tokens_seen": 11112256, + "step": 11575 + }, + { + "epoch": 0.9826883910386965, + "grad_norm": 11.887144088745117, + "learning_rate": 1.843413018936535e-09, + "loss": 0.0302, + "num_input_tokens_seen": 11117376, + "step": 11580 + }, + { + "epoch": 0.983112695179905, + "grad_norm": 16.22102165222168, + "learning_rate": 1.7546130347712018e-09, + "loss": 0.105, + "num_input_tokens_seen": 11122624, + "step": 11585 + }, + { + "epoch": 0.9835369993211134, + "grad_norm": 1.5749400854110718, + "learning_rate": 1.6680031087286106e-09, + "loss": 0.0155, + "num_input_tokens_seen": 11127680, + "step": 11590 + }, + { + "epoch": 0.9839613034623218, + "grad_norm": 16.012849807739258, + "learning_rate": 1.5835834308228768e-09, + "loss": 0.0984, + "num_input_tokens_seen": 11132672, + "step": 11595 + }, + { + "epoch": 0.9843856076035302, + "grad_norm": 23.44938850402832, + "learning_rate": 1.5013541862630708e-09, + "loss": 0.091, + "num_input_tokens_seen": 11137408, + "step": 11600 + }, + { + "epoch": 0.9848099117447386, + "grad_norm": 28.59117889404297, + "learning_rate": 1.4213155554525513e-09, + "loss": 0.0188, + "num_input_tokens_seen": 11142016, + "step": 11605 + }, + { + "epoch": 0.9852342158859471, + "grad_norm": 23.170242309570312, + "learning_rate": 1.343467713988522e-09, + "loss": 0.0406, + "num_input_tokens_seen": 11146624, + "step": 11610 + }, + { + "epoch": 0.9856585200271555, + "grad_norm": 12.793461799621582, + "learning_rate": 1.2678108326621418e-09, + "loss": 0.135, + "num_input_tokens_seen": 11151680, + "step": 11615 + }, + { + "epoch": 0.9860828241683639, + "grad_norm": 0.7046257257461548, + "learning_rate": 1.1943450774574148e-09, + "loss": 0.0567, + "num_input_tokens_seen": 11156096, + "step": 11620 + }, + { + "epoch": 0.9865071283095723, + "grad_norm": 10.034231185913086, + "learning_rate": 1.1230706095516352e-09, + "loss": 0.0577, + "num_input_tokens_seen": 11160256, + "step": 11625 + }, + { + "epoch": 0.9869314324507807, + "grad_norm": 0.14725586771965027, + "learning_rate": 1.0539875853142754e-09, + "loss": 0.0315, + "num_input_tokens_seen": 11165568, + "step": 11630 + }, + { + "epoch": 0.9873557365919892, + "grad_norm": 6.678691864013672, + "learning_rate": 9.8709615630721e-10, + "loss": 0.071, + "num_input_tokens_seen": 11170432, + "step": 11635 + }, + { + "epoch": 0.9877800407331976, + "grad_norm": 0.4217716157436371, + "learning_rate": 9.22396469283937e-10, + "loss": 0.0726, + "num_input_tokens_seen": 11175104, + "step": 11640 + }, + { + "epoch": 0.988204344874406, + "grad_norm": 0.1474541574716568, + "learning_rate": 8.598886661895787e-10, + "loss": 0.0355, + "num_input_tokens_seen": 11179584, + "step": 11645 + }, + { + "epoch": 0.9886286490156144, + "grad_norm": 22.87831687927246, + "learning_rate": 7.995728841605487e-10, + "loss": 0.0425, + "num_input_tokens_seen": 11184960, + "step": 11650 + }, + { + "epoch": 0.9890529531568228, + "grad_norm": 3.589052438735962, + "learning_rate": 7.41449255524107e-10, + "loss": 0.0954, + "num_input_tokens_seen": 11190464, + "step": 11655 + }, + { + "epoch": 0.9894772572980313, + "grad_norm": 9.759099006652832, + "learning_rate": 6.855179077981388e-10, + "loss": 0.0764, + "num_input_tokens_seen": 11195392, + "step": 11660 + }, + { + "epoch": 0.9899015614392397, + "grad_norm": 0.7287328839302063, + "learning_rate": 6.3177896369071e-10, + "loss": 0.0392, + "num_input_tokens_seen": 11200128, + "step": 11665 + }, + { + "epoch": 0.9903258655804481, + "grad_norm": 0.5317105054855347, + "learning_rate": 5.802325411001785e-10, + "loss": 0.0484, + "num_input_tokens_seen": 11204800, + "step": 11670 + }, + { + "epoch": 0.9907501697216565, + "grad_norm": 12.197322845458984, + "learning_rate": 5.308787531147496e-10, + "loss": 0.0924, + "num_input_tokens_seen": 11209472, + "step": 11675 + }, + { + "epoch": 0.991174473862865, + "grad_norm": 1.3452421426773071, + "learning_rate": 4.837177080119214e-10, + "loss": 0.0679, + "num_input_tokens_seen": 11214016, + "step": 11680 + }, + { + "epoch": 0.9915987780040734, + "grad_norm": 10.827523231506348, + "learning_rate": 4.387495092587068e-10, + "loss": 0.072, + "num_input_tokens_seen": 11218752, + "step": 11685 + }, + { + "epoch": 0.9920230821452818, + "grad_norm": 14.037049293518066, + "learning_rate": 3.959742555111889e-10, + "loss": 0.1148, + "num_input_tokens_seen": 11223552, + "step": 11690 + }, + { + "epoch": 0.9924473862864902, + "grad_norm": 49.733394622802734, + "learning_rate": 3.553920406144106e-10, + "loss": 0.0671, + "num_input_tokens_seen": 11228032, + "step": 11695 + }, + { + "epoch": 0.9928716904276986, + "grad_norm": 1.3481730222702026, + "learning_rate": 3.1700295360181927e-10, + "loss": 0.0562, + "num_input_tokens_seen": 11232448, + "step": 11700 + }, + { + "epoch": 0.993295994568907, + "grad_norm": 27.198644638061523, + "learning_rate": 2.808070786955996e-10, + "loss": 0.0858, + "num_input_tokens_seen": 11237440, + "step": 11705 + }, + { + "epoch": 0.9937202987101154, + "grad_norm": 1.2682602405548096, + "learning_rate": 2.4680449530622984e-10, + "loss": 0.0601, + "num_input_tokens_seen": 11242048, + "step": 11710 + }, + { + "epoch": 0.9941446028513238, + "grad_norm": 7.826528072357178, + "learning_rate": 2.1499527803214846e-10, + "loss": 0.086, + "num_input_tokens_seen": 11246400, + "step": 11715 + }, + { + "epoch": 0.9945689069925322, + "grad_norm": 9.909138679504395, + "learning_rate": 1.8537949665997642e-10, + "loss": 0.0707, + "num_input_tokens_seen": 11251456, + "step": 11720 + }, + { + "epoch": 0.9949932111337406, + "grad_norm": 0.5570287704467773, + "learning_rate": 1.5795721616373992e-10, + "loss": 0.0287, + "num_input_tokens_seen": 11256128, + "step": 11725 + }, + { + "epoch": 0.995417515274949, + "grad_norm": 13.057730674743652, + "learning_rate": 1.3272849670564746e-10, + "loss": 0.0433, + "num_input_tokens_seen": 11260736, + "step": 11730 + }, + { + "epoch": 0.9958418194161575, + "grad_norm": 9.216782569885254, + "learning_rate": 1.0969339363497975e-10, + "loss": 0.0669, + "num_input_tokens_seen": 11264896, + "step": 11735 + }, + { + "epoch": 0.9962661235573659, + "grad_norm": 8.39694595336914, + "learning_rate": 8.885195748875584e-11, + "loss": 0.0291, + "num_input_tokens_seen": 11270400, + "step": 11740 + }, + { + "epoch": 0.9966904276985743, + "grad_norm": 0.7769232392311096, + "learning_rate": 7.020423399117791e-11, + "loss": 0.0136, + "num_input_tokens_seen": 11275008, + "step": 11745 + }, + { + "epoch": 0.9971147318397827, + "grad_norm": 0.41244271397590637, + "learning_rate": 5.375026405352034e-11, + "loss": 0.0776, + "num_input_tokens_seen": 11279808, + "step": 11750 + }, + { + "epoch": 0.9975390359809911, + "grad_norm": 9.232144355773926, + "learning_rate": 3.949008377424068e-11, + "loss": 0.0341, + "num_input_tokens_seen": 11283904, + "step": 11755 + }, + { + "epoch": 0.9979633401221996, + "grad_norm": 8.324690818786621, + "learning_rate": 2.742372443909069e-11, + "loss": 0.0601, + "num_input_tokens_seen": 11288512, + "step": 11760 + }, + { + "epoch": 0.998387644263408, + "grad_norm": 14.6475191116333, + "learning_rate": 1.7551212520339197e-11, + "loss": 0.1003, + "num_input_tokens_seen": 11293568, + "step": 11765 + }, + { + "epoch": 0.9988119484046164, + "grad_norm": 0.8326796889305115, + "learning_rate": 9.872569677438213e-12, + "loss": 0.1294, + "num_input_tokens_seen": 11298752, + "step": 11770 + }, + { + "epoch": 0.9992362525458248, + "grad_norm": 24.4580020904541, + "learning_rate": 4.387812756578846e-12, + "loss": 0.04, + "num_input_tokens_seen": 11303680, + "step": 11775 + }, + { + "epoch": 0.9996605566870332, + "grad_norm": 0.4780389368534088, + "learning_rate": 1.0969537908023242e-12, + "loss": 0.0059, + "num_input_tokens_seen": 11308288, + "step": 11780 + }, + { + "epoch": 1.0, + "num_input_tokens_seen": 11312256, + "step": 11784, + "total_flos": 6.605086766609203e+16, + "train_loss": 0.0722960903008882, + "train_runtime": 1744.9763, + "train_samples_per_second": 54.023, + "train_steps_per_second": 6.753 + } + ], + "logging_steps": 5, + "max_steps": 11784, + "num_input_tokens_seen": 11312256, + "num_train_epochs": 1, + "save_steps": 590, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6.605086766609203e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..6f3d4c8 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae5a5c7dfc2c02575e042ac44f9ddaa14dbb7c0c73a49c06d3702a5761593d9 +size 6289 diff --git a/training_eval_loss.png b/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..c8bafd08314759db9ec955dbf4d80f1c7ed620f8 GIT binary patch literal 47369 zcmeFZWmr{f*fqKU1qBp?mJpB<1Vp660;Hw8yBnlYK@bF_I|LB~3F!twL_k=iba!{x zx#xbr^Zq^O{6E*Vy|&<5b3OBU?ilwNW3CUcWW}%HlH;OKsB020UnrnZn7$|!#^e=j z_|4<)(OLK(zmuq%lcKGOldHaiF-k_?$_0V~JK-u(o2UV*wS+U$RaBoMQ%!hQ=&KIv{CKFO7$ zOHs9Li>NBqo_pwxFfIbWuaRGNJo8y%w`a**_m_k^yP1O0@A}<%c7lqgxX7tIH-F~X zC`6kNmoN`#yJ{=*WGFn)D7g|M*7U=$rOz4*8(*)%7lW!+w)d+{<=Os-kb7sF)8^#U z{K|Q9X@=Y6A@6PLj=tt3&f1gIueXF!KF{bbH0w^kZ>gwVsNaOmMSe}!`^eZIJr#of z&XltAc%)GOxiU}YUyGRqO?MP~w-kHruXNGiii2^FqupM)qYa-6&3>7j{qwT}-4wTZ zve-0~)8c!cEr!GM_~%Cb zMW|TD(~NGr{Z;kYlNfg0&TRP%v+)|f^Yil(^WOIXRPKShySom$rWfaD(#ibMASNoS&U^ zTUd5ZShA>mpG>gKe_AjQ85`U3RVMNE^asW`E^`HCo({t@tD(lBVpB4HC$s-^`-cmu zKBSyRSK!b^bNh?zXT-kBCb!LZB&yfEw*Syle{p^SCy(=yf-mA^y*ljUNBnWO1xok* z)z%#4Tv9HxSKqAFDjn$IE`^f?T)(>Q%v12%H7F7kT(R{uVc+eMVz-|O;o-CzV`nFL_j7W~Q8gwmjug%VHk62e+@}1I+qMxVCgz{k zR%VBbUmtFN2o0St8?nidh-6qDuT4DKStxdxlPt5H2u)4ZjV~xQ?`0TwUwJTGZYx*q zvNl-h$mn~?H}*a~z5o8&h^q4=zku0=1w#%}Hs-hm(%EObB*poO2 zKL3le4IfkzpCh*Kr%#_0!a7=8Y3|-l8(sU3%c7cpkAdO+bxM9EXGOt$om%(PqlHwq zk@d<2DhTeK1+-~r65k6^Q6i7i#qJEZ`FPVS`1nfYWA-hSC6)uBrYUY;r7h~vWVS6(YRQ=4Imh@~Z6}pA*8eO@Nke`+=w|VEn{mbNl8$>FXv-$?{!Dfq zZ%wqVQ=dI&uRksNQ{q=q!TUSpP79n|C;XfGyrs6!+1{6|tlNZyzg7zB^Ra!0M@L6# z1;VI=60)CcIA`JP)`|&(BY;tE;O~V)k(h#vKr=`XgYM<}p8%a%eyNmKF-_<_5%*1egSJ&`(K#V6Ra>iwE zZyzVk0LNP~9mJwm?_FCZ_w*^o-g3X;>EV`WXKH->hi|3Z2e`iNtYRli$2~^U|eD z9G-`Ek0%}OuPhuiF68OfdqX(x9L|J2vKi%VY-$?2ex~i%@fhmFZ{rcz>xYRQS{`eq zf;)d7{rdF_#o=}A#6=!5ymIpQ5jw5g?C@a1r*iL~=GAN0GHoa7mfeI;Rv$r?+T7X6 zfS?_9Xn&BzYoGq|j-*F&lB07_b@YN3iN+5D(MpdCm$l($NKIL&p%2MJAeJF^UG3IK z`C`wK&>e5Ea4OcyMmXZ7U%Ys+*{3M?nF*IsHfi$x9h%;nyMLkEdfaZD{D|ue#iop7Mk!yFjzjqPhiORp8@+UFt%f1OE9pdhj&-u|pM4dRh3$r6iQBl!tyOq%m zN=49F)8Lq;2%NYtr_cMO<^EjpSFc`$uL(oVe&zG&=hnapoF5tvAyhUCi$YET@(#Kw z9S6rJ=aqk3Q0OD$<6A8YYE|;zM~TT3=GULQ|0~c%`(3>q7!qO(=_8#goUE87yY>5y zEou`ja$z^-JkVX~Xqw`&&gbdn#pks6sXcbjW23euidnG&UgkFW2ImhNZ90&zZ8csK zuW4IbI>oAp2(aM{F5|gpzTnUi(6*Y%OJ_ayl<7kw3Qh z^_)QH(vuf2{6-vOGHc`G<7=R4e1zIlyjP3b$Esa-;BM?D8!()OU#btQ zsNfJ#8TDn$k2<1-KHlL84HG^NOth)`pq#5>zt^wYTc9iQRXV;A?xYzKO%{3q%4{H? zl9`!V3la$l-a*5#o40N?0&LO3{8J<;C1v>M6V0gSP6x7PB%5J>NZ$Q?buY~m&ha5B zzToL*Q(T7$J~r0Mzr2}Y%K~~fwovHD>HQW*iy2`R+ifgJQ(J__T{*&K`H%JV^z`-W zj}~61#T$@K&kF`J`Yj}+q-NqxQ>NfljG#ur%RXkMdRO&1%BBd$cXhpjBG(E_iW6{U zgWh1U)RVa^Ja=@OBH$VdCk`=R=|5sq9rEYTOURkS*+@CHTKCd++SD&!?)}TtR5dzo zYKvz15FAX$$r*8zPV5HUVc$1JG9h=a=kM{EZ;>KJvBaW3C|1W^;*4&inM8N?TUN?6 zZ2o+wz{c#~_(y*edBVKUj$skk_oRwh3lsq;0d)G-{umDzH;&&qr+pzyu)s}0;g*2Q z>SX0Y3d`Q(s;-2`mb?zLS7utm81LT?g2)eWo$!i9{x5QITKw_}zeOJt6t0$b_WHj; zL@Y?AWUCfXZPe^sSJiQ&FR0nZP4zkFBM>>|0Hl)v74XSWom5++!H-Yz@e(>ZRGQ^B zb9)22jL?5$9%#m_*bC6p)8C|`()nyCwF3nsz1!^dZnwCEgoOV3XeB;5S0Dh97?w{p zUGR|~RdVN`jWE#By;IfoihgMLyU=aNq+#pyS4)c_cVfJt8zA)M$-TkeAZf%ClrkraLvbLO7+^0uj(-P!Bp$Mr+|rt75(?G zw6lk_^hZv3p#oYxW?v&urP+$R+Db}FN|Bb1E?Qu{;`vya`<}(+S9KTX9=NcGM_0VV{hP&&1d@jF&W|e32D6vD*%Idp9y|-60EEH~PWuQP6 zl?fo0ZNt8LEFbbk@Z(h1l@Je)s`MZm;7`lriti4(EidiCuG?|0@se-C_36&Db@W1TJ|f65q~G0Ab>$ z)$9*jvYm7!@hM_Nb!UiITZy==#-;ZzMUm)wb{n6pk5v;8j>+f_fZP4nD!IzYN+7+WVs*XM z$5LWqz5;@(tPyED)>a@l9Z4$B*RH+~1@`{V7RfQBC{c)5hTvYG;+*jQ`N@VVs{DXY zL>GFb(~*tKc}(rmd}3xweFfSI-lCP#0-*&b_in@7p8fsx zN0a$7vLqKK`I_aj7|^f25)K_S9yr)rzal)?Sb*+SQCqiDodq~2sNcf55eS`=AP_vq1GPM?1EIpk0 zlZWZ_%SR++WST8XdrB=;VgAWqcb$t?y+uW}zC9qJp+Ro>@4G(q$K^8P_L%N`ZGqu3 z>vov=`bVNwb*`pe$BE9gHYTE^%yUu1x`c!C$YoW{Wp(iL`JMuWset$4Q~+ll7b^rk z0mMxCvF*E`4G=vtl9IS1W!8>EuA>$9Y#UopVI5$$x2fGz=}Z>L61D{-rsdqPdHgnGq80JSQr@&r?1^q8RS z#VgTxVEFsvRP{z39Se)p7?iHmUclziV!qmp6q^!Uy?S+yoV<1+#Y5}7*7L|=)kOfJ zsJQ)v8#YZPPvhIhNKl<@s^|7q+xjzZ81nC9;avaTRjHD%)!Og|t9`84Y3lsw=mDW$U=BmGN!9|QZ5+1W^mVNYC zuf(a<*>GueH&;+lr~s%9HGwQlaQkIt7<(|(@+y4CvDY&pTxMOqaPlgLNzfQwm+0$Y z&O!Ap3>$G`6heg?mC3rF8!Un~w*`gkbGO+j+Y8o}wxyt&)QzeYHs`3WrN$X(y1cII z6c04Z?%lihMS7#Uq2Y;5^&0oy1opc(klxib6Ki)yOZi3=IG zMKL2o0@4%a_-(ePn~@0~new2Xwb5C3&(F=HN$*Li=I!#LdZo^g}o|vIt4y_{5T7^WGa}w&h99} z8+!0neEcsS2kQvTIN7MLmu^aE38T&eWF|_CsWtk3n@P$8L-_pAkl%IvW4GBoR@RU3 zIWQhhZ3~_p9&$L$zC?1uIe!q)Au|(`-BlC;mEa%15zhf#w&wu|2838CXw4D~NPzgL z$6lwPq=fzKEK)l`+sP~V z?B46+J{MkpO&tBl51^#lAC>R4WI$;aEgM$*Xf(XD25<&P0( zWzGDnvWU9v3MZg(Ac*}40SnFp+AR<{+RN0AByFv&Q&lVZkTJm! z_5X}JzP&tm z?p<;C=oM7}0W||Ze}enovhw0#P+jU+DIeT6G+&M6dTJ^vggJwbW$Z2sz(r~HfR2l+ zZakd=V4HpA2LnNb6GoE zEk;)WHA%*8McSJsmjz&V(TN{+qGCtDLsD-9L)CF!9+3SG|&py0akPXnpHPikIT|JRV~;NIs+6y zB%qo@Yt6jw#%fco@V@d%CZ*;$+SXnOwKh`D2%`jA_~av$j%`?_J_l(9^hxb~P3ST2 za0u8`JGoua^2-Yh8zHkhyX#M-w2u7Jq zk&0ksoUt0M2w&>Wx_#%46bzCs)yY5K;Z&coIUgLhZ+kPZxJs1UjA@;xCG$BlpaplN zf2`Fb10k@5hcN1CyAK9KWg&fNa0&L+0A@=dOS7Kb@HiOKR#BTmp4>H6AVh3tU!b8kn4RU=+ht(BHIi`$GIVS5 z9YIg7D*NNDlLzIa3SZTx6zcHUMUAyxk9U`lafK8gd#`pu>y`8LudA%Yc#zC&EvImw zT0nOp)D0lxTh!EOdfTl^*fTfaZIK;-$%)H)Scx(M)nbwCxX}3n8in)QhYiqFh5iNN zHUIjx2~{&PI(n`LZ;>L$A)o0|OIM$=s&+H-2k5`oOPR3%-u0Ew5PJxK zY_EM0ptI5Mk7UhkQ=uO|=)=@JxaJb8^TgGfDXeGsWG+^>r^d|@INtSbtmS;mdYERA zPLL@T+f#0<3-BvT_Mfvwclu>uae08^Q2r+J$JmXneH!oxQ_!@7km)h@LR)Fia8I$R z973`}ZT&Od98C7e1YatKm3g4(00NgpLdMQsAZR9uf|7zlE5AjD(rtSO5R@7e8ZiR{ z+F%lP69D6EKJr;vS;bJ>q&oKo(nawIML(+-n^+WB+n+og(qXI~>x_G7_)<=80iu|V zldM*70jQwM5R~k$*V`e+{^e`87o&w&iW*vmP8KDM&)ANDI)QXmycnx-+qs2a87*h6 z<%7tA=q++t(?Bxj_+UfVC>4G|EHIW`!{$vzBsv9m=d?aLG&4}8mL;3K*!S(Wobb^- zm^8k~r3$lZReU1Rd5ytz@@mx|D8;esUOIx9{jgq@0v#C(3yTl6zRjhdD-Ik*hfiUL zIe{6ArOJnLfJ@Jw&$O_y197cAjxexWufufQl6mdlHV2bbcf4>onv3ntlp=X7ipT5j z3cP_d5YFnA4w)?iPfLL5X|&?r0W4N*(Vs)>(=9+ZgGZ+y+}sFpyw?4m_Zk`JrNoZk zr$;;5Wk_ZKq<5x&1?venO!lc12*78T&C}x`HJhjt3QCXSZLxyX19(~a_eZft9x_Wq zzb@A`%4C-E7!w*mC`71xqM`j3rK124JRxyCVUAPFQ;AK{wAC;J3jS=lxw$QC zr4*IqeV0uY&bxm5t>NQoAS9o`sFJ|*mn=&AH1ktTjLIx~eo$GHMd>z_tn7<2HP#yF z$-r;iAR$qm{s9at)X=w(uC;$Yk=jW#DMfcgrcOW-WWc-(tPc$Bf$dt9ys6UVHIABW z+y=j{IPp_LDY6Pbc$dz!yig@r&K~i!o zP(=U@fDZ7<-SJ!xFx5Znjez2X-A0;okO~my{T@>ijKfew=b#vy{0h8jGcp7@61#E$ zwFxuq-&CZWuzh^SF`R5fTwM0K$?I(mSs;-_=a*fU`>y7xtZXL}-MH~;{cg5sF!Ab0 zIeM=400w1}_}^Rr7zjn}1#+CHEY-B`1bK+4e(B&(&i`{_0wlGL6xl*2G|jG+Jva?? z=7aIWr04?cb18x#gV?0mR$^{Q6-!1Dd z0|20av6BU%-hsb}iXt5MKCy=G`Q<#?6+ZU{5fOr>0zZB<+8D3>n<|p}`FTK9$A=j% z`{@^m&|*`6=2B)oB8Id~U@VWIH336p^Pz<}i*>Dby=q0R&XcbxfWQ>;okGYx${&xU zr_RfxTtpJ$+W>aqZxUZ5FbCfp36sxHCN8$2fZ4&S3cb(Vp~}ANFfp8{^YYwkCi$*Y zn*j4%%r|S}eh|4Bq~jWNea=LUjP3z5LetvX>Isc_Sr}pPocMLz=7?b0-rU=pYhx)e z0*F}yeHEWtI0@*HPtnoMK%t4-+1XVFFs5f@xWRnF>#|Ckr&(?S)p>QaG7=ds`u?tI z|k0`twaO3u>gE|F_ zzw!dNic&D~1B6t9QictOU^rN)f9djNd(a)~&W|k_WRw0h5r}Bb511dUj|pC!?(D7% zguyu=#CYt=n9GQ@vG4$7#Za|tLBed6;M;famIex_k=fz$<;z@FgP#Chw6bI{&z-SV z4g$~ryU`yH0T3ODJd6Oj`g2u_O*+WWcTz91@md?UGPvO;5O&~k%P>$uZV(Gg{kbGS z42!1h0L+L@;5`I2z~Y$7R9L_?OhZ~)I{YXL+G_^Pu^*{LQf6n3<~x(6AOI^@@~b7i zYKTg0#`qaz5`M|>jJ<%C|3f1WDI9PLfC0X(laP?K009N(>=+mt`V|WNF7N}t94COj z2U0KK@bER9F+5R0EbCf)0$4Wl~E^4kz zotd8gkC+1T=LxLln{Itw`^tJJ0zIM22elfd!wFaomwW)g6JJ=PP-cQ|s|tZGX|3wuP{avGOD`y>1q6a5^s!O412#+Y zxT2B)n{pQR+T94C%`y&;kMAS=(>6pM;YrQ#nhT;?+4RoysG}@jfQ)kh(8L1VrSbu@9C=)D!HnSkbP3OT4<=zO_WZQrSf(n|%EG`=P4usL zjhV*XdqLB$*+Fv9bpsYGm_kv5JZ`#&+7o_|gfbHC>x-hld-sX?NLii(WG>6e!G`Nl zLCHYtQ4*mm3%U|cvH(Q1%(S$$mmcjA4AZl%k!_n(P0^n|-Do!AWab%jigY;MHNR{p zk*qlCh-hGNy@s_?cF(o86K#hdP)nsMm%b+Up$(oq@kRPO3vgu2+k?p)&3M!cP@yF~ zJK0Zb2(!Knbc`*mT%2!QL;-kW+FKb=1;h{q<@G+yr0jeAK??im)#iewa60iQgn2aY zPWMN)3x*$NBon;Pk4$4{po3*0F|oC?<0v7jFl;&rEm1Ln$2K3pDLps03bc*Vk%2CY z()$QG)B~eovmOT%%tCEI66adMPW6Ei4xfNU->vcZ&Z>bJianHdxpbWNyG$7rW!Cvude|5-FVj?3&63g+&^f(s1 zm-lGq0`rIBJdz16f0A42|42FXP>>YH=oBI4u_z4$kQ+leL`WG6NqoD$n`_+}I8XxR z^4LNarH#&@7l54YS7GG5b~ajKu856K`s(vCWodXHCh@bVA*6;w;El;%LlCir^CnKH z2pb!|r+;?q?3q@%+XIxejLd_wRa{<%V0S}>jLjODSfhA94Crwv@!}}E{qe|IW(6@7 ztQS8FnsLWrB%x)wRPDN<^LhLAXs#9X^zXm@lw58o>B3oy>qZjO! z>Obhn3%}SE-yzeji@yMv5-au{IzO-$IWMbPy6S} z7YzVdD&^u5Ei{TMq&R$oDif&*!2e3^6sf7t|iXF4W``cI)f=Twm? zrw?E9#Uo~&gkYg#X8ypYUBzKLUUREg5+R$iQrv@pdg#tkW z-j8Tq;M5R@>elDAy|d%C_&GqW$ml9)&rtOdqye;gd!LD^M8$1J*&8q`{1j({+X!rn z0%>LDcL+v8-BOL;`y1&T+v2cO?jR`pb%)zVmAXgxy_P=Azn%57T z-d{5W#Q_Geo1*fh&MMt zgd2=B+h0Gy#=((7F~M5;IGo*%RZ7(^k7kGyR5`EAGs%85w9$seAu39k(7syunK*Dx zva;7*4)PGTgAqhroodZCDL3A{!JB1^ulK{`m18Gjr?OLy)WJ*EjHIP5#C`1=5x{*dNp||42qT z1O%rv>2ZL7r!c?LneZ4dvh~Zj5k??lX0u4Qc!<;03)FCXEW2r=-_^ccRf1&M7K9rD zJ)kn`<_1(d4j_2;CORd)An)K7l`quDqyLvc^f*8^tEr_WOBTJwA`^|{3yuf?5fPD* z+GUK4jJJu1=Jr9y&sI1H3l27fCS+t}r1wKDFfcF;V!~-{SXn5b*?>k!Xe^VY@XN~a z@i8lby{;XP{Bf9@MUK&Lq`1%vHwQZO=u|iSu1p~z} zVgly5+5n1+#LJg<9#pWyrD0<>3xIgwRsd>3YwodBYGMw0+iWle@CU)fKn(W}8KLRF zZKH1Jfx`t+jREQQ)OxrgNDCqhI8GeB)xjSK+ym%>vDtt{&;>dXLg&Ejst!4YsF1*! zDx)i{hB#?yY0Y}RT!m6L{|1Lj4{izJT7Bs_l7Q#}sE&9o5N;hTObD0um8bzZ3E|-8 zbd!|LgM)(Q`6~p_Q9u}WJUz5$B@S*+e>L^$0cHab4fgYY$x^(J%_AZr5K$j!uH|Q8 zetwr?7i+;nfkZT7PC@Jwa9>*;JQH0%48@&OfBkxaO#coJoY%;?kb6Od7`6c~FkeA) z0s}=4{ml{x$0||xKnrR&87Y+q+o*p^5#ldlWo4(Qo}gt(0+Rg_n;pZvR2n_8VetuD z45?Sqh-?imxH7F4d{T}E<4!N-(&w*LN=czfz`q{|fjTaoB3P-_0tOrkAjdO_KjhG2 zq(QvOx5Ghnm>>p*hK2?QO3eZtA@)*9C3-SU;>QX7CZ+?ePMyYyxmC>Y+HZeN9)aM2 zpieLn9q+HT!~Ai%0cMxJy$r$2y(#>PfO`y~U-G9l(# z&~*$R>)|L+xIaKge+RBT$j9jjf2>Jhn=*kt25(I-%$|rm3MpH2qD>l^m~6x8M7ghl_|gkf4FZIrQMDi@FbLrOlXq|5s+E}C1hWJ? z;#b+IJ)p9!*%GUy&e)_vHb^C<`S+VyR#cAz>)E;{x(1jGBV?VS?8Q6n?=XQcD=fGmZ}xc<2F##Ai{t7@+EsDhE2N-nMz<%YVGX&ZBhDf$cd#t z`jT(DSX&$Wq@5%;3ZstS;V*ZmG15OOxPEu%a8=?tU; zDbQxXXs`nJ46rkXRlRX_sJOO=SpFd=&a1wcaO`@jX>VFz2486j;#R*x>$iD{I=M{J zW)8Lz59}Ub^9rcS1J)jJXD~pt*d+?+8eqY2p1R@-qcgO@Ig9pYZky4U($e?{4FFgt z6Z8%W5%1H8PG&(lca&&IWQyg~;E)!oitGAMt5OU*?iPjU4uBhyw{P5NK%!xHsrMUL zB0!Cp0#cuzhbQ*xZH7(gEwM|2&=E}jeOE8B8j66<#A!P&*wfR~Wp-aUwF*85l%YbS z)?3gwe}lK;5P%+wr|=ZzEmROWHQk$M@+qRPj#UEXg3pva|A#AdO!(AyB8B zz}sD%AJiKe8+Su^B4nGHnOQt~3oI=WQBl7Je;9TzIY}q+h65IGstWQ6(*fQUF{pwC zQ=dB#Or%V5LVJV=ckGXbd$j;K^o?HasM93zXerf*i=4FQ8wP_&i(u5og|LiDUG%v% ztr_XGYI7dpvpzxvyus)(n=IHcgsVm-bLFulDcVwBDFz`p?W2{4nQMy=V|glls18kb_61$IS{$ZJTkB2cVXjZHX@f$B|-T2 z8V2_A?hxytCX-k%-Fz0vX*pW)qg~QK-t{FxTJ}xZFY^v;BX7f0o(GsIhMzn?yS7-O z_hY#AZ>)PH)WsJP_fUlWDL|yPO>oOUwNX8?C?$V(hUbA{OsWQDlPMfxdRV}PE%sv# z<}UxWHFCmw4HoHS9Hy(E!M7e5@e;+9^SD}DUv+ATSb zG4}DXUZ3R;mQOJ(cux_8GL3vx-dU^qtorJu!(}tFn`&PT``mo_IPqEiZuI{I-mViY z+JfHWuyf#?n%>&^M+6~e(P~gy%<~Y>?w;2_i*}VbtqvR}mN#%h3gD;42V*!$!%*7O z@sUOJjQ^5mRS{qi5rRW)^GqtH4McYJ3QZ7FO|Kscocc zpePPG#Y|KkW<-_&CIktXy1~jw2g2dgci7>Pkr~%3>%e3DLQD*7s*T_X3+T5nhJyuI z6b4G=7S#R9BwJv9w19(JJ_0uAC5U70^#B>bzmx&(7AEmqckZ-Jpgy}LL2nQ`NWgLP01%8|BFlNmniXRtO z6tGZm*9Ue`za7CsfO2qf2n-8jgnvOw7B~R5KUQhFWl>H`U#Syqmagqt{BX-~2C zh5_ULHYx4~Mi@L?{Ugvf8lgA$@9!;kH_au1x9k31Rr+h=6UFC&lApp=8@h}6P_ccE zu66g4E{n>yJq{?$NqX5qSKrXN)u&a(ldDQM%=*H5HfvyO!*NU5Wv!@Z$q4}~FduJ& zoBJU&xRlq^SGjG+@qKj)T!(Zsx+}&^%2j21P9yS@laoVavkbwB!uFK*iJsn&g71gz zNYpE(JJoU~X-r)leRA3znm1{@-91alajkIUk5r_+k9!Dm3_g&#q13E#hfG+W{$8Fi zp|PL~5mTJkAI$mMZMN8(MF2NH1wt_cko2JPS3O&>0M95e%e0J);%k=?%Qm2B>*(K2(nInRN;IKS=jDO zIVJhtSG4p~f*}pfV-jDs5b0Pawmg;R*$D)N#{^g-Yq-qn3nTGt%F=!}G6!qi9s#L% z3kDB*8k#qA?Xkdgk-?;-`gtdW>=8Do<3M*I!!MZDOt)uSPY>!Z=s7uW`9h`G0am^c z%t&EfJJuvzSCmQ~? z5U3|+p_9XWLi3^(=yarNkpaYnQEna}r4i6r2uvm7PlZ|uW-ErrkE0>CkjF$E`ju55 z`kZ?@r@{v#Q!JJHA`KGbP@WILqHWfnLxd6*7Ony3z<+9&xHw|>h&nn358F+kO6EQX zP^CkG!oS1)5h;K`o1?lwN*(vu5CZ;526;3Apu;cFoYX(J+@~-i+NA%a{30bhA|8$Q zQ0%hMGh4u>B0HDnjcwlu=u}{Tf}RY$CfFspG33mdER@CBZT3VyO+v`$xD2;o+2pD` zV^oQPQlC{3ZiP_~7E{jX1S4hBr3cG6?pEuG#W)-f%2By_4h(#%N(N%WlUGgw%jfzO zWJ1$Tkb(A{DVw=q&(Qu+7LPJr=s;D?w}8MK`#})qw|?HVBDJ+I#Mc$eLvS(gvP@{K zy;Ff%?6GF)RTK%U+7lQu!K4q}=XnQ{P7>nu0`>v?@rXxZt}UAFz|_=iGiK3sJUrgi z>tCxRJO7Hy-yK8;6h6`Y=-wi(Fpq&ts~ zU&Gi|2@eEltFTyuX9D}oQjzC-;PYBGm|O!-SiLP0P5}& zpx_x8s9=UrUzfR|VE}hCSYjSIVu(;pMIui;J!{&}VQiWQ8{7<7xS4_)p@Z?+O`-<0 z97Ede5w|o@H;4fOxSmfyxmaob-;9&|L-Ee}Ptmr`1L9JNjg_`@d;f;t5e^bQ?%olo zr=qE!^kmnoZPfDRGtmqm|G9j3#EM>EB1jBt?;Qn(UUo6gD+df*`EU1V#MbBv%v*B= zQGy=(tYAr%;aVbuo~WPzgbffAl4Cv~Etc3$qyWeQ%L=LBYYt#B)4=YqEL=YB&VrmS zQkK9ffoN5uAhR`>IYUi>F>LQ2)U6=FS$?#M>Y!s=_sPI#eGYf+qlx$8oklzT`--R< zo-&qe&rf+HjZGxjk$O^~-#o%ezvTN&>ALBMqKNZD zlE;xdk83ONlkKv}_H6F!!TOL#w2-(0hwvu!e?*Z0CtKb=74Wa}WA#8!BHoPL{Hh>8 zY`ySoA?-pEh=`C{iWkZ8@yt*4E>lRi(he9jO{8Tgp9Srm*KX}RwQ}))y0+X~t1h%o z+4YvOeyHifoA2*~vBd1>ZTcA4J&t(1v4)@R^%J6(8_7`o3l|u1r@LlcdS%0{#euE@LaCx-%k;oMmXw(Gv>{3$|F8W{UPmwfi@Sx_Gd@pWY?)cb${a3;Q#m30|P zNlKDdQi>Wegcy_@6#-KgLI6RhN&rCvvkRW^KwuZ#q>ji9D=WbT zw#}A(xJ-pZM+;Ggi#3MNZ}VFp)tenKm~5X-WMRa)J6ob`mWKn6R<4If#iNU?8INqy zq^L$rFFsF(tl?-sMR3Dn-sn9-y~^spVYEy02rcQJa#`e^uJU8~K>ntq5&6-^=kkxh z#Y%WbJka6>iX0yso;gyhc6k6G3ai1U93k$Y6a$e8#Mlq`0iGs(>O2Cz&PC8hQ6RJu z5)!8MS6zRw?8LviKfRN?Hd=Yx7yDX{>s(-hm5&0kNpAAp41$g(tou%(e3sWyp*b{^ z>6Fgzyu6%71=21*sy_+1$P@XMogeb>(Clh`+P$Q7rv7>MlRgSO;#|sF+2?A$mP^$) zM6lxBRC*k6&GdlxxnkRE#)X6cq?h8WyS?ldShYIrLOz+j;sBJF#%l$@ zMnuoZ7_c?dg2e$o8$_fA(1JpI6aWK609GT08$|3t2utWxn4#n!5MLt9BA_|kynVX~ zK~BKjHG)oy;`Kb_KukK|7y)?p!*YNP9=epQ_dZ*MxuIY1W~r9ee|)gC-KU5n?GA5?RC1`!yG0rY_PoC!C?vGIpl9c}j3mq<2PR^U=~zCo5CO_e~V* zBh%ss{R}8+mq({7gtW*^`RsMPSh#Z!5kvErcD4b7|3-)6=NH^Jx$v1Uy5>{GJMG)6 zebNWa)egE)<-z>htlaspohjiuHSUI{HtWN2lvmAHW&Wm%zCZ~j1h`F4Y2C{0=!_ka zHBgPj-T)lB~R`c_u zoIIh9?%Lv6b*5&df$HutnUnPD0EO@66$1UzA@-M7w1wU)DHi=5B)YQMPjvOXLB2(; zpjS`z?jv1y4CGc)F8MaD6m8@2IxyS;6evT)-|`;IbkJS!nlAN&$b2jH={iIAU$593 z8nzrOJr54dAEW;MVM%LHIi*c&i&$hhYj3_sjqt0s5XG;@)+5OViOg;oD6!~71pwhN zTK}WgAdr>D4pW?9I9ZhoOYgkuIjLvK;T7AstzwAT8_8hSjY|D}r#FXsC`nEX;rpFgg6Ncj$G zUfs%~Ku6+@)k}T1YOZfrfh=gmgIQl^??6JlwVYw^w12o}PQRGt?|EzZf$g&i6JufV zv;uv+EzBz7P9#bls0`3l|t5UUz(R5-;rK?bE$x z)|Sy=vH(B>qoUf!v7E(3>XucXqqsi%WJ_XbM%pLSACZm9n<8ZZ>( zJN&g5j819MZBanV&!+S44M}$7faH*Chdf%7G_q@6W@I06kMQf$YQ(xWI$mw=-)S$N zYI5oLxR9MW*srlNa3RNxO3yvtUoe<5`Cs^T9EyHK0&!iJT>Z4?>xnpo{OK!28op zZGDG@0hJ9?9|z*8H`w!|6nG6m+8^TbxS#b0-7UKD0@xx`2^3QeElWKrxW9XZNaM}6 zWvZ?0-G_9k*bH>#`(_Rj*705CWarqF;^_H85$4=BcU1qWkN=YHIj>U;fwu^50}GwN zqY_0d^0<#i^P1}Q8~8Nd{*@?tqK~i@1jttSKl+2a#S>K>r4G3^vO<8tF_)vlmTSyH z%hk{@s3U`+RxaWXC%wAgmgj3$%#zgVv@0lf`Hlp-bhC@Z2aDsY`0acPk`F4mQ^#!G zD9`F0!mq#+eH_L7VvvFCHoa=7H2 zB;9Ukt3XT^J(}ynJ@TXVq7mOwT z9a{AaODg{!!Nzp4-_h9%=@TfMtG}MD7JC%3Mt-T6f%8gmM`w;tL|=ce!Yx_2IwKr{ zV5g00pFf`d+$x6WML8Ypr8y@lb0X0 z7j^tp%qGzJQnb{$WOj+0vq^3UG))&#{rvu|IiD6$A^op@Q zr7-?aO%8=ZHlyH})KovWa&|KDzG+s?%E`ivw(fZpPr~mnZys!QTkv`^1WXX9$g=A> zyf+k{jmcOTbP)_ey$j{B32n}bn_7EMTZvc4z%0wa~$?>wCn%)qJ?_W*V8^QguCHT>a=uO{PEMe z8m@H3c&gM^U35cP=##@##dzQUDA6fCNb)d;&I`QF{%F28JEN%t2Bj=o-S?BII+|*R z1REZ5jnz9}(ztf&vdKCUn&q~pF|j7t0X-07b67^ZO_|vC^%TcujN!LIB7b~D*0|C5 z99gBaFox66dmn;KqpiKrNR*(H%$>*c<9?2^NiHHJqvoZGm8SGMQ6Ek|NbD8y<*X(;cY3LnvETVTRI zZX(}MBA%>^QgBCAEGS{KxG<)7^QxSye1*9au;|hNKgFge`(#lb4f(xul1$EO!mz5I3slROnnq$ zX0ec=@+NXL_SY}oA1(#UIINbey(VAT&rIvJ02n>DfBy4`@G*9>*aNxEt)8p9d-{jQ*SamZUc}$0LSfeiRBIH;6M~)g6{RC$}f&rp(i6P z`G%-Guj+g9hM@G#VY$%pdd+H{&+=xi^xZYBWZ5YlWgJIsDyZ+Hk$>LIes@S^0mAnu zy+Gj*(RnX@LgzI~NAf#y_sb4v3wfN+~m5MFs6)))gACw{*_+G#JhL-{Fr) zTDtO{-S~XMA7&d;-wad7!Bi*nraq_zeh|KNY^!H1HqE(X`J2abq0#j-?mU^^pG@Co z1Uc+0nar#IP+OC}kN>iM!a3nN^N_JHI>9IMsQ$rK)Ze;qc<$688s$7aO8f4A=s#TF z%5XNii<(&FT?==F-ke>H9H@NEr7o6(dqq4CBk$@`uP+Ux7bU2$X6T5gb>rCWV!8x! zBXLG}d0a=gOsCc#KR6U<{{p$8wyEgmXzLZM!sFC?VabXB zNXPDB=shl>qY3d49{G*-hy;Pf*oCcn_LiMWqISm#&s30!UrYu=K94_1Srb`^ba_xMb&C#KDAc5LC_)~ImCz6fE`cS;Kk=&3*M9jkJj91rgCg#za{-tRP#OYHC_658&Cor?Vnit+5{DY zc1eIg?mz%$L8O+|)9Ai$}-~Q^JU&Rj7vnd}Xs4R}c7~K!;J7 zJ_oaH2H4fW1c6YWV7S*_M|XCb087SY(RW2eLlyaCDL@yopJRH8nJAMa} z=!K|KFvM-vzv=x$c%%xr z-;DJ~KmYf4~ZKYr^Xwxe5)07xGM)dYKhj{pmJdmRu_G%z%zhjq$@m znM>U?Vt-WxQqSCKauOA3OD`oQQZ>l&P%`rMN;lmQu~{0gC`wdenCjG@OxnTF8~D1n zI7a&sGqJHzxAUOUMkpqy^O~pmYsT0YXzuF9_{O_Eibof0#M1?gWfE5=)s&NU^>*5v z5m%bc|7Y?rUc{%a3JshAm{BOeu9*hRC?cK%Nt!Jb4CV*qAxy-OjXbdi-{6CI#9>hW zgFG(A99Dg=o5owKn-kMl^iG)a2=(zP<{ds0@ewvzj7aOU?fS;ar8l`6Zd~+1jLEuJ zriXj^GYd&JgX?7@;=;vMyfGB(2R~@Pxf%P|2ncMplhh7w!%`MVAB%$YEdS#>X5_?A zvg=d6d$Oz?R9?_YKoMk;01Ft{&LGkrff+m zJK%h>U+!Zj6x9bs5We8a*D}8n+ZWynU$r&qkOIyGh|WlZ0!!{h(3G4i-2er=$(MP4 zb*tBrt)+3G->Cu}kK!Lh0|;FiS<09Qi%2m13`m=f%~p-5gXQ)@m$Q3*v$4J?dS2hj zR)qjiu|Pe=ROQCS+zSgTAOl~|zrQZpIg@ljvXnV4f{LuC$`i|A!w&C9tX22Z|JP#& zk5V`dQ6XGq#ED-yxa>05ML<(Rz7`5Bm*7alrxJ|2j7QuB-pM@*c#;;r>WNd=^?#|! zAFcshcita&R~z%9HqKnWXyhLYs{Y-y71P1kIGcZIxHFlia=4gtng@>{W>CEgBTl63 zanSa_NOr^!IosiFFH3%r@rFmN`PQlly`zE{)nw&y{raV&WKtxeF zoPEyTW1l_F81MUs;{n!ObKdj5ulRmHmkA_A7iw|L>f!8vHlF|Il*c>^T7|z5A0gc2 za;NX2i0BMVZhg>?HJNn-i&hZG6n8zq14s70CJ$c9y0+jSxg)M~>ZGd$vo7h-W;o8m zVPQUg65Hd;TfBnz&~$OBz|N>PZ`p$1$YroziPrT><|tv%rq5AzhEJupQbeCX*{jz- z@jrbKIU|$H9w1-K-mrSYVV2g%+bk-#>D~OM7*3y?q&MpUkyNsVxueL>whvcYQg( ztUJl43vU>d5mUdFi#IP%nny-Cz5go8^UUE|9ogGLdQXDPRPSnH+x2&+$J3wEloli; zIjBj`zz~%l;}&q!F`HbNZuFsW+@0meW{oAtbI%4vq)U$DV~~6uz-}*b_L$9wA=y@RC>w*F{ zf`y@#@fAmbN(pJl!$ynnfe=Zn>=P+y)@FfvHtvj_lC8`76C8=2F9gx{x=dd#@P!=E z51Du^R`l|Xdl94a^{YKPU+Jex_y|`1E>0{~(l-%{#D8Qw|EbgPwAUE+Ov>A%Sb2l@ zm$k(iPA0z${^xrcVWYofj)6Vo_2{SpQj$h=6(LOE+h^xYOJ5uUGe5O}Sr2qJ5LfL1 z;%I_a%+hFic3mGd&X>Lw(*pn?6Xd!dpdW+yg%QlpiVW!DaDh6=2>g;9Xfgnt3S6DP zyL@Ku%@AMm7WO>uKda$-73{Xr9iuA>gfgPVvxe=}a}O_zN{R@(Y8-X^AT9QI>CJdu zSp~gVLQ}?`bci#T=wv5yOSYqOetP1{p#)~rnlJAecZxapO+*}wyHfq`-P4Yq z9)JS^+HAJ!5UrP-3JKgNn^3BH&F%JCHz?$A_nDXrlia*#e4<6MSaEsh!8zr@E!6Fb z-^=?e-oJ^~d>{FU;l3|6AJ*VY2oLRhxaP>8O80tcs6%JrCn=}xuaR=$i$Q+11*Y@Z z4G>q|Ol#8wkp`{Qz8IhAHCY)KoOL=cPqP9zlyKvI9hpw*zF?BmmSRqg|8qUUtZB7Q zoPtc?WEFe4R@aybDIAE3FZP`{w_3tSe-b6NluLeA;Aj>SeC@paVxrpEpzgi7v-(6g zrVLZBf%m!0v~Ne zGk4KwYj+^uJ*;t(wGb&f$7*6W(Ou*j)izsmj0m~*N75T(`e@xQj^t269~Fx8N63Hj zI>-zfUWX183YuzYP?hLmbAOkTTRd*O6tk~?W_D>bPo*#bGg%(WT_oiZmNRkn=L_H2 zf&fghmj!P5ECa4)SI1o_=fe@uMjNT*+7$q`Mf29^pqk7evA; z-~LJ}`s#y%aI=c~VJyw+SzY$9q-qOB&LUNwbgQ8a?`olCC-OLlUtps}g56ke*lS{V zr^Xxm2xH8on)AqBdFOU&jxLSxd?Fna{Lp=O3GP z&d;W_X5)xn>#nBRJ2f@k_BlfZzV!C>dpAneuDTel)3!2AB(cbAGbce^L@WR~yX9FD zkJfp1SRvjqutHq-Fgb1J2Ak&EcgrJq$Tv^)Y8%jBa42{^IG{!kDxunj8*n3YZB{4&T8QjI3|Ba2!tMX$A37uozne2D)CkO#+?f&_2PRC z{w_~ATs%HjjmGkHUeLV;@8M3MFM4K)V`Hr6)$_&rMh-5d6ur-mDzf~N8{~@3;%EGH zu;jWR6v5M-WHxqJAzG~TC}r#I=+`8%8_C$C8W4pl_MThOiuB@Jnp@hjJn3hPIbX1P zt}!2z)%RQx+kqneElO?@sI_EWf{RVW4I#+9Q}Mfej0!le=Z_>P6a?TLsT{7qiS}gA z`2{s{x|!zd#UH$j3+C2v9*L0&N>`&Qha)2OvcJ$2@o+Y_#5K|EoRV&gqM%BN)j0;O znu3D2P-)2fj9j<+sW-gbjvnGM^Z1%2s|*@CWv?J<{y4eZU@5DlZScPL)!zKLrZ&hn ze~(uc7jn5@H!9Nv))c$!!e-7Oq8;(A!C z?nWC+-zm}iPo#fTSvF3SrQLJ3qNbb4G!oz6l(!6#_OY$(RZ8pIK0M>8{u6rbi*-LF zmkEc@TM6~C{HwPg7s&_o+`d$0SHJ<*vzE(@c?Y9qC4d{$WCca(${ByDN!8;?8-H9G7 zshE48ZSZ7+sPD{fIp|(%B%eDLEGvzS$V?@o3F;6mgE#sGyeC)}b|qJEDC0xArKX9z ze4JGt*J!AGDL=A)s!}Rp+lztvVI}WiI@S0ikLo(V0l^xBhbL*PK3--B$L!aKCW9yv z-XhBFpBp(=gJY0c1^vJ;|5~{&ab%wIRZ7Plbd~1c;WIX)E?$+Yb)TLLd zw%jxyd7G&iO8ZOfz~uL12Ta?HS(%Vmu;UV3c4Tz|4foE7nh%I+yr}9`FlKIx@NYmf$Ph=m(Uv9rWuuAB2hFKuG zX?l-vYqvRX#7dtbBtlZw=U&jj?t+y4N}z6x|is0_6<<=6P6N z?#ZwP$JST4Xnrwk;_&A%x&2sNNqeZ)U%x6GLa)-Rt&N&r9tlQJc!d=D7iw=Zqm{F@ zX>v@Cy{p3rvtVO^a|LRkdP>_!jH>{bY|%!7We!E)_uALzj2!u z8I)~Hk=X`C9@Xs$b1i=>c-M2?Kj^OD3>7oU+2&UdE- z!My(CF&b2}KaYfQ7`QFF?jUiF*H2rO;mGekV!HbA->PQKU8Ewv2=m@LfkK7EB5EPg z>4A@LC{6s+Bb#>a(l|mW-fFwSQg5XaBO|wXLP$Z414 z$%oFo5u{c*u80HX7}Yrw{OI_fGP7>e@w0X!ES~e=+y#i#-ZSzxr)qP$iN7D7E#Swn3$IyB6`66mFl{NB2XmAIsOzj&T{^3XTbV zy~_Wrb+Mp0xLl1r+}{HX6ZW2IL`xYYZx- zB9q#$^G^_l4p{t3^N)aT^RD9r^duGfOzG7U=PA=Wh3aA9eYF#w816G<+DKHZm=SoN zHlyRs1SW)QjQY*6nqse8l?;-|ad z;zZm|P`Coaqn4`b^Y15OP^bc7dJ@>%fHQ*_-VlYQw9j7N5!n9fCS;Dc6yW5C^z4zu z2xM%F71LntZun!+&^DLL)imN~D4x4ouXz5#n%Zp}nd!6jO9b)e7I#l2RW_$ju#7B* zR)d@F8#e)NRAOtUc4Et^~p5gRI+>VaNG!fn9?Qot+V!T-1Z`D=fF5nT3< z>d!!W-3dDMBpE-+_3^5op!-^c)Y2AQ?<+FkEaI<5{HZ`NM&LXNZXh?RU85l(*=x&j?;QMZ3>U(+Jh#}L1&Uox-q3!z?LJ!sUIr)#SA(4I;X4LD8pZd-f+0GkJ z?qRp$Y@_aR!09<;2*n2@K5EFYKZA~M$O-Hv;7lk77t2?&J|ilagChQ1ai0A_`*UNwC%9QOAikmj< zv}mZz=D$^E82|S1SBgMm2rkzbV7wC& z`g+d?mlU3|I__;EYp#NjqEN2dE}Qv;^*WHxSBg1Fij62U{fN_ix~W zv=mAjG6o@M_iu9Ea5s)J5V7a*$Zy-Kps8@|4wwey-xW-aLlIy#vr*f?|Oe_b064KF6ovuP zLRL9;!cd2(8ehH2oI+%J7kUM*su+r^IHw{X4+nr!?r@YbExTrI0S7MgNOj90cE8ZE z@nCRl1$oYss8U-?1vbER|3IXs&moBJ&u`Imjn zji?Y7*u1%PAEWF9X`g|E7FOblDCq1#vW=h^{yJUfD`cBM^)|6_2D(?MKdC?!MAfMq zRMf@MQTHPg_!Jh$J&s}&9N+rO^<^%zL57;=s*lq=T0^<5Y=)=G% z-juZyARyamQ^`g0^myZ2j$~#~d!J%Dzqt1iTneCG3CCSIR6`jIV$=Ke+cm*Y*_O}R zUXl#mZi#kzo&}h4p4vEMJH1_nj{r)r&A}0SaFEMAG2r>9_COzrDLL znG7_iZK!PhhEmxec;P=DF3?MQtSvp9;!umvxI_qT2L6W{P-#SjX;32z8goQ2D9ANK z4gT?dvv-nSA*;}6NB%o0imShQgo9TZPDy?Z{`~pY^OlY?I0&yRGYf4OUy;B|BBPM_#=E>Go$sPYo&({W?yZ^y+G;+ljab%>$M5M;?WE5n;mkI8Mg zac7)TMVs=={0)Md*IzE6Y+}pE6+v$QxA$1|2KfUi;3_K;#bHjxjY*%P*Ek>7ltd)R zR*I^hJmDj=- zSQS&}%2E9`mE~@WnqR->H2J#EtY+V#agDX5JurD6i!Qt@PZV^p^FgQwD>k?v-kw|? zaTL-fIRr8sN@Z)L(choH@%SN~*n=L>kV_5AxDs^TrrwZ;RwoIyHI7T~;u;2DKKsNy z$ROU}!A`RhPl$&`aH&RS>p-AHeBnThiUgq0Z@|ZR6$NDd$l}LGbw_Vp5g7^0LZ@GU z(U@s8hfkBxG-pC!a+}NonuHN4z1&xBA)#13To0mvhBE{Or*JgF8HWO_8-Vf4eD)ti zirmsM@j^Qo)_fITJPX0;*m8Cv|FO8#)_diP`?I{AX&d?d+8Vx;$S;09>E;$}?Q680 zv6O;UD$ml>?%aQzlTeO#OPjLu43?^U6G!h30z@-CO-)Pt0%C6d{Oc{1DfecxK#-p-yGF;}HWQNAEZ#Z4*alyj)E@zS2WzB>iEPyt6FmCS6BMg#B%q2MAF z{ro~Sl~S{fTe_RN4v0Qe>&n=;EA0d<{u9vq?&53vBoc08#K)5Vsb~pd@i~9BQy{9Z z6fV(&iWC8(G=r6_3+%$ZwQh?D`vC)Sqyg!3#{etDHa3>@^Vkv14`^D)QDszV7*Yoc z6YjF{HOU2N%%%?}#SII?@u-{Af-wVpjAn%L4;VY3FR%&y__|)oLlEi+ zm=U}1x0j<77tV$?GryRiIdTGFM9FO$$fj4K2>URGR(BuJ_`rs{8%@XlflaE3ra?0Z zikKqpKZHg$i{MQkPLW0%9)6>6yZ%{}bat_Ud+o*ztk=3$M6kQ*yjN2_@BkYfr&d}I zENZ}SkOK$+0q~~TCvUNgRJbgL2CRF8?3lo|m<+WF#D~(5^B7$o{^lZb22#N-0c?Z@ zm;sR~0}xca1;sWDG%#eOpdOB_e~i9gOYP{P!e8@4@7eR$eGBN)b z+mra*?ulr?9#56GMZSet`=Jf}80bPT9R0!0U;AgJP@vWo*7!E6&nB6>Y;0ilUCzRd zO>!L)KjGlFw_GKc^ ziTh9k7x5UFJGQ;Y00IF!CVynJxscw%D&q>r)<5ssB{t+o{)Z`A3$Oa(jc z1s*Bvq5(UYHo(KBmBtR(erSHegoI8kGAXn&TLbQJT(e5y-kt=nOmO#>bLvbFp21fs zIp~IHhO492)Nho&v_YzdmjoySd=$j~4=b{NvySbxI>%+{y{}IKd;;76m;5nhgfF)v^m)T5Ri9P!nJ+265QW1BGC|R-S0B1 zI`c@N8mJkHy4?CGaS$~Kh$!g1Ekj{>r#{6@4g@pJF$b!MC?(%QXq&L?JJq{3zO9;~ zuH!dQnk}gm} zA*^-Shd+aaV(g>eHPgCBtTHlb;FzgCat6~DApEWZy&HjSfYl7a68Y?{Y46`uSASIN z2YziRT7f<$e*>{Pg58T58vbxL95;c7261^coO*@i@qCd$gF{p=xncAKD)jbLNr;bOvA>1q|N7 z^~rBwKD4@f*AUqL-yM*g8Rnq?R+R{`|mAl>l z={jBqjAH%~kv5}m)jB(bVa<%$sy!Ho2P5@{A0z_mk3?)gM|s4J%7cWG|L~+Fn5hYX zZ#&-m%bX}+=KgCmTcm&U|3ot~Upob&RFOe>AasjsQrOtp6-iD}0KEzZF$kc)9`wPC zjK6{o0`0+APmV3pWej8!XkW#c0oJD-z~IaG)1`f=lY5n|t#i8f0F43munDG^*jSyC z6M$^p4GavVyP`6kMDRd?-Es;F3f(4z{30S%@+7~X!jp;uaEXO5oxrjbJVFHHv(k9=>QxSWN)gQ)7aTCF zpW(B2jD+%4#sSFSQB>@8*i0N^c~L1ajG~>MoQVlW>hMPMNlHj0!6c#Tc4#5TJwAi6 zSm9vPseH8j_2OcU*DILl!2usR`X9Q{%3X>qNnQssB&1dQNsg3;VQ)wLh0ly~OfY63 z_>r1I{TJLw*Ma2wx(9;Ig%fpxX%OJX$UlO}Wbz&uSK5#ucIk>umdtcc>t#=4yYDaJvU5%1%R~uQQ(Vx1NtjqG83T@MFGeiu7aWf zaCBYJb04j9=iU5xRMPzzZYLtUf^D7uVK3PGuYf}m(1QHb{3bVGIu}f(K}Iie4-#6i z+Vhw*GlEt1G4(ajzX#({PZwV1n1$oYqre4~7!%V9Q_i|z5JfWtCm`Re9P9Ee`Jb?j z-i>cJP=e*r20)@xhjTGfrw2a~h&&(*kbt!=#j`V-1ALGW&QF@{gO}PCYP;6`^ISOqP;fqo?W|_l=+p9e*g$AsOELam4 zks?v!*@*x`UxU*gu@fVj128sy9&{@9V2_N-%YVEB0w1othFi6sbe4K4>6o}h2yilYULW5mG?3a?tO@0WtN%E;YW zZ*=koU|7*#!~DcZ@Hc?6e*q~A_^9mNBf(oPvaSXoMZ_{KJt)ZaFrH#9IrkTuzqO00EV2kIht50^Zgc7e|H=k5?V#W9|!a6 zBFyK*oq<6AF3{dVScuH{1J}RwH~7no%kD;)zV0aN_^(8xtdg`1nyp}(;JUrdlZ}Mv zABtKE(ZJm(I6Z^dY_!^$8TOQHa2z8r zQOl<6ry`w3%Q0^`lI?#$LSPvF;Bx2*tQb1+9M%x&?j^x{M7iam$1$#5GWqB5D6dy2arLCAjbUBDN4Y~bMsj`8(SL6Ce89Nb+D(<=13JTgUeOR%UNJKFP58~ri z3lReT2H6u?gIVc=O<*v5uXf(Q@e}Uwn_+u=el3}%mf-$t)_=>ab`aq21C9_ZlFuE* zlGFiyXs((JY_dy=#rU@$^3bA3;fMXxZ?r!Uc-(;mSG&5iC_$JC0xS!vF^IPt0sOVC zW*@S?p0=03b42+|9g|1|-K zjWm#QwY-o|A-dibMW|dl__aKf0G&#(JmnYPt$Q$o;)8hdWh(UV99a8fdo%O zdVm@-ntnS5-hex9eAWw8u}moHw}E2`iAqlbNV3iJ2>`@|KYKI5gvbD2dpjcu<27NkXn_1`^|Y z#Hdt)%zujU|NhWEO7c$gnq-pM+YmOm&;l~#?>51eg4KQ4M!@|{Z#M`k>mB8z(4L=P zUx0jJi{YIw9V6b679>M>hDtRV9Xzz7d>+z`PV zguyE(AQ%QAX3Ydl31o_t6=k(2timfJu0_rIDn(&&8(Jq9PuU}jjI8j#(>g(YX$_AV zVwVsgbOCQ_yM9KkRzeaf206kNgTm;VU4j{}R+Nc1ibnj5WRkaJYj;V5~|npn z00I!OM|WV}m-o(6GZ>oh%$fYpa0;yU=RuAFSuAS#al8h} z=saakcJ9DnpZ8YZC1G};I%kYGkj7k$!l&uIINNw!|w<5!zd@0JL$DdcSko zcgCBJ&NZ{RaFP=ucM_Pl)=&cjO(-)u>^xUNJF?>7Ubp~K#T9BRF!EJqt$OnkY2y$216i$ z^7Q;~!vw<))apu1&&jLgpEcg)a53~j&9?ZW?@!r9|MQ+GH2t53#NG(n_bLqj06;)H zG(!x5TQco`CJkIJ6vKThM9CV(s(?>KbQdrINV@^hl`w5{QPrO`IFCOtJfai_93%hi zwJTSk_j|b3hyBkst@yueeR%5a{lB(8@;0u*5K)NB5efk^lK`1l1@msQfFY00PS^r9 zxn58&coD9`gaq*LcXs)0$?IH&P6ufFDz=NEZXbIEMT}tqpeaH}kDWRmW)oy>1=_#D zD|;I)C7<>F&u-HN&A0RCXwIu9w;cL$fN{D`YX7|9q280kiG&CejktbQILm1v<-~P^ z{dG7pAI`;hM&aW`m$u>GS%1Lqi?6NttdR|+-gntp$WGjx#w+x1q-Xz^4RmgP9Rqs= zsr$1Qh#}jr5-6HLk!x|Mr;3i{)UtsvPm0L(c$u^;9dG#JM%82evDk27FZI4hkol~hdD8{fWyD? zfj?pIMa+JCxuDO#!0|0iCoF#I^Gmc?IY{}=fjOG!)VxsIKSddfF5|Hm>TOE_2mJjm zmA`*~(FfICPEm975(=UX+pe?qE!FxI`IFt_y$(4{qR;|(XJS*4)1g{D1gQWOU3wm< zoXx$P2(%}4P=YjsC0!yMrNY6vMTw(?5z^wMI{TjJ)JdOzLaTOZdU3yUbSBX}(>==+8-$)mXt1tWlsu zAtthqIXwlA;Ds2v^!Mpf{aA!H40^Ie=e{wt>dM9`k4Yq3Fo_jI4+h{j8R=i&qV#ef zKIL&rhu>lU=Xd@oj+q7&9ajuv@5P_b2QT91-D9A}LdQWd>7~gaKSeR_w{8T|F~0u1u0PvV+TY#hr}t?v-r?6=C2^F%Sq+U+{&*U)#0ijFAKL*n zW3!0oENLJo)e6vP?#tl#5t!l+&Xj(;46SaLmoi50xpFEOrssGZa{ucS=8Eud$JL@z z3;pRs{?zNRgK%t!>^H}#R^M(p35J)`?h{8{DRXUj1Fj)&00-f(!tuB!kNTFdciV;j z0t*x03YDaR-~k+(g~D|%-YSS7wFiOM2>eJlQj7Ov5YUwFtmaxLfngEkKBQ-oB82YP zXvoXKOUQs0`^Lc1*rb=g4h!Km(jrt0J0_Nd$oCSVwO+I8%=!CYX3b^a8EL#aIw6*9 z!D4KPJuy-ucM<|yQ6E=wgs#n1c$Jm^KFNXtBLY~>YP*p?#PxBxY!|j?hKn%_%1&LX zZdhCf(J>G$<)7UKd(!P%-he*qq)vgdyITVk7p(a_WNlGioZov8eW|W&6qJFd4(DC& ziRgmJixTtA-2t<$pGdQkO!83IiU`=BD2|T7xYz);ZivNUJ4KN zaqi)wpJ(N5pV8GDp7>l@w2C^PUu!Zc6J3*d3zY94YWFhq5xj%QKUejSB+Q^Fu9D&a z+}F^g1}2P-%P&yj(R{&rH@YU};be>g-*zCBqZ`J7wUu}O?{n`OQ3*|N{^kBE$y@^D z$m5M93Mc(>g2$_ngWZMy>CZg!mYID0?PtOJU68-poV}a=-b)1`FA^RrCW(U-Pfv-w zYR}nj89obYtr&~Eq3X;NxZ${Z8=5~h&o47MBP&t|S&?1rH54>~DZQdGci+!u5B4|d z{MjmvxoV)^(*aP5TqhmDRspxzXFm6aWhj+YKM}BE|6K~%P*-@if2Ud8XN&IX!E83~ z(TI*6IzTR)O!ON2+iQAfn)P*HOO6Nk!LRSlm1g&mHj?n)56lsYxqfZ1v8M3E`1R+^ zkk%%9oOAxC-+6J-3H}qIll%>^ld&SNyLt+SMOxd&uw*^tkL_Ef?|l&g<8Tv1@MToE zKKo>jH=|0)&Jd_j$sm&x@(GH6KS8l{kYc6~i)HWy+`g&CrH8OaLvU!f#FpUgrnt($ z8@7r=3%WZARHeVM}sKwcmWmG@JkPP*wPdx6|w>TK#zSuzn zC>iR))VZSVDzx+RRPbBN)0E!#Tg{s=Os@g1^3MHO`zYE%gzOIC)#3IcmPPgV`x7-+ zpg^$zg(BXHGjMZtz-&RtUT;*Se4Vd~1BbCAa2t4$JBUd8r!JK)616M5yXyS@}7WsW}sK2Jl z&Q^*xyW9f4cbA)st_-5!7U1$mDgXuR>`KnfixN zTcNh7Jn0X&8qiZUDY?)@7lxrKqpPp~)nUmGaWp^i7N#pRSf4YLs;3ZVt471d^^Q*E zv{7xCz2^$Cuh8WY4vZbx)RsmC6F~@gxJm~4VOYPM1;0^SkaYnD5+*R1zE@FJj*g85 z{1q^+DfYE)-bALlLd7ZxhVN`6)JurwK#NIk6Cck2(a(+&I4%Js7bUk&>*rj7mRC%HWp-ZgE`>xXqA+ft%^U zKM4a2J?4YS3bc^-rAjKAoP(@@g^f+20Vb!l+|i)U`n>f`bq3C#?)2j^M&MJn-WnNi z($s(V91c1`FeNz`NzA-j)zuH}v#wm!c^cb%UcLsHS%Okh45^y>*1JwXp$EytgGg`; z^0=ODZMpdR`a*-bp<4z-QFrt)RFsq~%MD=ioWQpGe@_xdyza8zuz7+3bdA|*<;(<` zry<07nVsDPX9Aw<0^S0bWs!KgGCN}sU3Hr<#G zZ}_Jf8Xu)yKB(qb-;kl~iwH9nG+WSd zuK-S{d!~xKJSLPWS5E(}=6`?@xn`!l_>3o<$iG8ag@RCIJGfk<5X0gqpigc8DE5c$^2N8ssU+9nt0EKp|r+JY0Z zlIKQXavKxKN@$0B%M(}Ub&u?OrE0-&$siMhC0Dv&xnpNWmP#Eh)WaadrT|~UKKkE8 zr^KlC+AEN3{HYGBnTx#)I*YMaQJU6e0{T_kzMDD$ybCr)--pIXF^*Zd1s*TKv z`FA<9fN;-KdKF*1eK)iNVdXH?Cc=Q8{Vt5~B0nc3+F#4IVp~xX8p5lRDnD(vCT`V} zi-aFP(T`z*prDr`iyymoM@R5WS}i9N)2aK{RR~j`f5Y`CV0N5~01iUC3BW@(;yiTX zo!Y;4RE`qtZD*>n#nd$bwqX~O7))`elJQ0%mnr3S@ctgMmq#~`rJB@U-F**C@^gx! zgu9w3{Ql7-QqEq#FOBs_)hxj8%8Yf4cb1*6^6>f&zytugC}jtv^|o%+W~8J+f6I2; z+fF1xcc8s*UdK``gj^RX+9}qC9Qbh3WL%Y2Q;aIr4`l~$bmp-5?Gft*8Yj0h?LL6~ zV0isp#G0e=df#>P9_&Kp^Lg2}rG7ycEt9b|M8CxPS;q%>YjzEc!hKWZ@%y5Xqx}Jh zNWg+Qx=xj@ytY1T&IRqHSIx>9cDL7u{(+y=QqkHL3le#+}a_QOW2m{s~ zRyQ1HIJ!w92G}~Z2wqHvHRKl5mpxS+N)<#=XcsO6PSKizL9=P-bJaXu5owA@?Y9Mo z*ZJR*UQcD$9-%PgvSaTKkepzkHf>Uuwo)}E?_5`6d|p7$5@S;NWWGX8pqDM?<_#^< z0Co(VxZO*l!qZHOL-kjXzkM@$?ju?M=;w@~N3v##nmDq$ivl;tA4OW~rc${l;1^ei zzf?H8!DvtCXz=k+F?S@NZOC|i<0hkm71p^m{|E7ILBjzfPs9UdOQg{cW5!Or@-)K( zv@uX~emQkr+>vQ<(YzIPL(%DLS{RE>GS>XY>~skMda-fj&C>u3i{BaeIUKmHj&48V zHX|Rcwq-^pjAVVXcxr-cy+aV9&Y3x7-=5z1hrsiMjUH~$v!Oy)-FHkPB%)GvJYpYPP=u7BQ~kTny3JBk1FOI_JCWiK?{$whY*IwdAf z`U#2RMSBTcxr_EOZ&@v3#z<9oZ861mo7~&xBDW6eZ}4Gtk6eVj$%5h@3N*tOY6s$m1Qams?1TS!MvpVM}Bx?q32 zo1kzmPgkBh!SZzXWNSCjm6nGGnB~j@WUZPh7MjKucM)7c`uNa zn|zQ@d8>;9A0zR#q4DPJ9#K41`XC)!fwQkEHN1R;lXIRw@09&24@;Dg!j(svuW`4y zms-!Rhqr(D_+)CJ!YDSG?^Xx9cu3RGNcE5G)DRSUY?rGOz?v3ZXtyb%mB$Qq z>vP`r8``{UvTF2=dO`5@$F6yJ(|Bk}^Rr-MgD=icw>e+aY^!Xa9^VbRMVc=B=rm!mZAvY`bEtAO#ku7C*?7O`| zBQxRVBr56|xNT2y`3{6>8cou|{c=fCjc2#TQ!%e?^SUm*Jm>5r_0*Xx9uu`-78ATl z6VWc>*J^ZZU(H}Qx{SoOja$-AGD=_6;4g5HewWm;sJMIskaRCP6=*n_K! z&NkVb7lkS3`|~wABw>^)M0iyNX_j(?49 zex4DiCVpzf+S>VCa-!T9Pi8c_Zx+7v^S@u3`_u62^~*-`IfZ+t4bd#JRG7kq;y5mE z!;LeQRz$pZzgQmFpBlW>d{|6>puBL!EE zA_qU3iKYAob^8nE7=JUN9bVM_Jz&xH3Y97siTv%Up}%1*t&ga1D_;=Q~4Jx_u|NQTCj>4D!Ge;x)1F8+n%A$9!5H zvSLES@;w!kgYg~7Yh~_sh4V*NgFnJ*crPl*E@le~ro4M&eE_wnMBdlL6Ir46?3^B1fX(b7_(?^Wko`JUk~ z2@~dU;CR|GmVPoXZhCgWaw9tUMgy(`a~*CBu5!}?xtDvZc8{;^HPX~89`{q9<^4pq z#CmLdQ-$b+RSc=Sf~^~r{r4&HmjhXQ;_d5b%4_N}TRELloL$seWKJe&Klt}uVu$c& z!n*l~z~0XV&-a!iojj-#vqll07cOuRZnb6#L0~|wxepopgP$d$9yl|Gf=BJpo@pvA0GJ%050S@QJM(-BdIkl`;=a#?9_S3NAbvdr@H z@ye!F&}|!NBUjg35@N@zC!NfTL?ii`x}E(N?pZ^2%Ide;N3^2Z)h{rFgkiOmKUBeh z+Z++praW=@L=@Iivz6P$Z5IiY#f>5+|D$=qofrLAqwl;fZfI(u)iJ*@Us76_ zeFz69Uz!lgftl{b^#Sj-Yff^-W2CZi4A~U*9!vZ37<3%Be_~>wYD0$es>jd4Quu8i z+49{pQtnwJVW^r$&3x***V0dE25Qa~R^+gs+zD?$f40H2xEbznFk*gLB~Np5Shqgw z?M-f_VG`4dYUf1F@Pr2%ZGTis+UBmGqa6_=HW#1`4J}A zvnQp?5Gmmg=Ucep9~;aVjVnj9PY4U+(o=`GUv9n263pAL>F>t-r2e5;bqw3e!iTe7 zc}3|oAMQjO&OqVaqQ?%j+o~5ahi-9OLlk*Q%&>0F4^MgdSD!@ouL0-50a+mw+M)Op zoIWV@lUI_8cpj0gZa1R^IVC8CI^7$suPdb<89=xu_Cy48`tk>AF_Q47>=ZFLHH;nj^uY3n3+a-Qt$>`6P9D$DaUk@@Uc)4oV97dJozf2{;nx(%pB9 z!fz4;+@3o5m{$7y+dnb$<4#k}Kgvs62mWqQ;nXBtE*CsHNy%qk#86d=#q3uXlxu? zXe#-fIUO*7@eIHJ(PKsB+LaD1>|cqJR~$2i@to*Dvz4>8+(j?wuMedC#$vpSR(}bPaE93NfR_ zopLLnBhlm9F~WH&N?Pfm&w9#3{QMgh>Iq+7-48mnYx5H?OB(xg7*42Ii5VJ`}&s;^K{@PLbn6?Jjbr{u3J@gKT^lYckC*t6<9I zS!KQ1lo)&KQr&F-X2{b|OXo7QGn!WvA6RZHG}AmM9GV$3C;t4ty|n$qFYc{7oxcVz zurVFj2|yT5b=G+e3onR8J7M{BT3B(2EhU*?jZWT)?lgRJp|w%NDo_{#wxQSSc^g@l z7L~mL%tR)Y*2ps|qUVnpWC$^Nx)V!|Y6`1#zvR?r)-{V^FD2ne{$Bct^n&>a4Te2V zL~PR5k(Qfl>$KR_HIMw_`4!&0xM6y`=+VN zCW$XAeAaPg)u`_dAD;(T;BoAY^#z4Sn&s3^AziuZa?-;?`OoeJJVy_a^C zrqT}7BA!6c~Aidf_!llYZe~ynP?c< z{c)!_HV#T5!~IQ;hW@exmV?6iyh`yX z&+-1ZBltr+zdGigjP4z{8wk0hvpvnC(;2F3F^@{8&~ID1^7s}u1g4>`CoTP4CL@I{ z%qZe8l265KcVDGZ1VRcF?6Qv0a8grf>XBL`ujI7)er)j4e4?8MMFq9irx(?v*CA(s zo8w!zD!g&xLi5$^K!CdROY7U#`49S0c_ej8WHZo8j{xZx4HThNyuBrnNjODj4RpwS zDMiIkO7os;qX94$;4Y95YhW<-7EHLADfK_$g+{{*R)wc17o4Vw3L%hKx12~-8HIzy zbAJ=4bT!}s`BguC^5w)pP7VcwNsM)zPl*CM2AhA-tqBxhppdnYt<`to)XD0-L>YX2 zd6XDgWPs+nMC|>%>a5Wjx^L3P4!@S>?{u&$;@Fd)&$<36gZgykJxS2sBhE?^F5VTF ziY!w1c(?t{D%)qH8!i_Yt*PJ`Zr+!T%gWlDP+v!dV~Uwjg)J*&T`4iA74Dj%D$D=* z>(8VHrUiUc82B4rJ^h3pHSRKha`AA;r+E6^H+s4az4NGE?f-8ZqN%zWVvjGS%pt_OJNF&%k2?0D|VL#^;~*! z>vF*#Qu>P?;yCb7TdMKYw$__(?>7XZGVbsS-oJ(YmL%-+CyZI*pdmi9#`^21cSgl9 zGOiWG$N}KBsAEotkuWgY@gst7r;mR3*|Nf1t!JL6AamMX@ z&?CKsp&CpAAGR>%75a8>fV`*KQMA>jl9iPe!*kkC3}(b*hh!c4CxHpvfN$}m9oYCm zapd<0OYzZ1$rp^~^Qi%<{H;Cb%B4k#882V8^t>(-`;dSXVD5otAMqRL%C8$K;@uZm zVa-_>W6-438~!;rCK;PfGYUTiN7)U6edj^RXQa7JQ!VyIvbO>u_sAOZdY~Z>QPT+7 z%cdP2eOkxWPn0Wuv?z?yv*}yf+8Cj&0U*SWY-eL*1L@DKP5K=pqjAT>uU~KPZvs!C z&8d3wDU5$Y%xM0{KbfxpCJsy(3jCfYFI*6PF!aA*3`rSG?m@)Zm${rTCn`L%dctww z?N0#tatS?o!2SGznkN?6LDvqYO7~G|5^lI4C_}_;FuiIbQoyhRJ6X(GSyS_iQlECd z4s`iAU|8JzlQpoxv>^ilD=I2%rnzKgnPJ3Nv;WzVx%zA2{4t^b*4mXoQ`xrb*BDCK zL@4u^MCKu)O$ucwLu8C5B$Oda6w+u)n_2I^!xXW1!2$04GM!RMC=Q? z+SrXGhmVI2FPzgqBW)Vn>CJPKA-;BXk4J1J%ppdg!Lnz&Xsh|O+OU?p(8>aK#Y@% z>_#HRxJePsq#^=0EWEpEZmW4>l*FB)K#>)eLJC zlHQUy^2#{6HBk9BRSExC=UQkOVS-R8Q2us6q1tMu6m@ytZ?f0L*Q!)*ioDz#$N1{Y zOEA5G24~ajpMPszx_o&zOgiLSn^wZLYfA|d;fIBH981nq=Wsx4NFY5(StGE^IryID z+JqKnCW4!rJ4NTPY{22e5NqP$<2yx)qJv+wI&88TK%|KmM#~1#9$K;6C3MG&`t>H{S5*xqlVg;|Y`L|2!Uv0I#>QdmhsNOGKIJ7XEo;4hDbnQ}_ zaMq=#ncJH)edn#8yti88EK&T~pVKg~hTAq!;8LpMI?Wz^7jq>RXu8IBR>>}7n@b-- z1G1a5uga^}iE#}#K2krHR2uf5ILHMb{#HpY{X8CiIT0p_)g!jrh=a+fhMT`k;A&x6 zW_I^k$I^xD0JHn=mPWZVDMbA!$l88ygOkoxW*l)z$azNA&bg#Z4Bf;9JpB1s8rj_+ ztSRO^px##!I&jFm$1pBR$6eEQDand3i6%@)QL6--&kTD$}PFl$aS;t zH0?yWK-J<~?QVzP_?@9taor-*c*0PYX$Sk}!3u_~hbQw_zEO^UE~0-98I(iPh-f`m z5(s_TW_X#4Ol2udUct(Us&4HV*_&UnyKmxb(;TPs`WN#vA5}d?bViia^fqujs?ikM zv1qgZ#>t>J(`ig4&a+G~_Lo;JB8Ni@!g_jqg2kQkHhN|XyZ`iReh{*GCFYHorCnC$ zUmE7xnmf-~osikk>kzAPpI)N*w^K~O_X5D?pMZCDlU#YfM6iFBL7{^yWpVcmpgowUqF>&0CqHSQ2 z#GqrTh$^RGs^Iyb2}}sUX9HYSe%_y!zL?Bl_8XAX4&1W*ix};s^_e=CqN6j9N3>Yn zs>WH2jg8Zvcm?m?7cO)5mUEbpT-5B~@H-wkor z9*-YUySJLjo5)2QG9uMzVXA*TgaW3pkAX|1?m3nKp>+x>tjDGFW+)rH@4k7x-g21{|<5#XyJ^A%d1wZ)t(CZk1*MdFW9ltBiDL|)rt9Sf1zFt4J!y7HxryP218 ziP|GxgwzS?u4g)IxkVo*2XHpLJ?Ru#VkvJT!)}!0v#`5cpYq3G)?(W^IY7*FPGpJZ{?txmN-M7#xfrsrm+)CZx`F02Y+96y6&t3uGZUXyW;NtDx;5{*A5q4`j68b4eck#; zy!*BZ#Hqqca(GXv$iCF@)KgdDP}uam|3cIs_p^;7l7+6Jz0-%4V4b@k6UEYdhxI%U z^LPHhFN1F}Zb&FYFIlhlM4}|FT3coi;5k%&G$E8>dCBDzN6mv>OQpX3Xd`-*NDpZp zY`wiBLgeup_d8WFQPYkxN4(hPyC$0qR-Uk#wEHTh7*#tl>FD`Ui>+S4rpuvt-k8`J z-qN%#f8!;WF$#~=bkV^X>OxoHmojjv{4<#?h6{sy48kYI$D-BE96SVlb{QVgb)Mlt zDMsFVes!TrPOIlehYLb`WEo#CH`SagHdadFp}bhWwCf|A2{#I9s5oWP55D02V~P%N zNVw%!WOeV}oZfX2p-!2==An$B9ufP8qgrgn14pw;%JgHY9G&VtR(tPNp4`37!%F>% zHGci&V(ne`KC!uhFgtv6|F3<08=-kvBe(c}95&W`5qib0Epg;@q~ zFlWE~5cP8JracG0HB{qmge&Mso1W~Pz4m2h(Tk~qIm<1G?WLTGO%#|1rdge8&R}~_ zQ22I!QEAqLS6oIlBhQC**VG>wUV31~WFz*BjNcids)#SOSA=JmW_90oh5M@#LUkmk zh2tLY@L*X#b-XzmI}5R~{f^it^I3|P;Fv>cKb8FIhWxlWGv?;LZ+>>-+DC7~FyA1I zSWoT&P$*?PcNkv3R=BU|4X$8pe30Co!lgV=T*R4xbR`{DT@* z*Rk*3Z`1D3aP{WXC|mv4lTGR(Z3R`ofLB>>pY3Ojo?MZ*gS`{Kf0(VMn~MK?L=ZmX z^B+b=WH(Y|LH$s5z+QML6fzlm3x)^LJ*|7yM8jv@qheyHc^!j*xoviENZp)TUr%jK zQDK2vEpDjxE&ZH(MW22P#OmGOH78gVbbacpQ2sexd?N7g&w-0kMK~pGx)UhtIhFDQ zBm65?3^5-4)Swy}Wp#NaT)A>~pend3Zn4C8p1}fVdwV;?d{gv}ssda3VM5r});2{y zOmiw!A7EuaG}+~pmv`s1qDjbqtc(FRJ@DX*qJbrVcGcDXu=c8u52^d&uU=hJXm@VI z@gHvhB%SH;NIn>P6uP-}%Hb1>A&F<0D1CcZ#j~Lh`|{WSs}Ld*{;+c!NA^;ZG2zZ` zI+s5*xbGZ#y+P&pk0S~3@#0?I-UE9`7#i|!Wdz`KU>)he05)ncoQ5{|R`k&UJ7@&K z8Zc%>J0FGD0D`~=wCVM#t>NJhk?r!uZTOK-VlYBR3M-2rB0c04!9jVE0-{=6zDqj>c_@Yz_COV3spgdS_7dg;HIGCfK5QI(g8^5f9&n$WehwxMzvf!WIS_rs>3qc zu7Dqa!f^5~LMG@dHtpY^Kl@{(m=0hZ9U~NNk;ey?@0hA3eN&SlEmyu4I=uQ0m|U0) zZ#!3YV<5|3Bgu%y!{Ali{rzwVb)&c6T)t@Os!vmznMIF@E$P|S)lLKtGD1jvAl8k- ze?wvzOs#lJY(zqO`r0VJzB72zijAHpJXo}pX-8iE z&6`XFn$`#c%v~&LZ7~?ms;5gE8V<8!5|BkBr1vVG%{+wRfdi{; z6!>A5dMzdXQYc+A>xr%?h+op zfduE`%`GDlJEY#I;?5FcjeQZTU(vQJ-f`dh)<{a$4^>jL3?bKE&|6bM7PrO=To<^I z{$bugbwsMfZ;;x0O@AfPcdzEb-32AvT&;>?zhd=ElglDc=$U%N$U=Q?k znA6%mD3t>6NGe~0(=R$23=Z`&nlPX~|aGL>xWD_GO_oc3@rhZB2@ZWHJl zAxpX zb2>9VKE9Wx=ujPUXBIw(zW!@&Y5gRF23oNxuo^xT(!cEtXmDE>iyEIl z%5kx$7A-pqIN??Z%}$?{F;`NDDhFld$`KcKRKheQ6&1IF1}hYt8R*8&_V+t6 z8IshKl3H5!S9Jr;INAfpDxlzl3DO9C8c7n=K7mq6fKZAH`GQ#^_MSis^`KJ6ix=Xg zC_DU@;{5!4BS@$e%*@R6T=FtA4M8om4U7yF0RdUjuu&EF(wBNI=ma-Kzo%2ajY2|V z;@b()ngK*<2YW+*eSdzU5$jl#W1#>nZuzg-#|C;A*QZ-dfJ=ycwk*2>I%qT+F^lzD zP-r>}PCrgmq^3e^DnVEg^~7^hK8Z$?1I$ehUN`o_1%7ffVe0TV_MCQk(WCDuoGWxJGXrSS757K-T})tsJJNeJDcF>H zwZdsVVj#+NX!Rm8to*-*dw;}=$9kdbOUk4lvOg7@Z~jkkB{|9IdTfGi`;HxmQz*j1 zXW?Z%K!QP*F;Ss5-;&5`ni!v)%tmM*|Dg_|N()vtHVJ8I){6ds#ZWWpFJ^Qeo&^np z1bVMSYebjnjT5)+3W6=-WApi!zbYaq*8A3=ev4Ik9E_+#-MbD&Kk=-`V(kx#h3t|N z9+Mmjkg){0hScsYbq66R%(NBQT25`d!>A{6Busb+7$ji&u9-kRF z4L(Rxq~n#Ivt;%fa#Pi%`1*C$pem7yfuNPVe0)pPiwJ=>3}i7XL)MmTEk( z;!##?J_F7}5Q;NVot-xQ{*OgwK^SP|?0o#m7g5pjmPMs*j}7|7oel*(-R@GKIjduUkiwCbl$a4E-Y$o zmy1y;1Q=o*3|g*76!r9~OAE}6J3k{wXqH2h3FKOKL*r_;BjaZj;wXeG0t(4oOHViC zj3U6DfESRGlw=OA`I%UpN~4{QiD8G45VeS!vT%Q5Vy|z0q#Ad+tYVhUc2Sm-TSRh( z1buyV0TD3X8#@Uz2J=SzrAtvj!4Bb+`T6+~ZF?I4Fo3DWkhtny6~1I3m5`hq4ML{O z{QTyH2MD-Gu9KxAx+K5r(93-p^!e}LqZXqChK(}eRaq7@JYX@TVh`y>Q&M8>U61pl zz-a`>i?VtA<;yWBkHdmFF?@~iP2+G@Lx@jKJ@KRhLqJM;w@Yas_)D5xXUV0XzWeu7 z`)Kg%Wtg0qaVHg6+>k0>V`Kd`SO5aD65j#cL-x_J`CZx;*g*Ru(>fjT?~E(#{~_~g a{k16RUZ=-jh7NZLd`vf47#C69PW}sF!V0f`mw?gf!AfDQ)lu{|B5s*}* zK~nOaYjfV;^NjBs@B7F1=Xb{NoP+GW?-g^+d0p4M)_!!)W9(O z01U$$B_)Qx5&kwb3I7pyRnl?QbhLE!FuiMmT{m@gvU7B`yJL3B-Qupx9Y+TtK0yIK zQJz!QuC7il;{5#f|MLny$GcYiAN1YY;Zw+*F6+8r*b!6o2QNz|^A3h(-MOMDr{$SA zH|*h~`E!qCspCo~$MC(krRlBb(6a z9dQ8e11Je((c2wD{{QrL=d;+gD_1`7z8&h6yFTs7sg~UEgjldH6V6||bF@D6$cF3VcRyx&a=eP?$MfH>>s?EcX2Ndh>4nc0 z)H3_|`FZZ|ZY=+dH)cC^s;-{buJ%635qT=Yx&EU1uItX#@B!w%xw^jxN;r3Rcd6;< z9+Dm781>)ZPEqJEuJx00sGh&LI^9LGx3`C0vrsW{cyek=VkpF-C2ox0|1X92#=;>v zDyMHsG?klup4vu6%toGLM0100J8}k1d?fpNdSE_AbxlzW9r_H_uWlLpo(MW+GFlf{ zJn!b_h6X2S@#T0!Vys_Vh`L`p{fdnM-*;+5o@KRC+ zR+jnem)pfM+s+G~&D8I^($m*xK3Hz=xiy_xWby%DO(e5R=e@V^5C8pLJ_f?xzCMNY zyCZ$4#Kh><*4CDmmz@WtB{W=Ig#32bZt)nsqs0ncrc||V-I{ni^dJMb8w@u0HZBB5V zJ6E5pTk^Hq!!c3Jg?RD(2D9JRG$*XYk)om^_rayHP!_X|g<)DjGeVWCSBKX!bte}V zbBo@+lYF;19&s504PrxLQh4Ya@x(-BY3XtPnoTy$_44R$t5C=2L(XKUZNHAp1jiaW zmtU=cdU@{Cf2Z1$>eG}Wcz#f(z#}(C{AD< z7|__*hy{H8dZXjwgQ1M_^77#U?x;%>8NI*Bf=1VU#pN@*QnRx^PxtLj#K@pUq7$;j zu<-D3Omtl;J16IUZEbCr{n~?`oLlDKOYbD*pKGD+%2bO#UqMW9l1KK^CE_36v%J-K zqxz2TgPkXu8-i)u@?SXMR|Gr5#66KemE5N-V#nDd`m7CiC&}ar*l+egd}OB#VgWk_jcVq)s_6CnpJ7 zVxX*eEJ1Pjp`8Z%mfDH&<@D z_~ZSvi}v(E_wQ5F(uQ4m%rVv>v;V|zACd`*Vr=R6s~9-HId^U<4<b zoRY!<30T0kmuG?^e2R2)Iy2?9ePv~3#eDf7)v;p)WqMI~1jM6V zYBFwB_U)V7Bm2>lleahj{xuzMcvyUYK2G1^!}enB5!qz7nMC73Jx)9240 zp58KgI67+5(9|^ECNlD5zN3@2PT!%DC@?VaQ(If(&0B`Sb#=1nJ+aEng0pP!ND%?c zR$8b@bsZf?=;-KlYJ4i&Htg#H@NVSvZ2p}}iT&rFpmPQ9I_H)T9XdoxM)qOYV}y6G z#-``&>{GiQikQ>lNoC6Hj~q!P+KYT7Y!{iZ}K>U7F|@kbP-$GVXb7B#80rqa80Sa$`1jsx|46?b@!w~9btx(c_I7K1 zKEiFb_jB^%r^*IhmGq-mu3R}BHIgV`8pN|HWJhu2Nb|Q>%1qwxVGt$0{Tdn?K`8%! z`0xR$blv?U(qSPX#G%Qy^Ah)Fy19q$w(yc3I;3>@GO5G+HSO-6p3lkK@OrbM3TO29 zCMKugf#;Z-8AKeO6mz`&;hp0;L|4;NUPI8^bViR}#8_QDFqFklr>gf12Z!0hP$d^P zchMkm{mP+c7fVZyw6rt~gY=cQ>ZzblIh71&-h69ynv|UU<8_f?LRtS65eU@P-`^Cr zen7sj&nhU0FYD{1m976f1##D07f6sk+w(%$*3Rc{+)XX5Q3yym7_7X4f}rCMNyw=M zhEwXA^K=9ice*%ujB6MjYj?{VN0cvJ`Vu5_AF3lg#<+Kw4i>q3=>F=+{vJ&lmr6>J za8y(j%HCSH4BbwA{P^+FvuD3oBTk$=Y1aBoFe@jgp5TaNKv)>5&vGNpGcgyAIDTWk z_jp(1cs`(C%;s0CZl$q^I$?uv9h;*>evo1nQ_1%Veg59%(J92Hr>D7ic^e^goL7IV zQ!-p^wP85SA|1Ir(aa9%I7Vun7P8FJax=?BS_I!es_*&?Tn@pEmY3>}OoT8v)Iptv zGjHvd5?$&6S*O3s?cA@{=l2^L6mMHvj@KWNX;7jue88?8jRFt~w{l0a)G{24%QV{e zp&YaF@K9=Isy_YELc#y;#+I29oC-i5O{j#vd2?2B^{c|+w{7bCdmFXtkgET#Oub4? z#an#0a+t4j@+hOECzYU}UQ_xtX){TM-7YJ_^7)3!1V#Jo<-LD)+YBnbyt1Us&W)&7bap%sRPV=i*uL|0K zztH+zh{XSIi!n;FNvfgGg{;h0ezjFVcurY56V~7vldZ0< zuII1kR#Z$(^W8O+lp^|?8P|&b(mmhWS?NGk02Oor0jk>Fb%x}73u0O~Z;mkDBD7Qana6fx;ntls-CMPF{ zF-pXXrj?hU1W1&Xo7(_5CM?B&kFLb3!+pl_BESH->(>vlv9lX`*GTml`%6JKDfC=6 z+kx!4*(|dc_~*~v9jF-&ebSnmnnm!Nxq%Xr{hcWpY9=P$q!bkNXte-oHeKa^PY9_I z;_`}l`qsu`OuUgdS%=gH<3iOeUnGkRGltUjv@nk^Nus)@=6yIU6TiQ`LWN4eu3zBE zlP9CiEdC9ZE>ny)_m`J*HL?+o8vk)`u@OpXlx=<`F?RcTYFyk&6buthQS~rkRwlI( zzir-`nHeX5VNe$FCEVxgP|UeZwT}%t)|PF*wfpgoZp3$kX`y;invjTSjAeh51i|Oq zR#xn6Y#&oM6pkwyc>R3Z|ISNjp>m3Ob#a8{+^zfz)kgH9-pp<^dD;;W{wy9t&LqT# z=qI~fD9FjNm-RIYai}6fc6KjN&bd|4JHqI<_jig7gFSzkCiVIA=ka%5E3Z>nynnM& z2W-OJE)TxR$!YBF))G8(MycDTER8FC9LgT|VOoK@Uo{Hy@(plO2kq}?=j2zZtgfw{ zP)m{KwQp%@Id(rNr~v}#@?d+i)H!^r{q?uC1H+l@^LaTrLV$?}FTw*B{rC01uS2wk zOG`_e>FPc&%_}NWr#y7%gd-sPJA;rM_?j9UAD%gLrr5XrxsWm?8Ck4RM{N0kxhFpq z>jqe{E0FF+`0%M9yiD&5?)+U*!YsEvmp+g*LTN5aPfs_r$2)~jUL8#K-}9ml@T@;1@+mpx`E%~GPgM{AdTKA@x%4tiBm4H?R9gZk2PbF3`@NM8 z8OL>T1gA^$($kf$UAuOiNRG<*-n{UgdtP3Zzo(`+w^eC>3_>QkwqAubKpb6J{|!78#fv~XfLjsb+$N3>;l(?TePQDC)t9ZUShJ^+%491>nw++X$5kQxEX4jR$ z_Czsud@2#U{!{kbb0r;$*m!%AghfY+sB;>r><*x;+k>?Lh1FAUd}_yoO=0m}jU|;G zDz=E=3UBvSZJL>#ZH$bh6qJ@WYVGLoxMM6b;!`s4w{6u$WM>#M+S78~t!Rj_;WPlj z{a?SHvKM+MV(h#2v~<3|$h^~P6>_u6U|+tWt-Ft?H(${dL}FZx8hNn%J2)G+r0ZLiONYH4xVTr1P#_51si z#=x!TCUDoxQYk*Z!y-RC*)KZ&h=hWexzDHkeHP+s*B=NWP_l11xA-4~gnX&WLj~#A zFH4_|VUNyoW52&dCr+H`9IoUFN8lm9Vw^lIE$p9vj!utsXKSkK>T=I_#g>cByvfOz zmzNJ(UHKHtE@;!u1w;P6;G&?QpsueU0R^>ec%D|u`@;G2=U=DXTm2mfDD)@*bm6P> zBQ70M#VNJh3lhIw#SQ#-*NQiIFJ3(P{@zAY$RP#;n=T+Ath==u*$oiVtl?}9wY%R(EBJ*RevZ?u|mzUVtbLRy8c03~#SUFS^!(hP&%B6j`OuDnw zb*fymOY?GbDS;LWIaEz^=-z6FnhT-^+xFh~H0>Pc*}S)3%6+%z-~Jw{mDyUG<-E6# zjuiIjhyN{^OvE>k0ug@V-0RPz?Q>T%yG+|&2&;PRYje-GJzma`^O!^><$ZOB}C0hdsz6bWpp~AKN-R z3&JawK69p=I(t@Sy~C+kkh0z5wSDx=aP?bUK!{#Y9k1D5P)L#X6#(>dnuBA!e9*z= z&Q55aI}G5;V-HF|Miw$Dl3qvCxX`#Q7l(moq;It8N_nq7+GPYSEb5YZT>B9+GC4@o z!@GAb(3d>EtfTX6z{MZr4@8E)T3;YNazq&hjI^BrK?{>DBihy^2^ys2)cB)P{URa3 zd8JLHh50S;uxy6U)MR92#et0hd%H3|n}q}SMl#h{F#G1bb17X=G9#e~P(lLZLp*=L zR=lbpNx~zoqM|{pu{D!Jee`G$Y$!0pD~7)_Qk2ci(guhmPoE})1+*N*K1q8h zx1Z6=J4r-DRP3Me!LZhESHSDfZ8R#G-PL0@Ha4#lRFs({+$lk^5+**(80NpXl2SbK z^y$;jMkAeq(_CGPK7chrl5kyc{#kadwj`t>Wb8eV_=`=^Oc4?b6$AjMp9xtVuKD|m zAu~ws>Ij}yqO@<7;p5u|V#znpK5b+*8W&CSZgD6|%8dM&I zl1H%zKw_-GfA<`OhQzgmi}OAgy0X`iq_zo#atThWDNr{J=V@T^&ysFfLb61%9vpl` z@DQ40aHx>0TB2v(2Ta{K(@#<#Is+&r^!IOTB!?kk3TSgA!kj6->vX@`lV(>-J*;R) z%J8=~RH+De0Qa7>@*1F@y%Uk{tDB(o>l0CLES<%Hjg( z4&_2z>t0*zMkh5W^cM!*VYziL^=rR*Q+C87~ zqpP#?R5%;txb$32t1`1ja_bB9xrWu++66O^?OkkS#>U2;37Qk5fI>tPGL~S%hjpM3 zsh}(li`)Y8*I$d1U2m`GIRA1v3*^?V&oC6wj7rH@!{PYUoO)ddJjFYAxY2PM2$+GX zh`;B*$0!yLvet`3Z)wlp4r|+`w{#Kzr;Qr~1ZXjQ?e7m;o9$!g<7=N21e%(`(`A-( z>A4il?+H<9rfO1@PNc8fP{zs>#hiGimVS%8mnswUQyy%EkImqfuHXcrX|(GpH=!hJfDj&83ZPC%Wk zJ|?%avQh+tKuXgV17f6AiPA~ZzT)n4{ekv_Uo z^hSfLs#Bojm3cPzL8_yqzd+g&$6H@)(LzN-!&#lh^;E^+-h3ILx8TOd_nk$u*zH}q zK`xIO8Zwxkf-G+Wpt1O6-3|UPtCN=B4+qP$4lDL=wek#<+b7uNYO1Op_S+mIx%T{g z;{wx&aGwD|y2e9TwQqAPR7yql5uH_!L4H0(T5QfGB-E9Dh647?l2Pm~VT|+^*AtcW zxe$V{R!2jQuq>?dT+&38`mT#h(e2MiOGco9@d~JTz(mSp?VLvF9DMEWegILfGw0yq zY8)IK++Si9b9J~jWTM@3AL$s7`4WG`vlmz_8l-)*Mn!8tMu8?XePIObcP=peTmeBU~`uFD^b` zVjm*k65<18MA^^LY|NQdQa$A6rp!d7UyywN%8MlnKxM)Z9 z<4JrIJw_n(DPO%Bk;^Hc)1%USKNHFk6=KcXA|`bh6b%CS;Yk$9Na>wpNT@&(&~#_I z>QhCV0))2#bkd4ABsMyDnFC%Hb`?*$pn7A?e}BXO^3|&@(|qI51*roC#Tcs6p(Bz@ z*vI;Mkm`&_L4ZJ;yUGDuLGaf%*@OQggDejKu0(zDCh=caZu9?ATPtABgH2m zC~_DUpBt*|%58sCL<=tcP5>#u((mOXlY6M*h2Qnfn~d-zhMZ`%rV_!Gi~< zAXb54*H^jC=BoOqK+Q+G4CIa>zwLQTl25h;?*X@8M0mSmrClO6IXN1Z7O5n&y?KvS zjiO82)Is9?2+}xGDAGVj20`i~C>2Zn<}p`d1h2ItQBA;YMvMCyI$t-mwL`A*RT4gY z`0&%0FCUSVfijG_ER(R)s4PeyH?@h00~8F=2}eDXC1Ct)kk^sU2@`i(pVLLv9VSl0 z>@`W9`F(<0$g;H#wOv~HswojGg09UH)T39Rb%{ES26SzQgK+$I#4iF6`Dc|ZYzr%t zx<7Ep0V3N6pc+p3gaaPQBykezPjfU=JrLmgWww20n@eMd{Z)Z4Q^RVA-0^QhJ29Oe zFaE9oil4%(Z=(R|-M1F$g)AvSgcSfq7v&WyLOh^a7NM?=V)yt2-GF<&dh>?-7{@hP zX#J3rlNY*9YcK*J!{x)0KQKd;DQkuD`HTdIk03cZq0SCSN5xeZ&>g4%HU`}kD5M&g zmVrSeAph)u)dyt9TEA!{O5HmNq^ZFo-Y^7G(U{Es-y;t1mM>N7wyk@@I152j28uuc ziID5;*^eYFJ^>Ilci6N5?;*YRn=%V(R^s8|(F)r<=~*r(Mk){ziBOWMSna%81PmF< zNUvS%(yJKh@SiR5sL6^%tw+#Ffl%0iI=u-R10pFo_Wc#kuNo%?s3!UK%PD=gR^>cB zMFDbJyX2YzE`1ai7oyDUMF{;H(!+-lwGqF~lf-+=z?ldkO(5*}&!0cPtftls#kDVg zIemeOj_%HKg~;ulNdI_f5rGN`IT=O0&E>q56_7xz5KA(?6^Ckv_<#04*0 z(CW6nFb4y?Kn+B)AxMvPHwv@uWr?HA^WYjm{t=1(a3 zl0Aa3y%`x9c^s%^W42~yI0RqGCiBBIu4G9sjkDhhy;dwE3iNe!xY|=>jIXJv9QF0_ zY3T3QaqfJ5Wt=m`XIyJ=PSB74htJA&L2m9VS~)#PwP^byP+D3V24$g^_QZ)Z> z713OsB6&tgXK0edgoI%r*t^J3h=41h1>tchZ<_S&Zuu;lKWJ~+Q#PS{#IXY zH~}#wRgX;>?a7lImbY(*q3+9o^PNSP^E-KQd(Y!e7QHP%4Gcs<8ZRdJEyR6Mv>PZ^ zLX1;MMn(qdSUz(_&9Be%H8p3JZE%Q*=_U%AhcxzG+?z|3ihs5RQsOz)WJ%q~hYzWN zzMu!8A9p~HZDqFQN;lo{04Y!3w}XjS&UDtLWOY~k?qrxJ=*|y8bT$J@SONXQqMDi? zseN}>C2Y5z=Ns1=z5Da!Tq6|vZ+(5YB4c7`94p3%m!Th6Jhcnh!pz&d3JHZH7WyN2 zdD`cs0dAY!zWw_5;v&B)WIai@nJ0}Mg{7rh28AYp&mKKG1xO+svLsSjZ=b2-Df`g7 z{LXDwTM$YfY~a)PGQI6m5WdRMj0i3A5EB!JLw9pL$>Rt2)4PJw{$ehB&a2OaePvLh z6BHHIK>`*k=g!cpm=})`|Fi2JoNjIc5MMjSx657h<5%sg`#kU8LQ|d^#aU;kM#8r6 zlMKBIr_|ExUv0|FfcPS`A2BSd$aIB@ur)=71(JSpW|RGOxQLqb5MT%b&pNk%&+IdC%0fC1` z05uDXztOFQC62RdB6XS|;#-z4EI>Y72G%Y1E(XjZ@qP3h@QjWxpFhiis*#y6Mm>BH zNP6Eiom8O~@ab#pBnP~(wGdEXfX5XC>W)3Ih&T1lkcgP{^MEon4$_^Gw`r#>-KHn%XOlE8P0d zoj~oJHCN3~hk#RPoxS;%dBRF9K9|6=26}3&z;B@%CKAdkc3**J0;`M+3%Grf9mp$V zYini3$Hzx!XKmLdjB2ZI5Q91HldgEuVNi;LHL?|bd?bB*eV+@@Oe(4?YHA(}kBD%F zFj-v~t{yyjuRIvJ;@tEBz5@@vtfCSGErHAg1&8seDH0YImfIELSZv12OsiHC0t=3VO&N!9+5-?EdJUGbMaJ{1zId zqk!Q&X2eiy3$zjOETu_qoK>$%RQ-5rj}2-kLTSxRW?*LiA(zLdW=2y;(>bj;*O4u& z?sh*jL0(bO=nddegs$N@f>%wzVMr7(@-eK?dEyfI)R1fg|9}h$y|EJAKHIBTDG)aV z=^TcN59MDJdf=g`UF98lhsH@oh5RJ1-h)Ah>O&CjfY3+~qtG2>_TO>-FKbz{@47Ci z5gwC+a;j$7@8IYYS`$u2CwiH<|F~*`(=h2|UOW4K;u-*C)5h=hT6G+BF*+gZd z?X9tgH@KNvTDp6C$5EReIy=#Tn(?qW9^D`yGAB@h763aLRtNpi2k@tR#t)f;kT8PO zN<0kN#k32I>^If`SVbd|JwSSJW21JTiQ%H1EXc@s7$`E4tNqZvlKk_D9ksKUVJXB` zH`JTWkw>%pGn97vwzf9lKDniB&CMjxE@?nwE#?IC)ZTzb#lw#u^E(qP`*qV@nW!Hd7Z>MHJ^A9T8SW7(=~|;OS>}88 z9u%acPTSIW&)UdO63UU&zqKC>K>`W#tYxSsg@cm^wS+u@DM-BLourS(1N-RtG>x;8 ze>&)Zq%5tiHG*9I+8zcBtsBnHt}dqU%3$#TuJHOHJNB~2rU-U7^ShZ1P^QbOs!d>4 ziW;jAeFY*kP#~K=>Cd~*(1*_4#>dA;ENUEZxe_2}Z)mNcLJDWM0(35f#Y5L!z-VZ` zJl^=bCx-@l8leD{X`p*fMfl-o`P#xT11PaLhX8C+u3i~A&{S71|87B?+tp1T#S|zY z2oiu-&>WUi2XHcQ7semWD8WPZM#?eRauCCU`IDz>Oy8|WS|tdMw8-lQT+*`jS)f&l z?@9i5OF=E$r#Fs){Z?u?tQ|ZC|WmgwTRj}Rp@9$NBZoB-ah5ufa*u2?n$$ZwVhtpyjPhCmzW|S%f7oI5n@C|O)YjU^~NYPUz@|I*#+F^bdW~@R$O6!Z&zyg z!i-5P=FRH<Z&mu%LIx27A~jqyvJiyx z3zz9bGmlWN4(fyC@bSACDnz)X_cGr0+l=HeA=Fg)mU#Z=mvZ+IFIe1L9muOHQbr0V>|(`Nh!>_mP15dKbW`A+kC86?9GG{2PLT z)HY@G3ZaJ^!EN+!B=&;d>@vK091J7qJqx%x1k}P-F!%o$Ypr+ z=us*vssQy@mrMa+6i@j>)j*jL-B0~WvH7dt2H-bsl8HBXU+ndmqe*`WZgjDk0{`d&{6N4NTpyl<~ z_(~wEIRq?3)MJ4WdkN|hN`N{#wC)Q-4?#yb3Hw(cL`3nr&j%{u5+u5DFe?G)!wCwl ziDJh9`4VDa0Vco-K>z_m?*u3ckCUr`&!Z#)DF>$sf=JH@dsVr*O+<0h1OyLsM4U(K z@Q~Rg-q8Jd>Eit0+X;|Eo8fV&vKxSN3h569+mFbwQ(^6l&Zjk@GX~i}OIsV%^M3p7 zVfU)zR#$D(Qydw4>g~5V*wQY75l{5qIuDd^C^t9yx`S4ydc9xsDjU5Z_cwm35Hnn2 zbp*PmkDfn&2sXumMI>w6fd>Pt5+<9|gM@12MnP>yl)X_! zx_kF7>=lL~b1>3`Cx8BA1+WBdCg{XTCwtFuPnTVaW_(D^erXh(-O3P;2vK+}8c{Nd zcjrn$Ru0j~Cd5#4_6FB+v=FgLNm0=5VUv(xSYBT4T;Whlj)d->8Az}d%S{X@-2(8t zQkw#}4>euF!^tBSW2dcbb6@3;B$44xQBo^lN`X6_r;%h?V%hfbpEFl)7#Z>H`a!>6 zV~GTV9un;Z>qo0405X6IAZsm#LIvLTdh`ee0ssa0j6j1&T8WpJ7fSxQOd$Lwn**;sRjuQCMGXe7lG+ELz+)tkD?b=Der(c4Eh=tw-@Oj5??G8rw+on z2tRaPXJwnYxa0Fg^iQW&%=_|J0GyC_+HSb&1nL#xs@U#o7Ya+PuDMxx-BXT=$DwPw zy+hPRzrava6G^4R)gCQDiWn&+r6m9SY&0$AojN{4{BE_&lya6jZ`W zut&Z3i&LaXzeC~%YI!0(y7F%N)$JMRM@xfO9?GnWZXK_FxoqlQ8XH)Se%NeNe#rX(fj!+l`-rhR;QiO+ zxw{u^mF#5nLtZqUR1<;x@~#!2eUbGywz=7LGD;ab~z+MgQID=;h35U)NtgP!NVX@msVElA>Z5pU=6BB zs8Ka|dMc(f5(GRJ43OynodzV7g9qcP`5ppSU@sR03NV?V zz(ewAr#X5Ak3j_$D6RXUa`%Z*OCKpj-@wg|1*oKdl-XN8;1A1=ZK1TBGJ%c zCgF(RML)Nf+w#ln_*cs490o6sYVkU$`Jd zfG)8t)MH;Gxb<@?M9M{PX`<^ZpH3177_};=@{@FZg2JgDY9<$%ADWVF4f8XT|C2r(w#J?CJBaulvM$tEV%xLikK9;UbJUSkD!~j)9KnZwfXF zgZu9_k10_D0s#S0-r72AVC&;+*sch=^8t)7mTqUjTX?eM_}R8#w|y$De;)mkTYphL zp^ULvR1P!4rB0f0lrnVbIghykQHffXwf0$F~`-yvUT5HNAOkgoChWnk4IRs3bM zJO%7q+5{~soOfAZR@XiwBtY$kq5o%%!n=(qOW05--b)(WJc6j1kdxr)hrg*h4ll6L z*th1=W0z&hd47zVI+j5YTh$O-n@K9e{Ex!61aprY*m^gKHXB6rSC|Wg*&-) zYh@Wp;9zK>05$6LnKNUcR9sR}sQ0qTvv+3o`}bMD&*P0kTc^-zOkQ$-X9b!gM?xs+ zf;qW5!=5e^(qX;YpHKCxh{lTGk$>;oSObkK; zRy6x6VE_=j{`dDppaSsNYjPcbCh2t^QTJsr^QOoTzyZ?(7NIqbYEOn*il5Q;6;-yGh|nsi`a-BBj@1G9x&cn6>V+x_Sf2~W z$yfR}p@Y}(F_@e8DrJSGqxGo*%W1Yj470DoLAXnfx@x~2>LcD*B+rM{25 zfHSByNHSf6OxE~*$5Akcg0?VF8{NS7KW`<)C}{W^K*2-3d*CZaQDi{B5LuJ(F|Z3n zJa|BeyS?Moc;eP5+C#|j2#Ae=RN@JV2|B*FE(gBMq~iz_I0(XS5Pncwx}t&>*-xRL zAmIA@2Bg(XFKGWsO5$zVlv-&Of*u6WEK2B=QJ**wd@jENA9{^%GWQ6DLY~T&8XWBS zWvaY2kADoI2MyN;HZo_djJ2*>B zfpC5`@17L+-$y&LBmf?i#V!CZNhCr75AG%Atk$@FfILRa8HgUHj2?!gGY9`mh+|}F zhwC8mlsygn=2dY9jqfS?ppf$dk^3k*lhA0n3AoPVqf@_O3LjB%@N7bqRvLBd(0lOU zRYkp>ME1ea~Z0m$=b9A9NQf&=t-IpIyM0u(x&_~OKY@XkpNlxH;t*7x;d z58jOOTZVNThCfG)L@wMDBMD@CanJWa&woPxit=ULf4$l+V*l$$qCgbK*U@n0s7%7} z{ym$)uv*6mcV#dWd|dW}pW}tD5t<(4LiE#Pd|6d|R(O;MeE3gF!@7eduZCr?k=G}F zjG@Y+;c-?{r zS@|O{3p#w+xIi=k-0Bd1us%a|FiNzxi5JKo92{}n41!IMk{%2W{gM$Eg|XErx|+81d#bCf)`EWu^K~@$bVbnA9pJC?GeN2NbO2{ieaX^8#ox4u%|< z4`duCAW`Gstsufs8w+lBosiyL)kH)rYzoVoF zt9#GwA>SxGZ0^TsDSHLbW< zV=MmgeG7tv-TlY}8>g%I{r-Iq!ed%@ZV?ndqmOfK!# z!BL=KoB|t++0qoZ2CFsiK_ZFaqsH*-pza_K1-h&Qq7N6k_JOEDNf2C~iGVhrhWF0< zjUN%vZj}VrgsAg4Au?3Kr4-Y>oxjOj$686LULOfNIL7gO--swNu;yF>*CcA@A3uH^ z39ULMmQfc3PXoszSJHo%@Z`w@=x~n%G-(1&0fh~8hpB*IRzQu2pftyHNnfBNlHNiX zfZ?vk=@izDFJ5#QvM#p=m3$MJyhO##dG@D)|0bc=Ev|STH~+0HxC2h;#EUB7U0h}{ z!6ov$LMU^3@|yf70VM_nzeKsC4W%Z8K@ZZ1P--4!Wc0zDj{TdO(nnD)$7O&6urO(f z%T1~FM@j$dq~hZWMQj2}I6MRja0C_rcSq3i6J_>zRS3e*-4^IX+Z)zJ;+`jk($=nq z)7Jjf!MUY@2j=Hdm4-4PoM%Xk4*0*g6CH4KI$AU@5qQ-#D#BN~bvR-A6Z#l2rCH7f zaa;;L$gYLBvN#lj61I=*!3z!1^Mb#roD%;}hI@93^hT+6(69dt;2JBb^>r*Q_&;k9 ziA%WM)RN4YAH)i}o4L9IM)MGmj3ViTh{58{Z1;SMn=sX$*TfwUA>ag0Nz9I_Gfaf>{iHe|_dTx$|!ZqQJ^LzyC;jQ=xO5V$o91Z~M9}=T6bTfS7yKO-u`2T_t zd<3QbkKKgiwQTDa41bH~C4SHvCnQ8_vAPMmIuPTboraI$oa&H~Ff8g4*rB}DF1Ae^ zzjXBjUM>-iyNoSUj;6;Og(G7rx+blfC>jXlod83(Z9o+6xY%AF$r*?xk|7YVfL@Nq z45S`Go^#r{vLGK|CU60{Vl0F~a!6e5*vktu1P&mt^`C@onK>(&j0g8Z8bJrQD&GZD z&8n>JyNFY=7l|JoEG0>d71jqjMq7f&Rhe2y&$QyMT*x=^BtHILp+z$1OE+OE1;KyM zsEtevNdU4xfx3xd$T7aZJF<`b(TLohMQYs}K8KIM!P_NITnYh}g$2O%FVinixUm4x{kVnt z4e2;AhVucamjjD23X1_&8$=pVYC)HQfD&LBc;959g%+wmr7LfH`0EYwkpIkd1SC^1 za-+M2U_4_5tq;SHWuC8k?);Ir?Io>30l28l+J$wS1Jw^*_AZ%6odT(^^oy_Uq6ib* za05+SU-869F;HIMs)Q7@Cde^b0TT?wVKPvO$Kt$B{)^xtKH+`^Ir30};|?()Oy`}a zAfR=G;a{hxdsPG{o9hvWJeEZVnn_Zccqx#CSr%+5Aa9{&2yz&KSOH3LBV1S{wnuBnN1yrQI& z&8B#EfYkz<3F`czvj9e}dJsuL=mQ5Wy8DIB4B-3n%!Da*`>P+O_gip8;4Vo_3@#qQ z&l`ZQkAZ6(u8$eL_x7r=FggHVGZ-@&M#E=dy|Mw4<)giQU;;D0e|hqD2?iXXCkfBf z*if}cfkKQ>_lMd1Yt7i*qofFY+*ihzouFjV?St?>NTj}!(qs>E0fd+yq+E95fTRHV z7euYNFT(f^nTSS6|9^uwVoFQ`?_W9#^fuyo9^uCW)PQ~ySgu=#d%ijjx`)Vr&0A(U z*adyapq8K-9Do&(84Wz#F@*yR?y*REVR8sgr{7-4{RVnC+;8qFI^-Igde}th0JsqY zC@}zsKOCSiXfrY2Tc7)YBt&qE9Y1kmz{d!LOXTnZooi3BtlQc0vFt(F7D3Dz`Mp4q zIR+Pc&%r4}P9`wkvVdYXL&R&L@agJSr$tW zacrvHTjK|zwqdMK7vdf6(7g5soBq7b+gDExXpckp8GKo*oA5OaAZ9m#MhXt6>7|uq zpFdYXS611F@E0$9YX4m3-z|Cl$CY4+_5a0ah41Y^%7kz1cnHi+{(FXPTYk3IS!9ey zPR9<-oE~UuIyk&{>(r}*%gDH@nzj3XMk)gn+t5maYmR0h?se9J&uSdHl1F@ud6Ac>n{Dd|Kh#JK)}KGkB$zK{Q9cPqY)533R|OU%p&7-WY-Ut>EYzSb=Lp zcy)HwoOEC6Eo^JVC28OZBRIN7c35G>*iQL8Eq4Gv51lqohLkkI#pQR5m5F*(;uE%GkGz>U-LBM_O=Bf&dMh$pQQ7lbI(2I9zsul6@AxN%4Y094Y0m^Z@M z0f_mIMs+MwBS88fxd6YJJN^1V0SwDZLg?fJ7t!w=vjcj0ABHqGcluFor{0dEKl9m{ z7vjab2t|=0gcWl7Wr>AJdE!|mHw6m#kK0b!?UiVNaalQLSpSuZ??gjsE<4L?I=j!| z?<8PkKPkn;SpJ8s_FthrAJCppWG5MmR*V#0utvF@@SO^~Xe-rEk19Gj0#@}6lw~nT zu^S_x94w!viY5UjKT}1ExgYh*!!uDRd2Cq6^bO5+rW;-^@tALv7`Gj0x#vz_m`bJS zfA)zP0BIT@tXRB|>sYx3mXt&^_lp?zT|KiID|{>fVi*pueaF3|ds*l<)^Q8R5Zw%6 z>kag23QbSq>?LeAAWkMdXlia04B#O|Ad>@)#U>?s5#Msz3Ldin1C1AI9q;)U0zN=L z>Qht%&;UyJ#TI+!O(p9vJihPvdZcHt`-(>mJ`qpE=gpd5i4Mv^ig=O9GL3~ zWdcB`Xd5KRsMG8yO_W>vOui~Z7)l{9AKrWXr7EHP&LQ)E^-Kd$qFcE2ssr4_zoqNe z!}587pkrMD&sYCv%@lSrdLEMA z;U%cY>Ck0}?OT1q;F_vFg-)|86G|FkFaui#g8uU3iK>M3FUpvj*h=++r{hJYjQdO1|uXIK~n32!~GbBl_G?M)*`xfj-h4YRYc`3OR@AY2EylOUg3 z2a_*?8{nbq>bGu%g0=oZXsCylkf9mrC}oMC!kjD0sMG^pXYlLb9xGJ-hK7bu<;@KZ zFJHc_OZ7#!nxNAYPZXO07BcmlH_vSMeWN6kT;Vg>Xv25U&?%L(yu!QwQ>!BH(3GZ} z1;M&yyh5hA4D@i^)X-=Oz*LCdf<%2buY zB*B2Gmfo&((SL3Q?|ku&DX%|K%XIGJw;h{Ao;SGO|--%8;Pn9C}uSz{Yc)0#SG>|2I-yiUu7tsK1>Cm@z!EM6%8t}4Rf~zKT#>zr1 zm`!2}V8}3Dw)h@)uqYt)A6MH^qZz(si3nMbfI`uLH=$y!S04{U?#IV!(%e#e$cuzr zPH3k=+5$5a9tb!$G$#y)gAnrdUkIZ#@o;y=b-5 zY-xvaz{X;IZUB9W2Hf5Y1?LGgsE@-~V>v?Ly9*Rjts^{S;F{$HVd1f=+5E@zQvk}F zp&jQWo#3FUkKb{FRBu04ED_IcSfNBawDSdrB1v}Yjea$X@L_AgiV32g%h9AxEj8uI zd`j-ROy~cp|0;6vQ@U>=S8h{&>)psjriZVc{NI*FKnBo>C&Tbe;5w`hm~R_L?1w7U z*Wv45ESp6>9s2&I?)Sc}?HMZltChRX#k53Eqi7Rpg96D|Pa3}x`cjbF{K4fZZI6)r zmuT^pr@CKzZ^!rGVbPPfsu;0_FFYieo!HGFG4^{Ed0>H6BsR>TsK~Lg`KDGZs++a= zBCZJ&A0RTgF+hrm8qOIS4$YU1=?JRWyeRys^)NGmja{^qlm&>n70Pi|;Xfz3?0AX# za?3{Sb4tJN4knwVr`6EG>2IVHA&&d|bZyhrn2-D{!<5^LC1{ zJ+8trmtXxB$mA{0*H!7O0usvDUX=Yg%5!cbxulTbZ_HVerhT!A%SI<`7|J$Ru00=E zv~#;MSPa8~zv%ZIsuYfTC>co9_+O|F@`&`rlfqMIak8irbq3cT;$j1%k}nXi~BE= z;aE01>6>Qm)NATh8%w#iHgXxk4ll=NF7b@`9uB@4?IAYU^_;W!(M0smTMC>rF+cM; zYKxgi=ESFNUX{LB_S-4;R%$=>Isw?GQ-mc(BdzvRhRCjFpDYmbI%Q(b6PF;I>f zPubvA$rH9V(e{em(Qo-&%C8xxX%u>m!m!5D-pQrodIW`e+2NnsX7xpE(JO2*Uvjb> znOfZUI3&ExPfEO%uU+@FH*c;za^3Ldb^@-6%y-$`Xprwl-Vuz#;CJzbWPhA!Ll(` zWuIbEcKuUeUs@fCoeBAM zc>X6VU>7nv8@!XZM83C;KhM8w>Z9>iM529w@1-}z1O@MV#e3R6xU%f(;xxA?mZMAT z)2$46#U`>&KFrmOKqF47upx(Fhpe%)Z+T3Om*7; zUJN^wue-}se_4HRV^_{};WNMY47_D52<_`Z00s&0gva(Ll-;Y2zc zC3y~gtfqF1Lgk-M8c;0XICh$~rS2T2Yq1_OQV@Be%E^|Q+y2u*`0o;4-}j!}o-t3k zvNmxC|3n^#Jk~pZGcqJfJZO8XilmcoHN9s#{&@ZikFuY|{nB6JFIm>XoYs%>>U4)@n(CAq_}TAX)y;lYgWjJcawO&^$x4yWhFHWz-or!- zBa3J6RKrqY+R`)V^P@Klx~m?>F$*7 zM!H!b-AMPMq(qPoX%IN~+V6M1>-}-A^Jlxa`v*KV=NMy-d(4yqF57wX*Iz414-}J! zKTVqM&kuBR9(>Ew7A=_DnCDxHWN4w+;O8&*tf6fI?ZnlLI2jN6C9*0 zx_BIQW#gWf>F8Z!S4wm}-2UBSDx%p-jx2yBk%Dte4x*(pA0+xd*}&ci^(!qRz!r-L zf4;?Jw7NU;R?xc?SJ}C7h?HRrT4+~ZF`sF!xjXhMJ0J25Z(32#4iwt=G(khvQwm)6s5K z5)g=~SS@*8Ijx}yo$7ZODSeSLNJ_HoRrcf+Px2#-pb~RG22V#XDQ~rK+0NXI7@EV8 zO>NuB5~4jWU0ENh+aPEo^pSA{o79|~On@8S{QrlR_UD+(SH^6d{|<`X3o*-NYWg_* zO$b>cyAQoa&&^$^PTsn9Ei#$xfEBz4h$NJ+JfyBipzp}BY41Bh7z5zoBCF0MuXv*0 zvt*cz`8y?Hu02~GgXpY$W?^?;$79%A3r+HLGTkRT9%goPr>oLHIpAOB_3xi_v2ynZ zPZ)gtRUIN}DhYM+`+6jHJEP(E&JHXQWu!$e+6qyK@AZpB?v`is1K-mR4{#vZ9_P^7 z737T^x*PJ0C(p{8eg^*>HZu_VioktTC;CAz(wiWHs9H!hj3*RLnBd8Sov#RQ7K3<4 z^pI>-S&4N|M#UxzsXEn<{Ju2dh-t!foMuL{} zj)mm#ifO@rZnC^2>6H^U`(bBc?cPg*Uxr%j$baUD=Orbn=zZkxn6wCpVn7`HTduNP z(!Uq@^oQGDDRod_W@HwBv=kHk*5vQN+N^!6jxkn8rHOsM_)X;(G2YEZ z`yoE^v+r!OBK5irFLG=|KSWSoZsnIsnzBr& zr2%FS2&laf*}r8qLL+}GuE9$qO3By-e!Qx77PwXe#oueUA2c@>mp|n@+zM}k;|hg#l0#FERRL`+Ta9>s z;6pN?&*y%f`{ze`SFw2f?oc7^duUb63>5bCVA6?N=cCcM`havI`C@(JGh%>F;Kl7; zkr@2ixpU#yK6cq7hjn-rM0*|Ta~YJ+x2YR5Pow@6+|2!`t2xMOoPJaMo^U=NsuGzX zy?q>vdhf2TP^s<2K$&zgW)`@NwWTra!&+h0H9lJB`p(OdP}W*C8K%<_N76|cG9(*f z`z_KcPF?4@3SMPZTOEBX8NLgQ9w&>Xw14|U6(J+v!;ix;;YPmK-A7Mo`KETIeyh*6 z3V(nW(5E>g3z)hrq-h_iR)@46jxrDC{bXG3b@*Wl=r_(xq`laFdKdJ4RW^Ul;yy*g z<$fhFdsnqpGF3@48Tq~smK4v^Kq*g4zN`@b6_u5itpDhXvJ}vs9zsw`IA8K zS=XVhW7)smaS+sZ=fpThr<6MWi}yjOLC|2%->c$h2GjE>05B4xr*wTAQ0bprpBz>> zv8J%ssrR*jCc)LuQ^%@hJs8V`s}0I;C98wkKc5{b@1}*Je3C@nFKxSf+?Adj%Z^oT z<6EqzNS+B@jt5Z%&N80wwPpX7*POc7jy&Cb7pg?Gzw_%4thtrK&^rNUrTJ$4XX^f0 z(PI3T`wvU_#TE?(GsqQSL88r3Q|5TzmIrDXd0_5`eMoPF6qV&I?s8Cso`Lsl`RrnS zDS?3p6Az%}AFNJZEGxDIupAfY>m#|pf-j^p2q1jk5xdd;1@klS=$XxH^qe>yi|foZ7N2D?|5=@ZDX_k~cCw z3b?l33k5V@6-bv$7C(1E(>xmMua-rtbUhWUmr#?eCjk6`d^*9TO-a_1>W2XQ;wh8< zZAM_gAQnuO2oSq}j%1=oDQ;|nGaq5I`Ssr=!e}e9p8G82!~*Ou;2pY{1gn3Os%z;$ zEMOhDIgvc8^CMH>DxF4(jIBiG`-c^Vzqp^X-X<@qlE!R}w0gf&S}aD$ZB$GxU~bII zuNvAr^#mwgyU4|2DqQ3FH`33=6?20&SeskzQd7cl6ZM>QvrGMRX`58l`3TmSLQRa( z4wH7bI!P{L{F$21D&I?mza?xca@8)Y{=O?!d%gg8fiBtbuLIN0f&r{-<_iD!v-8u= zQjr>=AJa^E?FLu`G(UynV$NO)t7t`wzE3jP-&Jj09M7y1jBLk*F=Fz6*gQ+24F+IV zi??I(W0aC7`lBE+KrvFbFCYU=FdwFp=~$+E*T3~pMKXg=_Rd3;<|hM7n};p@%R=6n zwVcI68Q-0DxfJ*oV$fskjH!E#{-9e{dj2HYWCjqDGlg@o7~$KlZu8hsWWgJZvD;^Y zqKAArs52FFY-8pI$7eQxWVva4_u*6!THcaV{$*jS7#Pt)7daSTdEa0iOo0oRXMEom zEfT%!`s-=^^P{c1sA6cV z=)F;{np-LzI?AZN=>CP1VIhPG6FPa_N^uBVSCq-_qfb<(79I@nJzp>tJYX&tm~Ig=U4=B z@Lk?bJ6P$hx0K}(cgP2;QN0;4;jN%$^9F6qZ{a9X$+iH-ynurcYPPfIoUiS+J@t?A zpixuw`0 z37sxysC~8F81IxW?~T1XTR+}kK;tY1koe>-oF+;!nq4=zk7P!)mwdBDlajeB6Xz|c z*K`@L?0z7w@|U4N&Yr8O2O2A!d}p%0oOuvdB*mk@XO761Oz>w;BCShwanb;~NxJ>q zWJP21*Ens$4sgOI64OU!Fsev$re-#G*JEkgN%6sYl>1spT`v#^VT0hxiMf;H1|LH% z=0sL4j)bF|By>O;d|5I;V4*t?=WH#7{_#pGiN6|6wR4gJIwv%zG27IQyAaNH0e)>R z)aOn_L--bHJ^DH9kf*Yy3}MmKg(wj_U=HPMYOFxEblY}Wg@nI#w0l-2U|E}vH1HwtF$%wF4=b8QQaC+FqX-8cS2um3xd zG`}%V8%*FS+2A@Z9#6ET_Q2IQ3Pn!Bs#h8bY8)WOOQy%OR`?pw<56ZOexV&XZ?T!HK4rl^eu#Y$ z>Dr?Czh+TFK&83`Q38ZFKYVT1o~WArD*RE8sT~VrOlP;*?G;z^t+GhRip%(~7vXj2 z2Kz#0`}oW(1!kqBi|+j0_V&LQpv4qxVM`bEcX&Kzuy5O>=)3?5#r>2d8UJ7nAFMl$Q*~)SF&A_)PS<9la^9QuZXZ7Sthf_%^CzxI^L zL|#0?C|UZ3dPo+_L}97W$~S4BH4!PzbBw}fZ04%C*jXVfv*P_dAS5nER+zg@yIm+t1u^TulW&8I8cq$qDzgloIdf zE$)1k<~|<02iM72OphE)weJ769mkYg>}ts{eEN|-=;p1$K$@U2=ghDUfjQ_n2P36o z>fT)7K}LMjph#?4erToD_4xT!bbQ@J1p^S6?mU=_vw{`LVbzPtDZRt(a}AhAh`r3` zV9$}v<`+`@p5Ck0s^*hjpH511>m`W`jEiO=X%a4e_Vc4#SH-FrITIU5*^^hpo zX}*0|uqcHeil|R0;$}Bw&}8oQ&CtfowxFZL(Y8QiXH~E&$Tb6OWc>}#_IlOsp8tU2VgSD~clUWhS(#mOi>7RG2#Ad@6yR^@Yvk}_6&`weDdJ>4f95I`3Hx{E z_R?M3twAn%qv4$5i-%%e<#BT>V#B?Ftt62iv4#Yk@C>~SxiMDK2wuF5ypzc*DMO(B z0NR_{`lW;I$4R9(=2Dv}4p%~|JB|f^3-B-^=JV5f$Udq&)3!2T? zOB0}GkPw}{%6IzO76i-6D4uv}NWVi7Pyzqk3L%7HA!t*U) zNyrdTQ)}*{tjl4|b!Fj95oILUpH-p#&7U@7U@ypC;~>;k#5lN4HZ#@aR}t4zkGnj6 z7c8G^ZQxnGUbuC=5-2BzWtoq#GYg<|&`G)~hPrF{zPfU#5m<}c2i*{#AKFYk`8hFZ%5 z?!8Xd3s+B6T3qJ<#Q3`MF~b#s54sbltrOZdjO^7?WE-_k`Ar@Gkr#Fk9*AF(Fc#vq z&7g?jz*C5m>A@6q#75~O1>E%2aDkojg)G1TPyU&g(a4x70fC|@gC zLvruy;O}T$gQ)8X>ph zc`7iu11U>&jMej(CO)sjeL?XU*iU!%aZLMpGfV^W|;wBcU7)I>3A`RN>J|`M&#sV#s)W}+vQpsIH;Tm zMYAva@lQzQw**g6fy{_d%hJ<>AI;hd2kq6fJ_m?L#Z@HfkRpjL=*+f%sG(_a5WA=_ zPrE2MkGFOp5-RyU301Y+e(*ZG#MVY!wOKU#ej{#dp2L~mw`~{Q3^QCdSI3lh;b(TW z+jg$64LbmzwT^bWP)ED>!HHv9z75!cFC=V^WbugKh0wJwYJO&hk6k$X5h;# zO1Ka2v5F+t5c74!IJ;`p*1Ej&{tlOOS&UV4PgQ8@3lE=MZ9WL=EpYXi-7(`xPno%v zmvs>PlO(NhG8{j+zL>*cN%eJL%LvYDtZW?7`~D2U4-_?f2F=c(h1?+Q67NKSLi&Id zM6}F=Xq9Ar*JxTEZ2VKDDn{tw07k)j5VzHYZtN@6^EQFUnZ?_+c;$u%gXR4kwlf%3 z$$UfXf&ZpA42KElOUdmQdYSY=(R117K*R7l#+Y6wp) zOp0wwwEw39j5zQ#mnmpXiOidU5D<^haia1%7w?G_X>?O9;S&V0w=T40I<5N9tk=?d zVaR?DeIEHW$V!#MVSm_^LYEjIf*JxMVdLyXa#)V9Q7vs&)eJBB#ORdL z?pNJ^xC7pp0d5g_Vg6FSRc&8Y=t|lU<_DBMqdh;Z-9P@P-Tbp8=b#pL%a+!({!Wso zHh`VH%w#O<9(Q%dcmRlodD*Ey&K+F|vlnn}Qzjxv*C_R=mT!d|O!{=$;R|nWSC3*h zzNb@>Uo(Kik2aMq?!|=~!(pGT$QQ^fRNf*KiN7;`{^@$nu>Ss`nn&!x#+uG3V2)o! zEAUzwV_ScA#3*?}dGzX{ajOc{D3L-;!FOb zis*`OV@#aqc`NQHZvwd;e423>C>UB%r(>yi0u&c?rCO0E{W!^-Ypa5mv(q-lvICVQ zOqG_7f~bz?r$s!`(^rcV{YBK3E?Bz5f{0=+fg+;^y9;>K*{BS^rc5um2effNl ztDsziLmUn<9ivOI)7FbI9lcq;rA0FY2#~kqwN)b>+G$m0CIi6P;BrYv@OP&ZqT@z# zsti>lat+r+9}WX{MF9LOn*mdiw+UZELeyhnpHN#MnkdY^PxCy;bS71FNjmD5! z?B4PmH=OJCiX34Puy%JOa$Q7?1Ggx$qqdu&?6Z`L3vGVDtXTqs&(EAtYn|tQk9QM_ zt@XT*t+b@#o1QcRJ(PEKOeURoW}29E7m+dmFI}3X7%S~7b|uJI|2Z0G9CY)XFqHRT z9=Nn3CA9QVBw4ZvFLF>$2AfpWQU1cgjVSpKS@4p7CQdMN8ZBxC>F4c>_+eNVTufi; z*uEip)`1li?q78Sf@XlcXk#fEiOHv*8U^^k@5K2$JH@yUT zs@7}JlTI|!mm_i^4+{PB%3?6u%GQ&5TAHVdHvJ@gbXrGS1Gu=muzW=^Ak$;!@hnSb6bJd$T~zK;8o#vOW<1hCR;> z58{1g6Zdw*TpT-&f-s8RmlC|LiKbrL(eJw@6FfIdckfS6ev7b1ziMf=OYA>CEe*e5SMs4aNa4W_F%06mu zO^45OU*C-|++4p4RC?eKgu@xM=bP=Y<)OB0#wzi8jlQzLn;$NDiSNROPI;I7pRwzx zB<`ta19gP+`Ic7H94|EW9shKA{vxjeaT>VgWK5GvXiFI%{lROqLD`wrH+`sGcT{EP zy0xyRg7Hb)8}AD~j4hi!vJ5_3J7@%Aan}Y+-D!_RceG_Vc3U&Au7enktbFZ8dUz8S ze*w+BPP+IuSAg>Xvns+AZAFtrtwS&@? zHxT&hyCDLWe|@Nh<7%E_Jhp+jh`!DCI4Y5)a+4R69-gJWccbp4XQ*X0PD~oJ>xqQ9 zts&TOz6#Fbw_WT%%VdT5vR=k;sNk7C$MmWjYHNe>wK|@RUWJ|*T3ME5u=M z)9YqYDOXCAbYi9JIKyQWKHoQn4*IxvQbPj_OZh5|D_Ev4L*HtTh($kG8(B~H21I&p zQg#jvs@~o(n2H;LMxSyXIgp6J;eOoo0j^NR4GbO~n8>z6w;3pMmXx5jVqWaio08b% zg-12rN5^)PO21P11p7Ds5b0Ok1I+uC$cW%)DreT*mzSMrV>(&PfyyhB=tUBKIz@fI za$+U${0Yr5{p3r! zj$(J9l*v7I0$HCM_YHVzly-uF-Qm=!ToRPObC}Bp08Rt$3(=7!cdb5v49_E7e8`H5 zsveL8lK8dN0u!Wpnz`Q)bTICJ4+2V#i74f8Wnc1#BGiVj93ZN34kDHl{T$ZtX8gST z?{HZ`7}i{_xd~MPbRyd3*JjP}eT)Dk7zc)BYDMo0Mop&99Wl3eU_*DnNZ zt>O7$7=w}V`r5J9p|1pIs|L(iTuqV<-5vR{4SWuP#*xy2#HKGWx&6BwKI76ZKTg(n zzMyJN3anEM*aa>cF#SR+l0d+rL8cEH9*APo@y4sACQ&>vz8T0`f@?*lWZe87f!tM^ zh5ay75j#dtH+0tHK7P7Oa%>SQ#K--ERhD9Z_(1 z{D?lpfBn>tMaJ5B)yt5x;~&?^!FPWWd=FU^`IW*eMMX)Q8sz1Po$4K3dbm z!jW?KF_bPC?k;&L`-JLS+xzr*Ax0ju<%y+iycv2rWi%R+bQkw23j$uq>*edN!d3oc z{oocl78=AkEh9WNqERBeP<1+&>CDr9sN(LUcVfVz*j41Ml;LK$QK9tHTv>O&*&-1; zXHG<0W(4<-GBnA@&^Ejcz`o&m8^DkZc-QsG)eJU-xw$!I_7&WQ8YTvx)ykm#5M54x zy*#nJ|GT@jJ7$zqZ(*iwpSwC=9I8?3lW{YqZm1r(_>yV!*B`iU;V@hhx#Iq>{FKvy zCz=JIt7w*Q@`c8S!1FLlocGx7V7Td(aQV+umms^7W~EeB$gWz5dWHWd7Y7Elj|l4N zS+_tzW1&8PX>gB3R*Cg0qC1*d{Bd%Y$sPF zF@+qA+|F99e`g#JF~u4ZtoM7gciv!tBOgp9^ydIHLVqXxR1OBrscsU>%&N*H(P66$ zp#Diqq*6k&Si6CeiHdb1{n%`v_mhugv8ZvuwHR+MnuTbF>jnPBJ1w_{n#PZ>Qh$mL zY_Fk+3CjNSDuF1y+c&>l;mT%ix-sDhxkT*+KY}Qm>@)A%zH6Z>TxqSyoO051z-ayC zpYO9hPFUcVQC<#l4GXF~-hkFDYt+0|+xHtBc>CzL8~lUB%{0AD1^hZk*$W*Gf&=u& zas>w_kyCH0eD&C~Ghs?CL_~naahOYCwYBY`O5A&6QNcyqF0m2wt44cVawGs9jVOsECQb4e zHEKXZj4kj%{tkmkrI|#FT9K2w9rtn9KmYC!M~br^nXTdvo_VkaeLmFXRqhEyA2{g1 z;JiY=l14R$=c6|}&>@?5j+>B4*uf&GI^2yraf>?653#uYa%fj6_;@~6 zTjD`CcLp0p-g#=M`~Xb@KLudXh~Tr)@RUO*))UJKhlS^>WtW06ZD*B02tbh8S@D>n z`mbWPn=qEOoSGrM9zh%Ba;7|Y};5v0|MzYVROH35-UGRyA z3<1`v!dgv4`^jB~qH!Q#4n6-AH-J*xO@c=>W~PRU_|GfJeIo(^`^gVZs_|y3*XZC! zpu$Slt2KrOI(MX~Mj6=OMbDZmACau#=A@dO&oVwG(EB-J1f>UWjPVSOvhtkYgF|;1 z1}xXy`pUz@i#OsFC17P#(0m$LWHKW5-5Vwu6N)!-IDuh%mcyZ`N z)N)+7k1qL$EV#MY1@aG#z*D!8$z_P(SJ$Jh%=Sza z+3zo?X}j5bCx}SiP$py`D)iS?;_t1ce1Nn3w~~gS@zhz5Q_@FPBKw8V%(+zZA-%?V zAc_a+^}ISF$pgjG29Z;zTA!A`Xe{k20=eFZquUC6&iUqQ5ad@O>x15~dY;3|Jv-qL z8>2Ob17Tg*md0DTX!Nf8lyeDpYn3g8Q)-P!`ZYn`r)x)op&tNaNXV|NRgU=~hZ(pe z>z@c)|Iw@~_=*OCgJApd4@YH!tSSwXy`@K#+V)cj4bog#&D7XuYl7latwC$uz`9RU z{%|D;ylmPvye;NWU}VE7dlZOWV}!;0f4?ogL`4LxcnEqC`+NY*`gU-RVnrtswMS#L zzk*I&(!L z#XG||yzNjjA!B>Wj3v!IGbha<(wWH8QreqjbL$7^t_OR+ytjE8`(9u*S7ybNIDjWs zJ&@`*+-seYHc@@`i|~Yre2yqdnRT^G@;japzv(&X7=(<{VQZs3upST_x`YoDJe`{i zzx7*N*l1%UtEp$+vKx#5g0U*v1LQ}5XqV%_zEs|qtEN{1TNj|?vdrx=m)rJ`c9Jz| zsfhdepnpH*7u99rX|LO3_zd(RFbZDm>xNJNs%^in6he2L-H%Yt(!YfpK7^sEZq^~x zYB_0jsLIvI1S`xKn+{f3VrV*Ifm1pQ(%uCs^RqN#g%@Y)axNcK%yOH7%L7IExn&5{yaUw&3Z>ZWeT7|rB@;Y0 zZ7f+>cW^*kUWakKF0rymu5{!CY)gV$L~C{FaCTK}Hc>vW=601xAN2_YVK0W5iVZcu zSp}GO`FXRLLprNp07YISkVs54WZb=SN3(jm+Ic!3S7ogbSbU_v>)XWH*zC;_zJxm+ zEL9Mc8J;ja5g(Q#d48gvZ*JmOp%_yNKqSqp9!lT?Ze@exYZ~T8CG=c^wH>N>wsBCVUbEokfoyuVe$jx*?B!=JrS=z`Xd!r$0J!}WS0 zHMk!bpJ)>&DclYitbIrOo&FO6<+?1B$D zi0BrH3HuSyzRDqP7OTSe#M1BX);Rc&%lAr#0#07E;C08VCN6+VPJ8f?)`9H-HFTCKXO5HF;06v;wg}oK#fOgl=q*q#5qxC zJlQz@rlPkJrYuT1N|GJ)LrmYRo(TmY77nKpkr~gooffYlWZ0BZa4(DWuzrUsB^hb; z=?WVwDw=rLMQ=jkvnS74kAkmZJOw$KFs!=?^J?-xGB11;=F*{IC=Dj31av0Unl($_+-(( zTEx*P9}X)2b%zU-B{%2U{of=c71fy_V4svjKa&J-RU3l3qlsF#?XTdm+1mb{Usp8} z$Y~oJL*9j3EEwJYem_j{r|U=SF>OVo8&7x!-MvLH@UABS>L%a`V0NMntMj$N{>w

Wr zm4WrPm&NU7AjF~&akeR1A^1IB9)CtE5?c_vHn(Qr(7*#(nj}f<6pdPf+9%0z`W|0B56l!%75RQ74&m z_KZ}M%$w&}CrQya86xZin+J-Ci08-m-h@SID^bXIH)5sOCWD`p3cjky2m1xFdwXJZ z=XX1$V8uOcg^a>(W3VoI+opz4r+3W7bV-oze#1sjjB_xsK+&eR0@#Dpb{D zLif5JzaH5iy8lpQ;XmFd$4`#z)x~wO_jnEYJ zHlE^Y_WYsQ+$eqS$X#*~JD3Ux1G!@!)&qC}*4nU++FMnc04l24HA24F_ub$Fa=#h& zPvvDNXo^)xqOjJ(YcDKj(IQR6HgYMI(@*O8b#+%;>TN2jM9DDmp482A+4%6^X`iX> z1sjlCG_27>P_B(MWN5tpoBi}2yzw>C5!F612WOkK&^&M@wj`;|&?6+>{n#iv6+Z>e zC`VXL%>~wg!GJR3lQv-3g-p&nl4DZ_gwkEf=%_)8F~g+Q;&d3(u^{y!`-4j)SR1vj z!WZGsdh*Lr7Apq@Y;zxkCfQ(Z^xaxw*13yptV9K#GV715Je^YzzxdLP9cy!aO)LeA z`R_TW`hy0@HIe(Iq`w_+2xU8}k#IA0zgD#;S)k+UXL5A76^EXj%Ft>B{e>7g_WJF_lgO0(QnvG`5tOl+p@R+MS5nX7*hXk8+QnyNzoqp zab1RIb^%34=U4(04RByAB2jK0&0c5NLTeB0`D{s{|6nb(6`q<7)=B0eTG%rU+SflR zB@-l6%p3j&a0&iBno))3aj7z%ry<(mk{5!H^P8^lks@5t$#{Fns4G1#l(0Nd431|t zD!c+n1JPmdTFpXDzJ63GS0|Xm66XhVWq$?wRYe{+%2W_IY8E3zBR~V07?%r0oyucA zSTDp@VCYX)) z&{$FCIY9Ll&-3YU9fVw>UZq5<*S!S44QTM7Ynb+{k(*ksR~5}zd+4!)Y4T|v$&Rzd zycJ$C`k(0YYXfemg+p1%K%Pf<@n)i+{GWYnw`zpbZa7}f!NOngt%)Ri_5ktr2F0H; zF#}KFQUxvS`Ki;c(0YZkFD3j~AQ+XV?4!0Q`0315off{aP5koxrwlQT68p!E9)ctZ zLyfh%0wTIn)XD_=Oozc{wM@n;_=nBgiKK}hBP(r-TQ|H1_dJ;G*XufNDWARl;3p#1 z=AGGroF{}RiZtL$4o=h)atllAD~xN*%R9r3bhl;yWdHs)eHnGUfTVLg{*+JxU;jD6 zdJULei88Xu=3)DNs$5Y_PDXPi1Wl5-vfdbOR0=WMZ`=S_J7x~$LOh&_KIgQZ7R(*M zSuI&WErC}!IUJ;7P!@J(8`*>Y2gpCLHZvJp}T?uY-+m3|@vZb`D}|rS=i2m? zHL^DfQ`6|t3d_L)<5mK5=kKaoTG;2+DWEAeV~c9H^BW)Ce%2S@rbsCsDufSyG$Lysq zR{J~fm-XS|$b?t*UdH1m)y7~@Z3b@D*ii)A?zd`99%Fklx{KIacX}eyeDSpfu_#5C z&x*>b&7Im}mOiIW|Mm~fxh-jg#e81W&VJTKNOI5^bgqkXuzw}d;L%gpH%(r_Gf<6N zqOypN_YE^F{;4`Npg2*^S8)e79Dq>XV`!zN9Ums7e6w)iGC~X##zn|q4oetHzuS`} zRcfYVuF}p9;o+U^c);Z2lf*gN2oowy91g$d$})T=o0BBq)=rN;hLH z&D-)e^koQ*T!DmF2{IVmVw)%8tC!;`zkYhn@HEnxxUvzyM&uJGi+oO66c^gBs4I@~ z;n6PYx74XT!Z^SSTu3_B(CFfSb~aTV2egoNwG*qi4N0Swv?1SusHsPl2%71DAWkcw zNN_@g(joa(qOhlZ;43XN3e1WyrJYOU2wf5vC@b)JFK|v{7funn^P-~w89AQZ7x@y= z#`2^2qIFaI_0u;J4tdXs=dD9t{Rpt=@|d(8U22>?`Eq^3IF?MyZAz936dx+(8IIne zZx~ViQR3q(T~beOfj~LP!pBiu0ZvAG@_DJFMGgY`?cIAf(DHa}g{80-e#uGB*mn(o zYYJTzP%nBD5o*nzezaZj72;YglnoL!lH>+jM_d*vxS%0Uih*iEg7ruM&Uy7gF|xl| z=ikk@6sC9w{v8x|JuXH4_SNY%qfCFUaVwq35t+guV@D6}5aKy1E}xRdsO(r(>U;8m zKjOL<$G1O$qWN3iV;Jn^=@8VCokBe&pJx_Rp^h*+;b38DTc4jERFHuQr zcZ-R#ZE$&~0;M1=HkM8zcmSt)inN;^;|ih7tsAfZRybw=O$`$Z#s`jJlaWvs_3Jnx z1GbB~PdJkvt(hYxxDPp`s z>yPSon0^<$p;YS(61?q43&cIzwqCF$C=T#5;zJcuq7*TDd3s;yk?4Dg4DgVcuODD| zH#Z}hNoB-HniHqSbVK5V$FIw6|1}rrQ<4f@r~hJ5xWRzG&cRlO z2D~xY#3ci!NFVj3dy`nn%m=d4KjEpPbz7NDVhc+xpk_lxKGqRe9=n0^56D~4^-Y&% zk>7NoS$h8q2ANjtLHU zx-l^8Yd?E2Gk-vC{KeHu7Tvcl(YD3LphA+6c79^*@)27hArgdD;Y%{lr*oO`0f{HX z|5^9@==dniY52Mjc)}1MctYF<-6z@&SF{`^lF@^#>1Km!Xb8X(v>rf_r#kh?7_Iy7 zpOVL{w&0LFG{TGMugS4oB2WN{B6kCgKqI3kH8UV-j(s~bG2FzH*Ny%1_KuN71C5;> z^NCDh2(5Af=kxUd#TZ#2)NB`4pL!pxNsSN7^{n@SxPC-dQj2V2US3WfvR#L6Grl*w z8fZn!dlHS#tT<1^p?aA-V6r+nID}r1Q;kTg+GAo(a(BnT!7P9W9!RB`STu3SEWdSG z^e5!_88Jbr6f_8ka`P@MEEFr4f-iYos5K598}x8tQBh?3lwG{j2sGe+Op+3ve0c4~D7Y zR(6-=RH&(fR2u1Ole9f+My`FbrR=)znHlAx6H$G4kDCUZ?~SbKk1k`d(6LS$7g(%{ zktWsxE87TRoMC0K&1ub|E^zVR){uJ7lF-Ck#4;%M1_+sZA&8rZt>dniMKZ|i)8|?M zxxF6gqYJt<)N*7PD3o5`d;TI<88r-5t-x2WW^Nt9uivI>x0elgQ5I>xnv}~EeRz#O zT@wpR#||N3(9nIs*s7ash$s4c)b5h(jZ`ah;-;N-s2D3y?zuwyM#d?@BbHN0&^8i= zO`qIHV@_)wpz%`Z@BVO12T*(pXe;gcNI;R-MxI={c@2~4vL2q~+1n`{KX)jYp#h#l z12v`l!p{BZ0VAZY=1;c_EWm7@&`sD3V`N=sh3 zfyS*!j&02Q;ivmk#;l94m9v=tf9Wpx+Ft))mAVeS7<}vSbr!4bo(^DF$wpauRS!t` zA9osQrIUrtD#oC80d%$)Ebls1Xa9BPn!y2XQnNaYQGbo!>}b*Ed}3jj{WmqVydGA8Foh573Rc5YLfYE(d~H-KHO zv>hf-n?{mFfKVhM2FEI9?fN$CKsP7T8S9y3wcejicWiiHph|53#4>PHM31DpNR=^; z0N>)mE)`%g<7paZf+ju@)`O%B4hn0_e^-~`&m;iE?)G5>>;ZVol!I=BGy~b*h=4;~ z^V~?OM8y+{;54>0inTqE%>b)kHEVLU2_u=&;Tp$WaTfOxX$Lq$l@`f5x>teOSb?|($cW<;5c{F&Jpy7llQ9!j=`#k`G_nt+8n zfo%aCEu->O|2sAZ0#BAiE!U{*`Ih4o0k%H%9Go7vongYm8HI0m3Lt*)02i>=oC&`9 z1|Dbor}1!h;JRUZ=i_I;4A_ec_1B~C-Jz*arOg88=H=yd!#6$3AQMAGqs5iwZOkN! zY<`KTgYMofl7R!yb5K(98l_n{gxN;dWOaD$#Jo@=$} znZ=q%SBP7kA3+`f5UB^`eZWtaP!Rt569|9^Re1v@n3xyVdH}2Cx}pOMsN6=HW@$DR z#X{1^Wu{E^J8SdoU;-DFXNmB4Ka_q6y~bnk$uASbXq$u5a`b)MWfHq>R%q2+!kw%! zI07-tV~Nmpz+X$JT*)~SLJA%$I{>eUnOfr*ev25zyIM`&0{G*H6MbEP91DL!7h7}6 zQ)R5cA+7u9hr2T(1Hb+`qqimX9qkrQB_GAW2IWLV|}frVZm!-;%xmX!^ZG(a8;1(Y=gz5-zZbk@9?@IR(E zm6xbzSJJk|?YdXQxf&aF{^Rq9xKFx2cV!Z7hmxC9ldjty08Ru>fWDR~lF|D2CU*h~ zVq}I8!=HNui9ea-;)K9qjZJ=M^{|aZEQd9ro0v%QY}|KwFp_=h7czJejqx>FGy+{s zVY9P%VfPa_!Q_8(8Q`9jRj~v7kLTf-oTPvLMC>9NgQuM2A{ncQF$m0yV7PlX|C1-K zQ`0$jZfib2kf+(po~&ch1rx%YY^kcvFklz8_HT9W+?*^2tIMxBpvu*Xew(%; zn72R6*La;tQZ8KfZR=P+?(HIz6%oh5sBQ1a$+`^}Zsv$ng6nwS@MH1lkEVvm$Wk zH4yCKtGylKR>ktL1-F*!#Yrfe&t5R0B32P6`{kF()Jx1EL(wK$)1SH}%9qv%%q)mi zsxD zo0ymwfc)%fUa$=aY>qbuSHp7EDi3%nm0^S43|PW_GN-A(k!GIV{XywXFzB(HAdhws zET($%KY_?ugaJR)lRs+w-;GgqLLd#VBw_ucH8SeIba$FTj{oxT_RRjvnZ$A7`Y(hx ztcCc$ph>9}^#8j$44yHs!q@v>dTU9>|7<3i9dk}AS{LrF36lpef2b}z9XN(luc`IS z$O0X*oY&Gq0s*mZzc-RWaAbTvd>`kF7r%DR+@H)NentVe7VO`A@)T(9Pnr&!z``Yv zQuQ|Nf!pJpuN1xVZ5A+T6@jcAl8BAB8{zCGtSzuuvLa;WYL_|>{^L<}^}mE=6La^c|78M>*ZlXG z>bPv+3E^P}=%nx^2{b~z$The+^=25z=tK~6bMv6!V6a_t4fdg~C5cJ%SG+8Rcr3-6 z_1)cYu)rxlKVL%K>2QgsZ*Z`8Wre(|s>&TCra3(P`=b61>~93OZ~fOVL?tC9u;4Af zs7P8p0c^Vis{tje=Zz)zbj+H1hx zWy4#Ln=3|4hy%}U0~@jYyA8y^o?eiuowqbtq>zFQb_^6up(YFk!Iy^Q<%z3*(9ww) zr0_%^A0OWVi>N@9{2cYZwNhneB|M}VELSpQC$fic8CPLOGtNeDG3d>wvCM$xo2Er-Lko5TQp=SS?!iwL#ei&_iy((&wqQK$8ns)IL`O? z`+dKk@8>(`c)z;$K6rkl1dxhl#7r&;H}5kR{02Lq{Avcx*4`d<|2%XjheL;oNm%mf z=%@i?HOCIHs@VwSzm%N3R#sMa8pKU25@=JW($bXISRKiQ)a&5xo{W6yxO3;u-9fCL zV+TB)rn`4-*->beHg8VHQ6~PzjTQn`{evJ^{Q!$i!=!Bi{C0V)-FfWS94Ba| zdT`Y*W06?GWq#Q&-UF^Gulox+zz`X&;o;HA_oPtzb zCs)@bC}_JOBc)EkUyMypPsbP>0DF>7^=F@)B2`X}s=?#D`RS3ThotJYUdx(+B``R+CZ0HhbR{iQDr}$aC~efJ4SzxWM?p4+>g^ zhsuOXCV!x3%FwGqi}(#?OndtE&ia-Xhw|XZ1>Kj-lP5{BPZ{BQ!ELpH6ru`)yWyUz5 zJJ&jiwNZI$ZZ3@k!20McmO(@f8|oPr7%0nyJEA|#AIN!zlB$wurvn2)Nii8t&)$gH z7dgJYbA!k!=1;6+(tt;hbdoTL^e8GSEEKVzVE&XZXI5i_9gVf7%bJzQ2o;oFiTX!)tlh-H+xr0YX5K^g> zAixOu^Z9f?rPZrH#TK#cz@Gyj)fFJn}W`y1JC=>+3zm{O^p}F!gX7dm`}Y6Qzm567v zd#sGFjn7!t^r03Zl$>KMAdE`q8c?z-!=|+V#bVL)_7juS(>5+HiHY2vsg24@N%|4* zXX0qMkO&9}_`d7;WtknBYNdI3d6hv&gZ77t@uE4+CZ+K#ybyXnjW|3!7}kK!LbRMM(q z0>^7@Z9O>oWG8D+&R5oC{{BYKn3)-+3r^@7Mqi^h{X~KfBI4_IC!F;D=|AN zRz=x8RsYLiTd@#Ytyjgt1t<>Z=%O=p(Om#3lMtt1Wi_<5#R~?q;pUO39<855z=T45 z;-mMgfo-P-2L~$-4xJ3RlanKxZs=Wg)2sSqQxkPW|NFkc=CW_PCR+^(*E6yNl7SH4 zZK1;F!ui{#$Dv8?1UG+pR)NDEJA?K918|fTD^3Ej^2@{>va|c7p|LT@!emrQkxcTo z4jpd-2RJk_QK+gQCwC$xB}G3I$m3obZ9^aOR)1| zQpthO??jQFULr4V>*bX-9nAevSyAy&+whqr&F#y;pAj(tr|;C%&_-`z8nM||^-Iy0 ztyEUlH+}bd;v5F!>3PK?1pp;>jT~JHMFWf5u8>`mUtT+RboK?!!XkH%5*|U3=SJd+ zWINV&RoCxjx8|*ZphUl)vd1ncY8Ei9i)X#pXQyRk=%bRD1@>y8j!YxM{CtbA$3RY> zM#p#73=&v}km2F&ed|sEyGGcJGGGtY#ScQPhnt($(9qBdTwQ{g@~baqjE9%k#$&I? zQ_2x*_oltK-fnw-3NW~qw4>D*ODR8$Q@^2yDPxCmCj4P4)4 zYx;k72f8Q~D4h@Nab2f$!?CgpTPj&x9Au57--kVy6kc1Yq9VG#*nIE-FId2oQLA(H z2R=mZv8Xk639HwhQ_w7*y_~Og@(C@J zkr|2KT=M#i*ol?85YW_jc#y=DPK~pOyHmP*XEN!-PG?59X+ck58cxqe(_a zGWnZGrguDixTHt&a1aWcFOply3qVI>jLdmvnSb&NYT!h;AD25g*K4}J`7_1mz2xfG z1qL32fnl?{G*SERe!%Dl^YJvMpWlc^=VF0C(7NsACKKK-vC_%ip=7)FV(!g<`@a9t z7XIVQB|NkeO-*r1K@@oe-`@QzQ~lmw{blteF0?VwjO^pkAG*7{?{;=>0-jrZ>@^>q z7Cs@WFCgw;rNpB)HTW9WcXY@r={l3Bb?TqDy)7g0PAy4Wu1>VWY+j6~T+Y-|d_ei- zQN%(j^&#EcI}xN22|=gmy4XZTQ2-1WMZGWWxL71DHgDcmuvQotgz$pMO z(_JXK@ep5rC@WhD8=klbVjJBq1~UZc{E8`0SvoLK965iOWtblOhi37DFMi+`;>@vW z(%>gz{XE_p^|rEJKhaR*SCj?}<;v5cgW-#@;5^|&sH?w`U8)$u-fE-hE;bMBHNpwAA<%3;j1=so8rT#ge}0ST z=d7*aD<}~&F(Q$Hm$&!Ovu9^{l7b;{SznDw&;ZU`Z{4c?H_w|-lkB3-1Wzr|QVcnU z(n3a8qrEr{9c6)tQP>Q25ZiBP|J$=?KY=pH0gft80Il8ssB4{eiIrIl)mH`YjAMF` zJLfv5w7SktvWA{^y}66W6A6!!7<*NS0d~Hxx2R53bYgT{%*+}%Mg(cxe8ylfW+om# z7V_3=zZJh(WrR_{yY(#ICrgGX2U}Nafeq*6 FzX1s&$Ke0~ literal 0 HcmV?d00001