From c6e09373af6a1362ea375c2e11eb3cd9d56c705c Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Wed, 20 May 2026 18:26:18 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: HCY123902/llama-3-8b-dpo-tw31-beta-1e-0 Source: Original Platform --- .gitattributes | 37 + README.md | 69 + chat_template.jinja | 5 + checkpoint-1000/chat_template.jinja | 5 + checkpoint-1000/config.json | 29 + checkpoint-1000/generation_config.json | 6 + checkpoint-1000/latest | 1 + .../model-00001-of-00004.safetensors | 3 + .../model-00002-of-00004.safetensors | 3 + .../model-00003-of-00004.safetensors | 3 + .../model-00004-of-00004.safetensors | 3 + checkpoint-1000/model.safetensors.index.json | 299 ++ checkpoint-1000/rng_state_0.pth | 3 + checkpoint-1000/rng_state_1.pth | 3 + checkpoint-1000/scheduler.pt | 3 + checkpoint-1000/special_tokens_map.json | 23 + checkpoint-1000/tokenizer.json | 3 + checkpoint-1000/tokenizer_config.json | 2063 ++++++++ checkpoint-1000/trainer_state.json | 2199 +++++++++ checkpoint-1000/training_args.bin | 3 + checkpoint-1000/zero_to_fp32.py | 760 +++ checkpoint-1500/chat_template.jinja | 5 + checkpoint-1500/config.json | 29 + checkpoint-1500/generation_config.json | 6 + checkpoint-1500/latest | 1 + .../model-00001-of-00004.safetensors | 3 + .../model-00002-of-00004.safetensors | 3 + .../model-00003-of-00004.safetensors | 3 + .../model-00004-of-00004.safetensors | 3 + checkpoint-1500/model.safetensors.index.json | 299 ++ checkpoint-1500/rng_state_0.pth | 3 + checkpoint-1500/rng_state_1.pth | 3 + checkpoint-1500/scheduler.pt | 3 + checkpoint-1500/special_tokens_map.json | 23 + checkpoint-1500/tokenizer.json | 3 + checkpoint-1500/tokenizer_config.json | 2063 ++++++++ checkpoint-1500/trainer_state.json | 3271 +++++++++++++ 
checkpoint-1500/training_args.bin | 3 + checkpoint-1500/zero_to_fp32.py | 760 +++ checkpoint-1911/chat_template.jinja | 5 + checkpoint-1911/config.json | 29 + checkpoint-1911/generation_config.json | 6 + checkpoint-1911/latest | 1 + .../model-00001-of-00004.safetensors | 3 + .../model-00002-of-00004.safetensors | 3 + .../model-00003-of-00004.safetensors | 3 + .../model-00004-of-00004.safetensors | 3 + checkpoint-1911/model.safetensors.index.json | 299 ++ checkpoint-1911/rng_state_0.pth | 3 + checkpoint-1911/rng_state_1.pth | 3 + checkpoint-1911/scheduler.pt | 3 + checkpoint-1911/special_tokens_map.json | 23 + checkpoint-1911/tokenizer.json | 3 + checkpoint-1911/tokenizer_config.json | 2063 ++++++++ checkpoint-1911/trainer_state.json | 4132 +++++++++++++++++ checkpoint-1911/training_args.bin | 3 + checkpoint-1911/zero_to_fp32.py | 760 +++ checkpoint-500/chat_template.jinja | 5 + checkpoint-500/config.json | 29 + checkpoint-500/generation_config.json | 6 + checkpoint-500/latest | 1 + .../model-00001-of-00004.safetensors | 3 + .../model-00002-of-00004.safetensors | 3 + .../model-00003-of-00004.safetensors | 3 + .../model-00004-of-00004.safetensors | 3 + checkpoint-500/model.safetensors.index.json | 299 ++ checkpoint-500/rng_state_0.pth | 3 + checkpoint-500/rng_state_1.pth | 3 + checkpoint-500/scheduler.pt | 3 + checkpoint-500/special_tokens_map.json | 23 + checkpoint-500/tokenizer.json | 3 + checkpoint-500/tokenizer_config.json | 2063 ++++++++ checkpoint-500/trainer_state.json | 1127 +++++ checkpoint-500/training_args.bin | 3 + checkpoint-500/zero_to_fp32.py | 760 +++ config.json | 29 + model-00001-of-00004.safetensors | 3 + model-00002-of-00004.safetensors | 3 + model-00003-of-00004.safetensors | 3 + model-00004-of-00004.safetensors | 3 + model.safetensors.index.json | 299 ++ special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2063 ++++++++ training_args.bin | 3 + 85 files changed, 26124 insertions(+) create mode 100644 .gitattributes create 
mode 100644 README.md create mode 100644 chat_template.jinja create mode 100644 checkpoint-1000/chat_template.jinja create mode 100644 checkpoint-1000/config.json create mode 100644 checkpoint-1000/generation_config.json create mode 100644 checkpoint-1000/latest create mode 100644 checkpoint-1000/model-00001-of-00004.safetensors create mode 100644 checkpoint-1000/model-00002-of-00004.safetensors create mode 100644 checkpoint-1000/model-00003-of-00004.safetensors create mode 100644 checkpoint-1000/model-00004-of-00004.safetensors create mode 100644 checkpoint-1000/model.safetensors.index.json create mode 100644 checkpoint-1000/rng_state_0.pth create mode 100644 checkpoint-1000/rng_state_1.pth create mode 100644 checkpoint-1000/scheduler.pt create mode 100644 checkpoint-1000/special_tokens_map.json create mode 100644 checkpoint-1000/tokenizer.json create mode 100644 checkpoint-1000/tokenizer_config.json create mode 100644 checkpoint-1000/trainer_state.json create mode 100644 checkpoint-1000/training_args.bin create mode 100644 checkpoint-1000/zero_to_fp32.py create mode 100644 checkpoint-1500/chat_template.jinja create mode 100644 checkpoint-1500/config.json create mode 100644 checkpoint-1500/generation_config.json create mode 100644 checkpoint-1500/latest create mode 100644 checkpoint-1500/model-00001-of-00004.safetensors create mode 100644 checkpoint-1500/model-00002-of-00004.safetensors create mode 100644 checkpoint-1500/model-00003-of-00004.safetensors create mode 100644 checkpoint-1500/model-00004-of-00004.safetensors create mode 100644 checkpoint-1500/model.safetensors.index.json create mode 100644 checkpoint-1500/rng_state_0.pth create mode 100644 checkpoint-1500/rng_state_1.pth create mode 100644 checkpoint-1500/scheduler.pt create mode 100644 checkpoint-1500/special_tokens_map.json create mode 100644 checkpoint-1500/tokenizer.json create mode 100644 checkpoint-1500/tokenizer_config.json create mode 100644 checkpoint-1500/trainer_state.json create mode 100644 
checkpoint-1500/training_args.bin create mode 100644 checkpoint-1500/zero_to_fp32.py create mode 100644 checkpoint-1911/chat_template.jinja create mode 100644 checkpoint-1911/config.json create mode 100644 checkpoint-1911/generation_config.json create mode 100644 checkpoint-1911/latest create mode 100644 checkpoint-1911/model-00001-of-00004.safetensors create mode 100644 checkpoint-1911/model-00002-of-00004.safetensors create mode 100644 checkpoint-1911/model-00003-of-00004.safetensors create mode 100644 checkpoint-1911/model-00004-of-00004.safetensors create mode 100644 checkpoint-1911/model.safetensors.index.json create mode 100644 checkpoint-1911/rng_state_0.pth create mode 100644 checkpoint-1911/rng_state_1.pth create mode 100644 checkpoint-1911/scheduler.pt create mode 100644 checkpoint-1911/special_tokens_map.json create mode 100644 checkpoint-1911/tokenizer.json create mode 100644 checkpoint-1911/tokenizer_config.json create mode 100644 checkpoint-1911/trainer_state.json create mode 100644 checkpoint-1911/training_args.bin create mode 100644 checkpoint-1911/zero_to_fp32.py create mode 100644 checkpoint-500/chat_template.jinja create mode 100644 checkpoint-500/config.json create mode 100644 checkpoint-500/generation_config.json create mode 100644 checkpoint-500/latest create mode 100644 checkpoint-500/model-00001-of-00004.safetensors create mode 100644 checkpoint-500/model-00002-of-00004.safetensors create mode 100644 checkpoint-500/model-00003-of-00004.safetensors create mode 100644 checkpoint-500/model-00004-of-00004.safetensors create mode 100644 checkpoint-500/model.safetensors.index.json create mode 100644 checkpoint-500/rng_state_0.pth create mode 100644 checkpoint-500/rng_state_1.pth create mode 100644 checkpoint-500/scheduler.pt create mode 100644 checkpoint-500/special_tokens_map.json create mode 100644 checkpoint-500/tokenizer.json create mode 100644 checkpoint-500/tokenizer_config.json create mode 100644 checkpoint-500/trainer_state.json create 
mode 100644 checkpoint-500/training_args.bin create mode 100644 checkpoint-500/zero_to_fp32.py create mode 100644 config.json create mode 100644 model-00001-of-00004.safetensors create mode 100644 model-00002-of-00004.safetensors create mode 100644 model-00003-of-00004.safetensors create mode 100644 model-00004-of-00004.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 training_args.bin diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..07c6fc5 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs 
merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1911/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..5d1b75c --- /dev/null +++ b/README.md @@ -0,0 +1,69 @@ +--- +base_model: princeton-nlp/Llama-3-Base-8B-SFT +library_name: transformers +model_name: llama-3-8b-dpo-tw31-beta-1e-0 +tags: +- generated_from_trainer +- trl +- dpo +licence: license +--- + +# Model Card for llama-3-8b-dpo-tw31-beta-1e-0 + +This model is a fine-tuned version of [princeton-nlp/Llama-3-Base-8B-SFT](https://huggingface.co/princeton-nlp/Llama-3-Base-8B-SFT). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="HCY123902/llama-3-8b-dpo-tw31-beta-1e-0", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/2320032466hchy/attention_dpo/runs/fl0lapso) + + +This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290). 
+ +### Framework versions + +- TRL: 0.20.0 +- Transformers: 4.54.1 +- Pytorch: 2.7.1+cu128 +- Datasets: 3.6.0 +- Tokenizers: 0.21.1 + +## Citations + +Cite DPO as: + +```bibtex +@inproceedings{rafailov2023direct, + title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}}, + author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn}, + year = 2023, + booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023}, + url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html}, + editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine}, +} +``` + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-1000/chat_template.jinja 
b/checkpoint-1000/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/checkpoint-1000/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000..3f8f5c0 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/checkpoint-1000/generation_config.json b/checkpoint-1000/generation_config.json new file mode 100644 index 0000000..fc3c54a --- /dev/null +++ b/checkpoint-1000/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "eos_token_id": 128001, + "transformers_version": "4.54.1" +} diff --git a/checkpoint-1000/latest b/checkpoint-1000/latest new file mode 100644 index 0000000..e2d3435 --- /dev/null +++ b/checkpoint-1000/latest @@ -0,0 +1 @@ +global_step1000 \ No newline at end of file diff --git 
a/checkpoint-1000/model-00001-of-00004.safetensors b/checkpoint-1000/model-00001-of-00004.safetensors new file mode 100644 index 0000000..063411a --- /dev/null +++ b/checkpoint-1000/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64e0de4cbbe0373286b453e14148f1e5b78d38a21a7d043dd2a59c03f2efccd1 +size 4976698672 diff --git a/checkpoint-1000/model-00002-of-00004.safetensors b/checkpoint-1000/model-00002-of-00004.safetensors new file mode 100644 index 0000000..e82f1d3 --- /dev/null +++ b/checkpoint-1000/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5affa680795c4cce1f9be440d7dcb22f1521e07eed8ecda4305d3ddc0f2987 +size 4999802720 diff --git a/checkpoint-1000/model-00003-of-00004.safetensors b/checkpoint-1000/model-00003-of-00004.safetensors new file mode 100644 index 0000000..b29228f --- /dev/null +++ b/checkpoint-1000/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07555153d9f500e8619d5b333a186de5785bf14d793a4e9e20a4bab819120b0 +size 4915916176 diff --git a/checkpoint-1000/model-00004-of-00004.safetensors b/checkpoint-1000/model-00004-of-00004.safetensors new file mode 100644 index 0000000..7e96d7e --- /dev/null +++ b/checkpoint-1000/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff0a43bfbb5e58792dfada0cd0b7112ebdc66489f8a22356b9806f8d0091159 +size 1168138808 diff --git a/checkpoint-1000/model.safetensors.index.json b/checkpoint-1000/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/checkpoint-1000/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + 
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": 
"model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + 
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/checkpoint-1000/rng_state_0.pth b/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000..37c843b --- /dev/null +++ b/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb2e15baa025bd61fe183f8e343e7ff470b9a01aecd8defcf155a1cf00393e3 +size 14917 diff --git a/checkpoint-1000/rng_state_1.pth b/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000..d8eccdd --- /dev/null +++ b/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:07ee73a4738a457f3198cccec25cf12377bb1eba6c29e95c9fecf83c1487d401 +size 14917 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000..7c34b16 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f1b2374548a5ec63cdcc0490db6ed738cd23c8550fb53eb0592574609549746 +size 1465 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1000/tokenizer.json b/checkpoint-1000/tokenizer.json new file mode 100644 index 0000000..03aa64f --- /dev/null +++ b/checkpoint-1000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0968dcc0ee8e56c7dccd34a7f51f8065ea0cb9e2cc529e3243d1e5c0a4bdaa0c +size 17208754 diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000..877a9a9 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": 
"<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, 
+ "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": 
"<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": 
"<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": 
"<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 32768, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000..4c0b5d9 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,2199 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5234231876472127, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005234231876472127, + "grad_norm": 132.6717987060547, + "learning_rate": 0.0, + "logits/chosen": -0.40118408203125, + "logits/rejected": -0.41802978515625, + "logps/chosen": -297.609375, + "logps/rejected": -247.84375, + "logps/weighted_chosen": -4.7568359375, + "logps/weighted_rejected": -3.47998046875, + "loss": 0.6914, + "rewards/accuracies": 0.0, + 
"rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/weighted_accuracies": 0.0, + "rewards/weighted_chosen": 0.0, + "rewards/weighted_margins": 0.0, + "rewards/weighted_rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.005234231876472127, + "grad_norm": 226.00839233398438, + "learning_rate": 4.6875e-08, + "logits/chosen": -0.3175845742225647, + "logits/rejected": -0.3532341718673706, + "logps/chosen": -275.5841979980469, + "logps/rejected": -255.84548950195312, + "logps/weighted_chosen": -2.651665687561035, + "logps/weighted_rejected": -2.88427734375, + "loss": 0.6921, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0731336772441864, + "rewards/margins": -0.0670572891831398, + "rewards/rejected": -0.006076388992369175, + "rewards/weighted_accuracies": 0.2951388955116272, + "rewards/weighted_chosen": -0.0015835232334211469, + "rewards/weighted_margins": -0.0009324815473519266, + "rewards/weighted_rejected": -0.0006510416860692203, + "step": 10 + }, + { + "epoch": 0.010468463752944255, + "grad_norm": 14.726158142089844, + "learning_rate": 9.895833333333332e-08, + "logits/chosen": -0.29781341552734375, + "logits/rejected": -0.3136836886405945, + "logps/chosen": -294.4296875, + "logps/rejected": -272.58984375, + "logps/weighted_chosen": -2.458728075027466, + "logps/weighted_rejected": -2.455883741378784, + "loss": 0.6924, + "rewards/accuracies": 0.30000001192092896, + "rewards/chosen": -0.14013671875, + "rewards/margins": -0.13369140028953552, + "rewards/rejected": -0.0064453124068677425, + "rewards/weighted_accuracies": 0.34062498807907104, + "rewards/weighted_chosen": -0.0013603210682049394, + "rewards/weighted_margins": -0.0010925292735919356, + "rewards/weighted_rejected": -0.0002677917364053428, + "step": 20 + }, + { + "epoch": 0.015702695629416383, + "grad_norm": 77.6125259399414, + "learning_rate": 1.5104166666666664e-07, + "logits/chosen": -0.2918853759765625, + "logits/rejected": -0.3377639651298523, + "logps/chosen": 
-298.05859375, + "logps/rejected": -268.0132751464844, + "logps/weighted_chosen": -2.4350829124450684, + "logps/weighted_rejected": -2.7343993186950684, + "loss": 0.6926, + "rewards/accuracies": 0.24062499403953552, + "rewards/chosen": -0.06621094048023224, + "rewards/margins": -0.1640625, + "rewards/rejected": 0.09785155951976776, + "rewards/weighted_accuracies": 0.3187499940395355, + "rewards/weighted_chosen": 0.0014068603049963713, + "rewards/weighted_margins": -0.0015777588123455644, + "rewards/weighted_rejected": 0.0029846192337572575, + "step": 30 + }, + { + "epoch": 0.02093692750588851, + "grad_norm": 30.666196823120117, + "learning_rate": 2.03125e-07, + "logits/chosen": -0.30072021484375, + "logits/rejected": -0.3433845639228821, + "logps/chosen": -278.68829345703125, + "logps/rejected": -253.90780639648438, + "logps/weighted_chosen": -2.506396532058716, + "logps/weighted_rejected": -2.8416504859924316, + "loss": 0.6908, + "rewards/accuracies": 0.3062500059604645, + "rewards/chosen": 0.063232421875, + "rewards/margins": 0.04838867112994194, + "rewards/rejected": 0.014843749813735485, + "rewards/weighted_accuracies": 0.40312498807907104, + "rewards/weighted_chosen": 0.0042968750931322575, + "rewards/weighted_margins": 0.0019538879860192537, + "rewards/weighted_rejected": 0.0023429871071130037, + "step": 40 + }, + { + "epoch": 0.02617115938236064, + "grad_norm": 18.60569953918457, + "learning_rate": 2.552083333333333e-07, + "logits/chosen": -0.2819870114326477, + "logits/rejected": -0.32059136033058167, + "logps/chosen": -280.31951904296875, + "logps/rejected": -267.4359436035156, + "logps/weighted_chosen": -2.4267334938049316, + "logps/weighted_rejected": -2.529711961746216, + "loss": 0.6891, + "rewards/accuracies": 0.3187499940395355, + "rewards/chosen": -0.03535156324505806, + "rewards/margins": -0.13984374701976776, + "rewards/rejected": 0.1044921875, + "rewards/weighted_accuracies": 0.3968749940395355, + "rewards/weighted_chosen": 0.0039031982887536287, 
+ "rewards/weighted_margins": 0.005755615420639515, + "rewards/weighted_rejected": -0.0018524170154705644, + "step": 50 + }, + { + "epoch": 0.031405391258832765, + "grad_norm": 38.21036911010742, + "learning_rate": 3.0729166666666665e-07, + "logits/chosen": -0.31453245878219604, + "logits/rejected": -0.30809077620506287, + "logps/chosen": -277.66015625, + "logps/rejected": -261.7445373535156, + "logps/weighted_chosen": -2.8622069358825684, + "logps/weighted_rejected": -2.7553467750549316, + "loss": 0.6894, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": 0.04150390625, + "rewards/margins": 0.08027343451976776, + "rewards/rejected": -0.03876953199505806, + "rewards/weighted_accuracies": 0.4312500059604645, + "rewards/weighted_chosen": 0.0006561279296875, + "rewards/weighted_margins": 0.006243896670639515, + "rewards/weighted_rejected": -0.005587768740952015, + "step": 60 + }, + { + "epoch": 0.036639623135304895, + "grad_norm": 69.19047546386719, + "learning_rate": 3.59375e-07, + "logits/chosen": -0.3177490234375, + "logits/rejected": -0.3246749937534332, + "logps/chosen": -289.76251220703125, + "logps/rejected": -244.92578125, + "logps/weighted_chosen": -2.3438963890075684, + "logps/weighted_rejected": -2.7010498046875, + "loss": 0.6841, + "rewards/accuracies": 0.49687498807907104, + "rewards/chosen": 0.29765623807907104, + "rewards/margins": 0.4546875059604645, + "rewards/rejected": -0.15703125298023224, + "rewards/weighted_accuracies": 0.5406249761581421, + "rewards/weighted_chosen": 0.01530532818287611, + "rewards/weighted_margins": 0.01918792724609375, + "rewards/weighted_rejected": -0.0038825988303869963, + "step": 70 + }, + { + "epoch": 0.04187385501177702, + "grad_norm": 51.98476791381836, + "learning_rate": 4.114583333333333e-07, + "logits/chosen": -0.2850998044013977, + "logits/rejected": -0.30662041902542114, + "logps/chosen": -289.234375, + "logps/rejected": -270.375, + "logps/weighted_chosen": -2.5325684547424316, + 
"logps/weighted_rejected": -2.796435594558716, + "loss": 0.6747, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.512499988079071, + "rewards/margins": 0.6001952886581421, + "rewards/rejected": -0.08769531548023224, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": 0.036380767822265625, + "rewards/weighted_margins": 0.04396667331457138, + "rewards/weighted_rejected": -0.007586670108139515, + "step": 80 + }, + { + "epoch": 0.04710808688824915, + "grad_norm": 30.52783203125, + "learning_rate": 4.6354166666666664e-07, + "logits/chosen": -0.3142959475517273, + "logits/rejected": -0.3075408935546875, + "logps/chosen": -280.11407470703125, + "logps/rejected": -257.95233154296875, + "logps/weighted_chosen": -2.719482421875, + "logps/weighted_rejected": -2.88037109375, + "loss": 0.6687, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.5205078125, + "rewards/margins": 0.737109363079071, + "rewards/rejected": -0.21660156548023224, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": 0.06780395656824112, + "rewards/weighted_margins": 0.07340697944164276, + "rewards/weighted_rejected": -0.0056396485306322575, + "step": 90 + }, + { + "epoch": 0.05234231876472128, + "grad_norm": 69.397705078125, + "learning_rate": 5.156249999999999e-07, + "logits/chosen": -0.28213196992874146, + "logits/rejected": -0.3543289303779602, + "logps/chosen": -290.71875, + "logps/rejected": -286.73126220703125, + "logps/weighted_chosen": -2.2228636741638184, + "logps/weighted_rejected": -2.8367552757263184, + "loss": 0.6848, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": 0.24521484971046448, + "rewards/margins": 1.0690429210662842, + "rewards/rejected": -0.8238281011581421, + "rewards/weighted_accuracies": 0.5843750238418579, + "rewards/weighted_chosen": 0.05242309719324112, + "rewards/weighted_margins": 0.05032653734087944, + "rewards/weighted_rejected": 0.0021240233909338713, + "step": 100 + 
}, + { + "epoch": 0.05757655064119341, + "grad_norm": 36.600040435791016, + "learning_rate": 5.677083333333333e-07, + "logits/chosen": -0.33063429594039917, + "logits/rejected": -0.319937139749527, + "logps/chosen": -296.82501220703125, + "logps/rejected": -262.2984313964844, + "logps/weighted_chosen": -2.8468017578125, + "logps/weighted_rejected": -2.9306397438049316, + "loss": 0.6773, + "rewards/accuracies": 0.578125, + "rewards/chosen": -0.474609375, + "rewards/margins": 1.053613305091858, + "rewards/rejected": -1.528222680091858, + "rewards/weighted_accuracies": 0.534375011920929, + "rewards/weighted_chosen": 0.013439941219985485, + "rewards/weighted_margins": 0.05541381984949112, + "rewards/weighted_rejected": -0.04198913648724556, + "step": 110 + }, + { + "epoch": 0.06281078251766553, + "grad_norm": 57.109580993652344, + "learning_rate": 6.197916666666666e-07, + "logits/chosen": -0.33633461594581604, + "logits/rejected": -0.36155110597610474, + "logps/chosen": -295.3687438964844, + "logps/rejected": -256.1953125, + "logps/weighted_chosen": -2.161865234375, + "logps/weighted_rejected": -2.4251465797424316, + "loss": 0.6791, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": -0.72900390625, + "rewards/margins": 1.641210913658142, + "rewards/rejected": -2.3702149391174316, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": 0.007176590152084827, + "rewards/weighted_margins": 0.05286560207605362, + "rewards/weighted_rejected": -0.04570160061120987, + "step": 120 + }, + { + "epoch": 0.06804501439413765, + "grad_norm": 39.176841735839844, + "learning_rate": 6.718749999999999e-07, + "logits/chosen": -0.29625242948532104, + "logits/rejected": -0.2914108335971832, + "logps/chosen": -306.6781311035156, + "logps/rejected": -280.15936279296875, + "logps/weighted_chosen": -2.188079833984375, + "logps/weighted_rejected": -2.5787596702575684, + "loss": 0.6659, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 
-0.644238293170929, + "rewards/margins": 1.972265601158142, + "rewards/rejected": -2.616406202316284, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": 0.01349639892578125, + "rewards/weighted_margins": 0.0841522216796875, + "rewards/weighted_rejected": -0.07064209133386612, + "step": 130 + }, + { + "epoch": 0.07327924627060979, + "grad_norm": 52.14993667602539, + "learning_rate": 7.239583333333333e-07, + "logits/chosen": -0.3304199278354645, + "logits/rejected": -0.3464847505092621, + "logps/chosen": -301.4390563964844, + "logps/rejected": -277.9515686035156, + "logps/weighted_chosen": -2.554003953933716, + "logps/weighted_rejected": -2.881591796875, + "loss": 0.6581, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -2.746875047683716, + "rewards/margins": 2.744921922683716, + "rewards/rejected": -5.491991996765137, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.02762756310403347, + "rewards/weighted_margins": 0.11510010063648224, + "rewards/weighted_rejected": -0.14276733994483948, + "step": 140 + }, + { + "epoch": 0.07851347814708191, + "grad_norm": 22.611814498901367, + "learning_rate": 7.760416666666666e-07, + "logits/chosen": -0.2870376706123352, + "logits/rejected": -0.2975311279296875, + "logps/chosen": -287.859375, + "logps/rejected": -257.54296875, + "logps/weighted_chosen": -3.089892625808716, + "logps/weighted_rejected": -3.1946043968200684, + "loss": 0.6544, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.7095704078674316, + "rewards/margins": 2.942578077316284, + "rewards/rejected": -6.652148246765137, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.005145263858139515, + "rewards/weighted_margins": 0.16416625678539276, + "rewards/weighted_rejected": -0.16951599717140198, + "step": 150 + }, + { + "epoch": 0.08374771002355404, + "grad_norm": 15.511767387390137, + "learning_rate": 8.28125e-07, + "logits/chosen": 
-0.3232177793979645, + "logits/rejected": -0.3726806640625, + "logps/chosen": -308.91796875, + "logps/rejected": -282.15704345703125, + "logps/weighted_chosen": -2.5903563499450684, + "logps/weighted_rejected": -2.742602586746216, + "loss": 0.6211, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -4.956835746765137, + "rewards/margins": 3.9961915016174316, + "rewards/rejected": -8.953222274780273, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -0.002410888671875, + "rewards/weighted_margins": 0.23797607421875, + "rewards/weighted_rejected": -0.24028320610523224, + "step": 160 + }, + { + "epoch": 0.08898194190002617, + "grad_norm": 167.33956909179688, + "learning_rate": 8.802083333333333e-07, + "logits/chosen": -0.36021536588668823, + "logits/rejected": -0.3597045838832855, + "logps/chosen": -311.03045654296875, + "logps/rejected": -270.46875, + "logps/weighted_chosen": -2.8318848609924316, + "logps/weighted_rejected": -3.139453172683716, + "loss": 0.6949, + "rewards/accuracies": 0.609375, + "rewards/chosen": -8.0087890625, + "rewards/margins": 4.345898628234863, + "rewards/rejected": -12.354199409484863, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -0.01859130896627903, + "rewards/weighted_margins": 0.20853272080421448, + "rewards/weighted_rejected": -0.22731323540210724, + "step": 170 + }, + { + "epoch": 0.0942161737764983, + "grad_norm": 64.57138061523438, + "learning_rate": 9.322916666666666e-07, + "logits/chosen": -0.33618468046188354, + "logits/rejected": -0.3534431457519531, + "logps/chosen": -284.2171936035156, + "logps/rejected": -272.12969970703125, + "logps/weighted_chosen": -2.694580078125, + "logps/weighted_rejected": -3.225878953933716, + "loss": 0.6814, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -10.43701171875, + "rewards/margins": 5.353320121765137, + "rewards/rejected": -15.7919921875, + "rewards/weighted_accuracies": 0.6187499761581421, + 
"rewards/weighted_chosen": -0.08297424018383026, + "rewards/weighted_margins": 0.26459962129592896, + "rewards/weighted_rejected": -0.347381591796875, + "step": 180 + }, + { + "epoch": 0.09945040565297043, + "grad_norm": 49.0852165222168, + "learning_rate": 9.84375e-07, + "logits/chosen": -0.354086309671402, + "logits/rejected": -0.38891831040382385, + "logps/chosen": -319.17498779296875, + "logps/rejected": -283.31561279296875, + "logps/weighted_chosen": -2.5078492164611816, + "logps/weighted_rejected": -3.016357421875, + "loss": 0.6496, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -13.561426162719727, + "rewards/margins": 5.937890529632568, + "rewards/rejected": -19.498828887939453, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.16942748427391052, + "rewards/weighted_margins": 0.24410399794578552, + "rewards/weighted_rejected": -0.41356199979782104, + "step": 190 + }, + { + "epoch": 0.10468463752944256, + "grad_norm": 53.46296691894531, + "learning_rate": 9.99959085414323e-07, + "logits/chosen": -0.37868577241897583, + "logits/rejected": -0.4114578366279602, + "logps/chosen": -324.7124938964844, + "logps/rejected": -279.72967529296875, + "logps/weighted_chosen": -2.8757567405700684, + "logps/weighted_rejected": -3.3623046875, + "loss": 0.639, + "rewards/accuracies": 0.609375, + "rewards/chosen": -15.428125381469727, + "rewards/margins": 6.552148342132568, + "rewards/rejected": -21.975000381469727, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.18135985732078552, + "rewards/weighted_margins": 0.29008787870407104, + "rewards/weighted_rejected": -0.471527099609375, + "step": 200 + }, + { + "epoch": 0.10991886940591468, + "grad_norm": 24.815481185913086, + "learning_rate": 9.997587035630105e-07, + "logits/chosen": -0.3853309750556946, + "logits/rejected": -0.4257049560546875, + "logps/chosen": -302.82891845703125, + "logps/rejected": -308.671875, + "logps/weighted_chosen": 
-2.632519483566284, + "logps/weighted_rejected": -3.3669190406799316, + "loss": 0.6558, + "rewards/accuracies": 0.65625, + "rewards/chosen": -15.814453125, + "rewards/margins": 8.331445693969727, + "rewards/rejected": -24.146093368530273, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.225901797413826, + "rewards/weighted_margins": 0.23236694931983948, + "rewards/weighted_rejected": -0.45829468965530396, + "step": 210 + }, + { + "epoch": 0.11515310128238682, + "grad_norm": 24.175745010375977, + "learning_rate": 9.99391406364405e-07, + "logits/chosen": -0.37365952134132385, + "logits/rejected": -0.3758789002895355, + "logps/chosen": -309.34686279296875, + "logps/rejected": -293.98126220703125, + "logps/weighted_chosen": -3.002514600753784, + "logps/weighted_rejected": -3.453906297683716, + "loss": 0.6732, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -17.203418731689453, + "rewards/margins": 7.933203220367432, + "rewards/rejected": -25.137109756469727, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.259225457906723, + "rewards/weighted_margins": 0.29540252685546875, + "rewards/weighted_rejected": -0.5546798706054688, + "step": 220 + }, + { + "epoch": 0.12038733315885894, + "grad_norm": 85.15988159179688, + "learning_rate": 9.988573164927884e-07, + "logits/chosen": -0.3097473084926605, + "logits/rejected": -0.3477935791015625, + "logps/chosen": -286.5078125, + "logps/rejected": -281.8453063964844, + "logps/weighted_chosen": -2.66943359375, + "logps/weighted_rejected": -3.1229491233825684, + "loss": 0.6646, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -17.690723419189453, + "rewards/margins": 12.424609184265137, + "rewards/rejected": -30.110157012939453, + "rewards/weighted_accuracies": 0.65625, + "rewards/weighted_chosen": -0.2716217041015625, + "rewards/weighted_margins": 0.32661741971969604, + "rewards/weighted_rejected": -0.5983597040176392, + "step": 230 
+ }, + { + "epoch": 0.12562156503533106, + "grad_norm": 26.17377471923828, + "learning_rate": 9.98156612329838e-07, + "logits/chosen": -0.39516907930374146, + "logits/rejected": -0.44511109590530396, + "logps/chosen": -286.74884033203125, + "logps/rejected": -318.22735595703125, + "logps/weighted_chosen": -2.6696534156799316, + "logps/weighted_rejected": -3.4151854515075684, + "loss": 0.643, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -21.990428924560547, + "rewards/margins": 14.028905868530273, + "rewards/rejected": -36.013282775878906, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.2329559326171875, + "rewards/weighted_margins": 0.3950134217739105, + "rewards/weighted_rejected": -0.6281493902206421, + "step": 240 + }, + { + "epoch": 0.13085579691180318, + "grad_norm": 56.73057174682617, + "learning_rate": 9.97289527905053e-07, + "logits/chosen": -0.40631332993507385, + "logits/rejected": -0.4203124940395355, + "logps/chosen": -290.1703186035156, + "logps/rejected": -291.6328125, + "logps/weighted_chosen": -3.051513671875, + "logps/weighted_rejected": -3.3163819313049316, + "loss": 0.677, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -25.742870330810547, + "rewards/margins": 9.973828315734863, + "rewards/rejected": -35.72148513793945, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.2856689393520355, + "rewards/weighted_margins": 0.253326416015625, + "rewards/weighted_rejected": -0.5388733148574829, + "step": 250 + }, + { + "epoch": 0.1360900287882753, + "grad_norm": 17.766258239746094, + "learning_rate": 9.962563528175875e-07, + "logits/chosen": -0.3611465394496918, + "logits/rejected": -0.39628905057907104, + "logps/chosen": -324.36639404296875, + "logps/rejected": -297.765625, + "logps/weighted_chosen": -2.652392625808716, + "logps/weighted_rejected": -3.535571336746216, + "loss": 0.6414, + "rewards/accuracies": 0.578125, + "rewards/chosen": 
-25.621288299560547, + "rewards/margins": 11.306055068969727, + "rewards/rejected": -36.93359375, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -0.2533508241176605, + "rewards/weighted_margins": 0.2956604063510895, + "rewards/weighted_rejected": -0.5490142703056335, + "step": 260 + }, + { + "epoch": 0.14132426066474746, + "grad_norm": 17.552453994750977, + "learning_rate": 9.950574321395277e-07, + "logits/chosen": -0.41735154390335083, + "logits/rejected": -0.441476434469223, + "logps/chosen": -314.5093688964844, + "logps/rejected": -295.7093811035156, + "logps/weighted_chosen": -2.864941358566284, + "logps/weighted_rejected": -3.25732421875, + "loss": 0.661, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -25.1123046875, + "rewards/margins": 7.519726753234863, + "rewards/rejected": -32.62890625, + "rewards/weighted_accuracies": 0.609375, + "rewards/weighted_chosen": -0.3035888671875, + "rewards/weighted_margins": 0.2833190858364105, + "rewards/weighted_rejected": -0.5868393182754517, + "step": 270 + }, + { + "epoch": 0.14655849254121958, + "grad_norm": 47.66518020629883, + "learning_rate": 9.936931663006413e-07, + "logits/chosen": -0.4760284423828125, + "logits/rejected": -0.46795654296875, + "logps/chosen": -323.48126220703125, + "logps/rejected": -313.2875061035156, + "logps/weighted_chosen": -2.794970750808716, + "logps/weighted_rejected": -3.3581910133361816, + "loss": 0.6169, + "rewards/accuracies": 0.690625011920929, + "rewards/chosen": -20.707616806030273, + "rewards/margins": 13.166601181030273, + "rewards/rejected": -33.86640548706055, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.10174255073070526, + "rewards/weighted_margins": 0.34544676542282104, + "rewards/weighted_rejected": -0.447021484375, + "step": 280 + }, + { + "epoch": 0.1517927244176917, + "grad_norm": 32.503883361816406, + "learning_rate": 9.921640109546357e-07, + "logits/chosen": -0.44742050766944885, + 
"logits/rejected": -0.5166229009628296, + "logps/chosen": -292.1796875, + "logps/rejected": -289.6234436035156, + "logps/weighted_chosen": -2.7469239234924316, + "logps/weighted_rejected": -3.9541258811950684, + "loss": 0.6249, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -25.293359756469727, + "rewards/margins": 12.698633193969727, + "rewards/rejected": -37.994529724121094, + "rewards/weighted_accuracies": 0.628125011920929, + "rewards/weighted_chosen": -0.15215758979320526, + "rewards/weighted_margins": 0.4393859803676605, + "rewards/weighted_rejected": -0.5915588140487671, + "step": 290 + }, + { + "epoch": 0.15702695629416383, + "grad_norm": 17.32170867919922, + "learning_rate": 9.90470476826975e-07, + "logits/chosen": -0.5146636962890625, + "logits/rejected": -0.515917956829071, + "logps/chosen": -302.3570251464844, + "logps/rejected": -313.68438720703125, + "logps/weighted_chosen": -2.6830201148986816, + "logps/weighted_rejected": -3.202099561691284, + "loss": 0.6526, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -32.978126525878906, + "rewards/margins": 13.435937881469727, + "rewards/rejected": -46.408203125, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.23505249619483948, + "rewards/weighted_margins": 0.33623045682907104, + "rewards/weighted_rejected": -0.5710296630859375, + "step": 300 + }, + { + "epoch": 0.16226118817063595, + "grad_norm": 25.855854034423828, + "learning_rate": 9.886131295443002e-07, + "logits/chosen": -0.6332122683525085, + "logits/rejected": -0.6879852414131165, + "logps/chosen": -315.02264404296875, + "logps/rejected": -296.54998779296875, + "logps/weighted_chosen": -2.8891844749450684, + "logps/weighted_rejected": -3.3497071266174316, + "loss": 0.6099, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -33.83808517456055, + "rewards/margins": 12.542577743530273, + "rewards/rejected": -46.39081954956055, + "rewards/weighted_accuracies": 
0.668749988079071, + "rewards/weighted_chosen": -0.20775146782398224, + "rewards/weighted_margins": 0.507794201374054, + "rewards/weighted_rejected": -0.715728759765625, + "step": 310 + }, + { + "epoch": 0.16749542004710807, + "grad_norm": 19.11484718322754, + "learning_rate": 9.865925894455166e-07, + "logits/chosen": -0.730267345905304, + "logits/rejected": -0.746167004108429, + "logps/chosen": -338.2242126464844, + "logps/rejected": -307.18280029296875, + "logps/weighted_chosen": -2.9883790016174316, + "logps/weighted_rejected": -3.5892090797424316, + "loss": 0.6942, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -38.233009338378906, + "rewards/margins": 11.564062118530273, + "rewards/rejected": -49.80937576293945, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.3507080078125, + "rewards/weighted_margins": 0.3366760313510895, + "rewards/weighted_rejected": -0.6871337890625, + "step": 320 + }, + { + "epoch": 0.17272965192358022, + "grad_norm": 57.19697570800781, + "learning_rate": 9.84409531374603e-07, + "logits/chosen": -0.6843910217285156, + "logits/rejected": -0.6659576296806335, + "logps/chosen": -345.46875, + "logps/rejected": -316.2515563964844, + "logps/weighted_chosen": -3.05517578125, + "logps/weighted_rejected": -3.5519776344299316, + "loss": 0.6569, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -40.732032775878906, + "rewards/margins": 12.651952743530273, + "rewards/rejected": -53.38984298706055, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.3262878358364105, + "rewards/weighted_margins": 0.346893310546875, + "rewards/weighted_rejected": -0.6730865240097046, + "step": 330 + }, + { + "epoch": 0.17796388380005235, + "grad_norm": 52.49288558959961, + "learning_rate": 9.820646844552219e-07, + "logits/chosen": -0.6993133425712585, + "logits/rejected": -0.7529846429824829, + "logps/chosen": -313.59295654296875, + "logps/rejected": -322.1499938964844, + 
"logps/weighted_chosen": -3.0488524436950684, + "logps/weighted_rejected": -3.440136671066284, + "loss": 0.6287, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -37.06660079956055, + "rewards/margins": 19.494531631469727, + "rewards/rejected": -56.556640625, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.30719298124313354, + "rewards/weighted_margins": 0.448944091796875, + "rewards/weighted_rejected": -0.755999743938446, + "step": 340 + }, + { + "epoch": 0.18319811567652447, + "grad_norm": 15.657389640808105, + "learning_rate": 9.795588318471964e-07, + "logits/chosen": -0.7813507318496704, + "logits/rejected": -0.7874206304550171, + "logps/chosen": -299.80157470703125, + "logps/rejected": -331.4375, + "logps/weighted_chosen": -2.84619140625, + "logps/weighted_rejected": -3.315380811691284, + "loss": 0.6405, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -40.32304763793945, + "rewards/margins": 14.830663681030273, + "rewards/rejected": -55.15625, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.315826416015625, + "rewards/weighted_margins": 0.386627197265625, + "rewards/weighted_rejected": -0.702471911907196, + "step": 350 + }, + { + "epoch": 0.1884323475529966, + "grad_norm": 16.19976806640625, + "learning_rate": 9.768928104849415e-07, + "logits/chosen": -0.801177978515625, + "logits/rejected": -0.799664318561554, + "logps/chosen": -323.5171813964844, + "logps/rejected": -305.046875, + "logps/weighted_chosen": -3.1164307594299316, + "logps/weighted_rejected": -3.3475098609924316, + "loss": 0.6865, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -40.823829650878906, + "rewards/margins": 15.389843940734863, + "rewards/rejected": -56.216407775878906, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.31828004121780396, + "rewards/weighted_margins": 0.3831420838832855, + "rewards/weighted_rejected": 
-0.7014526128768921, + "step": 360 + }, + { + "epoch": 0.19366657942946872, + "grad_norm": 89.87427520751953, + "learning_rate": 9.740675107979355e-07, + "logits/chosen": -0.7640800476074219, + "logits/rejected": -0.7867538332939148, + "logps/chosen": -361.13751220703125, + "logps/rejected": -334.97967529296875, + "logps/weighted_chosen": -2.5084471702575684, + "logps/weighted_rejected": -3.4689698219299316, + "loss": 0.6531, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -46.098045349121094, + "rewards/margins": 14.188085556030273, + "rewards/rejected": -60.26640701293945, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -0.36387938261032104, + "rewards/weighted_margins": 0.3567260801792145, + "rewards/weighted_rejected": -0.720538318157196, + "step": 370 + }, + { + "epoch": 0.19890081130594087, + "grad_norm": 22.484216690063477, + "learning_rate": 9.71083876413323e-07, + "logits/chosen": -0.7209137082099915, + "logits/rejected": -0.7318176031112671, + "logps/chosen": -353.6031188964844, + "logps/rejected": -339.16485595703125, + "logps/weighted_chosen": -2.70361328125, + "logps/weighted_rejected": -3.5843749046325684, + "loss": 0.6589, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -51.237892150878906, + "rewards/margins": 18.424999237060547, + "rewards/rejected": -69.64530944824219, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.4475158751010895, + "rewards/weighted_margins": 0.3267761170864105, + "rewards/weighted_rejected": -0.7747405767440796, + "step": 380 + }, + { + "epoch": 0.204135043182413, + "grad_norm": 21.885372161865234, + "learning_rate": 9.67942903840751e-07, + "logits/chosen": -0.7708206176757812, + "logits/rejected": -0.8207153081893921, + "logps/chosen": -355.18438720703125, + "logps/rejected": -350.47186279296875, + "logps/weighted_chosen": -2.8836669921875, + "logps/weighted_rejected": -3.5904297828674316, + "loss": 0.6028, + "rewards/accuracies": 
0.675000011920929, + "rewards/chosen": -50.973045349121094, + "rewards/margins": 25.190038681030273, + "rewards/rejected": -76.1617202758789, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.39097899198532104, + "rewards/weighted_margins": 0.4941650331020355, + "rewards/weighted_rejected": -0.884967029094696, + "step": 390 + }, + { + "epoch": 0.2093692750588851, + "grad_norm": 26.357742309570312, + "learning_rate": 9.646456421395447e-07, + "logits/chosen": -0.805267333984375, + "logits/rejected": -0.8178039789199829, + "logps/chosen": -377.52813720703125, + "logps/rejected": -392.0296936035156, + "logps/weighted_chosen": -2.7947998046875, + "logps/weighted_rejected": -3.697582960128784, + "loss": 0.6296, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -59.2109375, + "rewards/margins": 29.121875762939453, + "rewards/rejected": -88.32890319824219, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -0.41761475801467896, + "rewards/weighted_margins": 0.38171082735061646, + "rewards/weighted_rejected": -0.7994705438613892, + "step": 400 + }, + { + "epoch": 0.21460350693535724, + "grad_norm": 21.382999420166016, + "learning_rate": 9.611931925683266e-07, + "logits/chosen": -0.7703964114189148, + "logits/rejected": -0.808850109577179, + "logps/chosen": -367.3140563964844, + "logps/rejected": -348.0687561035156, + "logps/weighted_chosen": -2.711962938308716, + "logps/weighted_rejected": -3.4615721702575684, + "loss": 0.5758, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -61.02734375, + "rewards/margins": 23.316797256469727, + "rewards/rejected": -84.34687805175781, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.40519410371780396, + "rewards/weighted_margins": 0.521441638469696, + "rewards/weighted_rejected": -0.9261535406112671, + "step": 410 + }, + { + "epoch": 0.21983773881182936, + "grad_norm": 23.030996322631836, + "learning_rate": 
9.575867082172085e-07, + "logits/chosen": -0.7789466977119446, + "logits/rejected": -0.8260132074356079, + "logps/chosen": -372.22344970703125, + "logps/rejected": -367.0171813964844, + "logps/weighted_chosen": -3.114550828933716, + "logps/weighted_rejected": -3.364208936691284, + "loss": 0.6211, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -67.16816711425781, + "rewards/margins": 29.731639862060547, + "rewards/rejected": -96.90156555175781, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.5122925043106079, + "rewards/weighted_margins": 0.5218566656112671, + "rewards/weighted_rejected": -1.0339782238006592, + "step": 420 + }, + { + "epoch": 0.22507197068830148, + "grad_norm": 16.442333221435547, + "learning_rate": 9.538273936226673e-07, + "logits/chosen": -0.830523669719696, + "logits/rejected": -0.8667358160018921, + "logps/chosen": -328.4546813964844, + "logps/rejected": -347.9593811035156, + "logps/weighted_chosen": -3.373584032058716, + "logps/weighted_rejected": -3.832958936691284, + "loss": 0.6425, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -62.920310974121094, + "rewards/margins": 20.668750762939453, + "rewards/rejected": -83.5894546508789, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.459890753030777, + "rewards/weighted_margins": 0.39284056425094604, + "rewards/weighted_rejected": -0.8525451421737671, + "step": 430 + }, + { + "epoch": 0.23030620256477363, + "grad_norm": 21.955875396728516, + "learning_rate": 9.499165043652391e-07, + "logits/chosen": -0.8598114252090454, + "logits/rejected": -0.868182361125946, + "logps/chosen": -358.21563720703125, + "logps/rejected": -356.26251220703125, + "logps/weighted_chosen": -3.4171142578125, + "logps/weighted_rejected": -3.6997313499450684, + "loss": 0.624, + "rewards/accuracies": 0.625, + "rewards/chosen": -66.96504211425781, + "rewards/margins": 22.563282012939453, + "rewards/rejected": 
-89.5503921508789, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.5841079950332642, + "rewards/weighted_margins": 0.434326171875, + "rewards/weighted_rejected": -1.018707275390625, + "step": 440 + }, + { + "epoch": 0.23554043444124576, + "grad_norm": 75.56902313232422, + "learning_rate": 9.458553466501665e-07, + "logits/chosen": -0.9330536127090454, + "logits/rejected": -0.9642333984375, + "logps/chosen": -352.6187438964844, + "logps/rejected": -336.0218811035156, + "logps/weighted_chosen": -3.4129395484924316, + "logps/weighted_rejected": -3.74462890625, + "loss": 0.6566, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -66.412109375, + "rewards/margins": 25.757617950439453, + "rewards/rejected": -92.181640625, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.689007580280304, + "rewards/weighted_margins": 0.4539245665073395, + "rewards/weighted_rejected": -1.143212914466858, + "step": 450 + }, + { + "epoch": 0.24077466631771788, + "grad_norm": 19.516427993774414, + "learning_rate": 9.416452768711366e-07, + "logits/chosen": -0.945111095905304, + "logits/rejected": -0.9787231683731079, + "logps/chosen": -369.3671875, + "logps/rejected": -358.9624938964844, + "logps/weighted_chosen": -3.1959471702575684, + "logps/weighted_rejected": -3.948193311691284, + "loss": 0.6392, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -77.17265319824219, + "rewards/margins": 21.916015625, + "rewards/rejected": -99.0796890258789, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.67822265625, + "rewards/weighted_margins": 0.532788097858429, + "rewards/weighted_rejected": -1.2112305164337158, + "step": 460 + }, + { + "epoch": 0.24600889819419, + "grad_norm": 19.182979583740234, + "learning_rate": 9.372877011572557e-07, + "logits/chosen": -0.9224609136581421, + "logits/rejected": -0.9388214349746704, + "logps/chosen": -391.6937561035156, + "logps/rejected": 
-377.0625, + "logps/weighted_chosen": -3.224560499191284, + "logps/weighted_rejected": -3.783252000808716, + "loss": 0.6162, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -80.95976257324219, + "rewards/margins": 21.617578506469727, + "rewards/rejected": -102.59883117675781, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.684155285358429, + "rewards/weighted_margins": 0.5555480718612671, + "rewards/weighted_rejected": -1.23956298828125, + "step": 470 + }, + { + "epoch": 0.2512431300706621, + "grad_norm": 31.75469970703125, + "learning_rate": 9.327840749034141e-07, + "logits/chosen": -0.969561755657196, + "logits/rejected": -0.998791515827179, + "logps/chosen": -362.1859436035156, + "logps/rejected": -385.29998779296875, + "logps/weighted_chosen": -3.0771241188049316, + "logps/weighted_rejected": -4.388257026672363, + "loss": 0.6296, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -75.1123046875, + "rewards/margins": 33.66425704956055, + "rewards/rejected": -108.75, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.606555163860321, + "rewards/weighted_margins": 0.612231433391571, + "rewards/weighted_rejected": -1.2182190418243408, + "step": 480 + }, + { + "epoch": 0.2564773619471343, + "grad_norm": 37.024818420410156, + "learning_rate": 9.281359022841965e-07, + "logits/chosen": -0.846588134765625, + "logits/rejected": -0.859790027141571, + "logps/chosen": -352.46405029296875, + "logps/rejected": -355.24688720703125, + "logps/weighted_chosen": -3.219531297683716, + "logps/weighted_rejected": -4.648681640625, + "loss": 0.5897, + "rewards/accuracies": 0.640625, + "rewards/chosen": -77.3949203491211, + "rewards/margins": 32.93046951293945, + "rewards/rejected": -110.32890319824219, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.7288268804550171, + "rewards/weighted_margins": 0.6741577386856079, + "rewards/weighted_rejected": 
-1.403161644935608, + "step": 490 + }, + { + "epoch": 0.26171159382360637, + "grad_norm": 17.740766525268555, + "learning_rate": 9.233447357514989e-07, + "logits/chosen": -0.8205505609512329, + "logits/rejected": -0.863543689250946, + "logps/chosen": -375.52032470703125, + "logps/rejected": -378.3500061035156, + "logps/weighted_chosen": -3.53125, + "logps/weighted_rejected": -4.106689453125, + "loss": 0.6305, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -77.591796875, + "rewards/margins": 30.978906631469727, + "rewards/rejected": -108.54609680175781, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.6407378911972046, + "rewards/weighted_margins": 0.6662231683731079, + "rewards/weighted_rejected": -1.30645751953125, + "step": 500 + }, + { + "epoch": 0.26171159382360637, + "eval_logits/chosen": -0.9400458931922913, + "eval_logits/rejected": -0.955981433391571, + "eval_logps/chosen": -371.72900390625, + "eval_logps/rejected": -379.6419982910156, + "eval_logps/weighted_chosen": -3.214712381362915, + "eval_logps/weighted_rejected": -4.0158867835998535, + "eval_loss": 0.6316163539886475, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -82.98784637451172, + "eval_rewards/margins": 28.939437866210938, + "eval_rewards/rejected": -111.93875122070312, + "eval_rewards/weighted_accuracies": 0.6725000143051147, + "eval_rewards/weighted_chosen": -0.6669993996620178, + "eval_rewards/weighted_margins": 0.5506796836853027, + "eval_rewards/weighted_rejected": -1.2176789045333862, + "eval_runtime": 1162.5522, + "eval_samples_per_second": 1.72, + "eval_steps_per_second": 0.43, + "step": 500 + }, + { + "epoch": 0.2669458257000785, + "grad_norm": 59.14344787597656, + "learning_rate": 9.184121755160232e-07, + "logits/chosen": -0.9093383550643921, + "logits/rejected": -0.9390915036201477, + "logps/chosen": -378.3890686035156, + "logps/rejected": -401.62811279296875, + "logps/weighted_chosen": -3.4715576171875, + 
"logps/weighted_rejected": -4.080712795257568, + "loss": 0.6505, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -77.12968444824219, + "rewards/margins": 35.939842224121094, + "rewards/rejected": -113.0718765258789, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.6525024175643921, + "rewards/weighted_margins": 0.52886962890625, + "rewards/weighted_rejected": -1.1813843250274658, + "step": 510 + }, + { + "epoch": 0.2721800575765506, + "grad_norm": 33.64823913574219, + "learning_rate": 9.133398690128193e-07, + "logits/chosen": -0.942626953125, + "logits/rejected": -0.965716540813446, + "logps/chosen": -400.1890563964844, + "logps/rejected": -403.63592529296875, + "logps/weighted_chosen": -3.2196044921875, + "logps/weighted_rejected": -4.1656494140625, + "loss": 0.6062, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -78.64530944824219, + "rewards/margins": 44.30859375, + "rewards/rejected": -122.9917984008789, + "rewards/weighted_accuracies": 0.7406250238418579, + "rewards/weighted_chosen": -0.48695677518844604, + "rewards/weighted_margins": 0.6421569585800171, + "rewards/weighted_rejected": -1.129034399986267, + "step": 520 + }, + { + "epoch": 0.27741428945302277, + "grad_norm": 36.97419738769531, + "learning_rate": 9.081295103510554e-07, + "logits/chosen": -0.9060531854629517, + "logits/rejected": -0.954547107219696, + "logps/chosen": -377.2640686035156, + "logps/rejected": -402.17498779296875, + "logps/weighted_chosen": -2.8717284202575684, + "logps/weighted_rejected": -4.353662014007568, + "loss": 0.5138, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -93.2210922241211, + "rewards/margins": 39.227149963378906, + "rewards/rejected": -132.44140625, + "rewards/weighted_accuracies": 0.7250000238418579, + "rewards/weighted_chosen": -0.5675109624862671, + "rewards/weighted_margins": 0.93292236328125, + "rewards/weighted_rejected": -1.501245141029358, + "step": 530 + }, + { + "epoch": 
0.2826485213294949, + "grad_norm": 23.371498107910156, + "learning_rate": 9.027828397481989e-07, + "logits/chosen": -0.8855453729629517, + "logits/rejected": -0.911120593547821, + "logps/chosen": -357.30078125, + "logps/rejected": -385.8531188964844, + "logps/weighted_chosen": -3.6192626953125, + "logps/weighted_rejected": -4.450634956359863, + "loss": 0.6284, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -96.7691421508789, + "rewards/margins": 33.722267150878906, + "rewards/rejected": -130.5457000732422, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.788897693157196, + "rewards/weighted_margins": 0.5829833745956421, + "rewards/weighted_rejected": -1.3713562488555908, + "step": 540 + }, + { + "epoch": 0.287882753205967, + "grad_norm": 21.5579833984375, + "learning_rate": 8.973016429487988e-07, + "logits/chosen": -0.9223998785018921, + "logits/rejected": -0.9320526123046875, + "logps/chosen": -376.43438720703125, + "logps/rejected": -393.18438720703125, + "logps/weighted_chosen": -3.503002882003784, + "logps/weighted_rejected": -3.9817872047424316, + "loss": 0.6093, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -94.5347671508789, + "rewards/margins": 44.30546951293945, + "rewards/rejected": -138.84414672851562, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.7487853765487671, + "rewards/weighted_margins": 0.62567138671875, + "rewards/weighted_rejected": -1.3746826648712158, + "step": 550 + }, + { + "epoch": 0.29311698508243916, + "grad_norm": 55.23554992675781, + "learning_rate": 8.916877506280601e-07, + "logits/chosen": -0.969989001750946, + "logits/rejected": -0.9665802121162415, + "logps/chosen": -383.078125, + "logps/rejected": -397.75, + "logps/weighted_chosen": -3.5821290016174316, + "logps/weighted_rejected": -4.008593559265137, + "loss": 0.6282, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -102.19023132324219, + "rewards/margins": 
41.46562576293945, + "rewards/rejected": -143.642578125, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.7835754156112671, + "rewards/weighted_margins": 0.644238293170929, + "rewards/weighted_rejected": -1.427978515625, + "step": 560 + }, + { + "epoch": 0.29835121695891126, + "grad_norm": 18.939470291137695, + "learning_rate": 8.85943037780415e-07, + "logits/chosen": -1.0394058227539062, + "logits/rejected": -1.042639136314392, + "logps/chosen": -384.078125, + "logps/rejected": -366.9984436035156, + "logps/weighted_chosen": -3.3567872047424316, + "logps/weighted_rejected": -3.9019775390625, + "loss": 0.5908, + "rewards/accuracies": 0.625, + "rewards/chosen": -97.7894515991211, + "rewards/margins": 27.4404296875, + "rewards/rejected": -125.2328109741211, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.7109375, + "rewards/weighted_margins": 0.566577136516571, + "rewards/weighted_rejected": -1.2770659923553467, + "step": 570 + }, + { + "epoch": 0.3035854488353834, + "grad_norm": 54.97072982788086, + "learning_rate": 8.800694230932884e-07, + "logits/chosen": -0.958050549030304, + "logits/rejected": -0.9729766845703125, + "logps/chosen": -381.4140625, + "logps/rejected": -385.4296875, + "logps/weighted_chosen": -2.9930176734924316, + "logps/weighted_rejected": -3.6538329124450684, + "loss": 0.6283, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -91.8843765258789, + "rewards/margins": 28.43359375, + "rewards/rejected": -120.30078125, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.5365799069404602, + "rewards/weighted_margins": 0.4689392149448395, + "rewards/weighted_rejected": -1.005883812904358, + "step": 580 + }, + { + "epoch": 0.30881968071185556, + "grad_norm": 187.63499450683594, + "learning_rate": 8.740688683062723e-07, + "logits/chosen": -1.0116729736328125, + "logits/rejected": -1.0273834466934204, + "logps/chosen": -427.6859436035156, + 
"logps/rejected": -400.0640563964844, + "logps/weighted_chosen": -2.997143507003784, + "logps/weighted_rejected": -3.7496094703674316, + "loss": 0.6363, + "rewards/accuracies": 0.578125, + "rewards/chosen": -98.6539077758789, + "rewards/margins": 25.619726181030273, + "rewards/rejected": -124.2671890258789, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.539746105670929, + "rewards/weighted_margins": 0.48472291231155396, + "rewards/weighted_rejected": -1.023950219154358, + "step": 590 + }, + { + "epoch": 0.31405391258832765, + "grad_norm": 17.906530380249023, + "learning_rate": 8.679433775559215e-07, + "logits/chosen": -0.991503894329071, + "logits/rejected": -1.0299193859100342, + "logps/chosen": -428.88592529296875, + "logps/rejected": -425.1625061035156, + "logps/weighted_chosen": -2.877368211746216, + "logps/weighted_rejected": -3.985302686691284, + "loss": 0.5932, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -104.1353530883789, + "rewards/margins": 35.59746170043945, + "rewards/rejected": -139.7765655517578, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.6103607416152954, + "rewards/weighted_margins": 0.5564330816268921, + "rewards/weighted_rejected": -1.166479468345642, + "step": 600 + }, + { + "epoch": 0.3192881444647998, + "grad_norm": 33.20716094970703, + "learning_rate": 8.616949967063871e-07, + "logits/chosen": -0.9755920171737671, + "logits/rejected": -1.0198791027069092, + "logps/chosen": -370.45001220703125, + "logps/rejected": -400.4906311035156, + "logps/weighted_chosen": -3.3270506858825684, + "logps/weighted_rejected": -3.9315428733825684, + "loss": 0.6822, + "rewards/accuracies": 0.65625, + "rewards/chosen": -102.12422180175781, + "rewards/margins": 37.60078048706055, + "rewards/rejected": -139.7734375, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -0.7267090082168579, + "rewards/weighted_margins": 0.43719482421875, + 
"rewards/weighted_rejected": -1.163793921470642, + "step": 610 + }, + { + "epoch": 0.3245223763412719, + "grad_norm": 13.562949180603027, + "learning_rate": 8.553258126661154e-07, + "logits/chosen": -1.00177001953125, + "logits/rejected": -1.010014295578003, + "logps/chosen": -390.65234375, + "logps/rejected": -402.65155029296875, + "logps/weighted_chosen": -3.4473876953125, + "logps/weighted_rejected": -4.26806640625, + "loss": 0.6605, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -110.80390930175781, + "rewards/margins": 35.68242263793945, + "rewards/rejected": -146.5031280517578, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.7691070437431335, + "rewards/weighted_margins": 0.534271240234375, + "rewards/weighted_rejected": -1.3035888671875, + "step": 620 + }, + { + "epoch": 0.32975660821774405, + "grad_norm": 29.253734588623047, + "learning_rate": 8.488379526908368e-07, + "logits/chosen": -0.978869616985321, + "logits/rejected": -0.9867492914199829, + "logps/chosen": -406.9437561035156, + "logps/rejected": -431.9906311035156, + "logps/weighted_chosen": -3.346386671066284, + "logps/weighted_rejected": -4.080664157867432, + "loss": 0.5991, + "rewards/accuracies": 0.65625, + "rewards/chosen": -116.3648452758789, + "rewards/margins": 46.68730545043945, + "rewards/rejected": -163.06405639648438, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -0.757556140422821, + "rewards/weighted_margins": 0.597582995891571, + "rewards/weighted_rejected": -1.3551514148712158, + "step": 630 + }, + { + "epoch": 0.33499084009421615, + "grad_norm": 28.59862518310547, + "learning_rate": 8.422335836730802e-07, + "logits/chosen": -0.983142077922821, + "logits/rejected": -0.9791107177734375, + "logps/chosen": -378.4984436035156, + "logps/rejected": -432.71563720703125, + "logps/weighted_chosen": -3.094311475753784, + "logps/weighted_rejected": -3.8768067359924316, + "loss": 0.6061, + 
"rewards/accuracies": 0.643750011920929, + "rewards/chosen": -104.8597640991211, + "rewards/margins": 50.2001953125, + "rewards/rejected": -155.0695343017578, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -0.6736419796943665, + "rewards/weighted_margins": 0.5768188238143921, + "rewards/weighted_rejected": -1.250451683998108, + "step": 640 + }, + { + "epoch": 0.3402250719706883, + "grad_norm": 20.690876007080078, + "learning_rate": 8.355149114184485e-07, + "logits/chosen": -1.030615210533142, + "logits/rejected": -1.0146636962890625, + "logps/chosen": -416.39373779296875, + "logps/rejected": -445.0625, + "logps/weighted_chosen": -3.255859375, + "logps/weighted_rejected": -3.76806640625, + "loss": 0.6048, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -108.90547180175781, + "rewards/margins": 53.869140625, + "rewards/rejected": -162.74258422851562, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.700115978717804, + "rewards/weighted_margins": 0.5948852300643921, + "rewards/weighted_rejected": -1.2950623035430908, + "step": 650 + }, + { + "epoch": 0.34545930384716045, + "grad_norm": 33.73557662963867, + "learning_rate": 8.286841799088963e-07, + "logits/chosen": -1.0694351196289062, + "logits/rejected": -1.0623047351837158, + "logps/chosen": -395.19219970703125, + "logps/rejected": -412.45001220703125, + "logps/weighted_chosen": -2.8808836936950684, + "logps/weighted_rejected": -3.636523485183716, + "loss": 0.6207, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -111.27070617675781, + "rewards/margins": 38.04765701293945, + "rewards/rejected": -149.3312530517578, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.649340808391571, + "rewards/weighted_margins": 0.5112365484237671, + "rewards/weighted_rejected": -1.1605103015899658, + "step": 660 + }, + { + "epoch": 0.35069353572363254, + "grad_norm": 18.913761138916016, + "learning_rate": 
8.217436705532599e-07, + "logits/chosen": -1.0736572742462158, + "logits/rejected": -1.090576171875, + "logps/chosen": -431.78125, + "logps/rejected": -421.4312438964844, + "logps/weighted_chosen": -3.0259766578674316, + "logps/weighted_rejected": -3.81298828125, + "loss": 0.6004, + "rewards/accuracies": 0.640625, + "rewards/chosen": -129.1144561767578, + "rewards/margins": 28.649608612060547, + "rewards/rejected": -157.74844360351562, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -0.6614929437637329, + "rewards/weighted_margins": 0.636364758014679, + "rewards/weighted_rejected": -1.2976195812225342, + "step": 670 + }, + { + "epoch": 0.3559277676001047, + "grad_norm": 22.967056274414062, + "learning_rate": 8.14695701425284e-07, + "logits/chosen": -1.051629662513733, + "logits/rejected": -1.0860717296600342, + "logps/chosen": -432.98126220703125, + "logps/rejected": -426.5953063964844, + "logps/weighted_chosen": -3.1195311546325684, + "logps/weighted_rejected": -3.96337890625, + "loss": 0.5827, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -127.5308609008789, + "rewards/margins": 36.908592224121094, + "rewards/rejected": -164.46484375, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.686236560344696, + "rewards/weighted_margins": 0.6052306890487671, + "rewards/weighted_rejected": -1.2908813953399658, + "step": 680 + }, + { + "epoch": 0.3611619994765768, + "grad_norm": 15.409049034118652, + "learning_rate": 8.075426264894046e-07, + "logits/chosen": -1.006170630455017, + "logits/rejected": -1.0271179676055908, + "logps/chosen": -434.3828125, + "logps/rejected": -452.90625, + "logps/weighted_chosen": -3.187304735183716, + "logps/weighted_rejected": -4.466699123382568, + "loss": 0.5501, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -128.9619140625, + "rewards/margins": 46.51679611206055, + "rewards/rejected": -175.45468139648438, + "rewards/weighted_accuracies": 
0.737500011920929, + "rewards/weighted_chosen": -0.6887573003768921, + "rewards/weighted_margins": 0.76141357421875, + "rewards/weighted_rejected": -1.4498474597930908, + "step": 690 + }, + { + "epoch": 0.36639623135304894, + "grad_norm": 23.13039207458496, + "learning_rate": 8.002868348145435e-07, + "logits/chosen": -0.9920509457588196, + "logits/rejected": -0.996777355670929, + "logps/chosen": -439.54998779296875, + "logps/rejected": -444.30780029296875, + "logps/weighted_chosen": -3.31005859375, + "logps/weighted_rejected": -3.691967725753784, + "loss": 0.6102, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -139.38632202148438, + "rewards/margins": 36.769142150878906, + "rewards/rejected": -176.1867218017578, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.8051727414131165, + "rewards/weighted_margins": 0.5639587640762329, + "rewards/weighted_rejected": -1.3695800304412842, + "step": 700 + }, + { + "epoch": 0.3716304632295211, + "grad_norm": 38.38508224487305, + "learning_rate": 7.92930749776179e-07, + "logits/chosen": -1.0172607898712158, + "logits/rejected": -1.042083740234375, + "logps/chosen": -422.10546875, + "logps/rejected": -444.6812438964844, + "logps/weighted_chosen": -3.483569383621216, + "logps/weighted_rejected": -4.216650485992432, + "loss": 0.6316, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -150.42578125, + "rewards/margins": 33.904685974121094, + "rewards/rejected": -184.33438110351562, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.72515869140625, + "rewards/weighted_margins": 0.590161144733429, + "rewards/weighted_rejected": -1.3153502941131592, + "step": 710 + }, + { + "epoch": 0.3768646951059932, + "grad_norm": 20.4711971282959, + "learning_rate": 7.854768282469582e-07, + "logits/chosen": -1.0770995616912842, + "logits/rejected": -1.114935278892517, + "logps/chosen": -401.88592529296875, + "logps/rejected": -458.3374938964844, + 
"logps/weighted_chosen": -3.099194288253784, + "logps/weighted_rejected": -3.990966796875, + "loss": 0.5978, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -124.43046569824219, + "rewards/margins": 56.217384338378906, + "rewards/rejected": -180.6015625, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.6303802728652954, + "rewards/weighted_margins": 0.679125964641571, + "rewards/weighted_rejected": -1.3104156255722046, + "step": 720 + }, + { + "epoch": 0.38209892698246534, + "grad_norm": 15.2982177734375, + "learning_rate": 7.779275597761215e-07, + "logits/chosen": -1.0406615734100342, + "logits/rejected": -1.097131371498108, + "logps/chosen": -414.5625, + "logps/rejected": -462.5453186035156, + "logps/weighted_chosen": -3.3689942359924316, + "logps/weighted_rejected": -4.090185642242432, + "loss": 0.5689, + "rewards/accuracies": 0.6781250238418579, + "rewards/chosen": -138.33358764648438, + "rewards/margins": 55.842185974121094, + "rewards/rejected": -194.16171264648438, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -0.6730102300643921, + "rewards/weighted_margins": 0.699902355670929, + "rewards/weighted_rejected": -1.373620629310608, + "step": 730 + }, + { + "epoch": 0.38733315885893743, + "grad_norm": 37.01581954956055, + "learning_rate": 7.702854657580126e-07, + "logits/chosen": -1.1022522449493408, + "logits/rejected": -1.1134154796600342, + "logps/chosen": -459.6156311035156, + "logps/rejected": -462.4125061035156, + "logps/weighted_chosen": -3.382946729660034, + "logps/weighted_rejected": -4.401709079742432, + "loss": 0.6148, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -164.8722686767578, + "rewards/margins": 43.28515625, + "rewards/rejected": -208.21133422851562, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.8088958859443665, + "rewards/weighted_margins": 0.698986828327179, + "rewards/weighted_rejected": -1.5074951648712158, 
+ "step": 740 + }, + { + "epoch": 0.3925673907354096, + "grad_norm": 26.924480438232422, + "learning_rate": 7.625530985899547e-07, + "logits/chosen": -1.0611861944198608, + "logits/rejected": -1.075714111328125, + "logps/chosen": -430.6312561035156, + "logps/rejected": -461.9828186035156, + "logps/weighted_chosen": -3.210217237472534, + "logps/weighted_rejected": -4.483691215515137, + "loss": 0.5815, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -163.78555297851562, + "rewards/margins": 53.288673400878906, + "rewards/rejected": -216.96054077148438, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.876666247844696, + "rewards/weighted_margins": 0.7388671636581421, + "rewards/weighted_rejected": -1.6160767078399658, + "step": 750 + }, + { + "epoch": 0.39780162261188173, + "grad_norm": 18.056201934814453, + "learning_rate": 7.547330408197694e-07, + "logits/chosen": -1.0437713861465454, + "logits/rejected": -1.075250267982483, + "logps/chosen": -460.1875, + "logps/rejected": -450.8109436035156, + "logps/weighted_chosen": -3.229296922683716, + "logps/weighted_rejected": -4.191064357757568, + "loss": 0.6146, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -164.56640625, + "rewards/margins": 29.973241806030273, + "rewards/rejected": -194.6171875, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.7951415777206421, + "rewards/weighted_margins": 0.599578857421875, + "rewards/weighted_rejected": -1.395105004310608, + "step": 760 + }, + { + "epoch": 0.40303585448835383, + "grad_norm": 16.393836975097656, + "learning_rate": 7.468279042832271e-07, + "logits/chosen": -1.0454833507537842, + "logits/rejected": -1.0705687999725342, + "logps/chosen": -416.57421875, + "logps/rejected": -483.1390686035156, + "logps/weighted_chosen": -3.134448289871216, + "logps/weighted_rejected": -3.8697752952575684, + "loss": 0.6132, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": 
-136.8953094482422, + "rewards/margins": 56.886329650878906, + "rewards/rejected": -193.72305297851562, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.7508605718612671, + "rewards/weighted_margins": 0.5269104242324829, + "rewards/weighted_rejected": -1.277490258216858, + "step": 770 + }, + { + "epoch": 0.408270086364826, + "grad_norm": 16.254776000976562, + "learning_rate": 7.388403292317154e-07, + "logits/chosen": -1.0213134288787842, + "logits/rejected": -1.072851538658142, + "logps/chosen": -445.00469970703125, + "logps/rejected": -451.1468811035156, + "logps/weighted_chosen": -3.063525438308716, + "logps/weighted_rejected": -3.897021532058716, + "loss": 0.6134, + "rewards/accuracies": 0.609375, + "rewards/chosen": -144.0207061767578, + "rewards/margins": 41.165626525878906, + "rewards/rejected": -185.15585327148438, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.752685546875, + "rewards/weighted_margins": 0.5895751714706421, + "rewards/weighted_rejected": -1.341894507408142, + "step": 780 + }, + { + "epoch": 0.4135043182412981, + "grad_norm": 19.472450256347656, + "learning_rate": 7.307729834504154e-07, + "logits/chosen": -1.052435278892517, + "logits/rejected": -1.10076904296875, + "logps/chosen": -457.2640686035156, + "logps/rejected": -488.16876220703125, + "logps/weighted_chosen": -3.143115282058716, + "logps/weighted_rejected": -4.254638671875, + "loss": 0.6137, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -175.60116577148438, + "rewards/margins": 46.61640548706055, + "rewards/rejected": -222.21328735351562, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -0.878021240234375, + "rewards/weighted_margins": 0.650500476360321, + "rewards/weighted_rejected": -1.5286986827850342, + "step": 790 + }, + { + "epoch": 0.4187385501177702, + "grad_norm": 24.911523818969727, + "learning_rate": 7.226285613672847e-07, + "logits/chosen": 
-1.0021483898162842, + "logits/rejected": -1.031951904296875, + "logps/chosen": -466.4765625, + "logps/rejected": -547.578125, + "logps/weighted_chosen": -3.3023438453674316, + "logps/weighted_rejected": -4.483691215515137, + "loss": 0.6142, + "rewards/accuracies": 0.640625, + "rewards/chosen": -189.1164093017578, + "rewards/margins": 79.8050765991211, + "rewards/rejected": -268.85467529296875, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -0.978710949420929, + "rewards/weighted_margins": 0.6761840581893921, + "rewards/weighted_rejected": -1.6549804210662842, + "step": 800 + }, + { + "epoch": 0.4239727819942423, + "grad_norm": 16.060869216918945, + "learning_rate": 7.144097831531398e-07, + "logits/chosen": -0.978619396686554, + "logits/rejected": -1.0038635730743408, + "logps/chosen": -456.95623779296875, + "logps/rejected": -505.2093811035156, + "logps/weighted_chosen": -3.171826124191284, + "logps/weighted_rejected": -4.205761909484863, + "loss": 0.5645, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -186.97421264648438, + "rewards/margins": 54.0078125, + "rewards/rejected": -240.85311889648438, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.788342297077179, + "rewards/weighted_margins": 0.7865844964981079, + "rewards/weighted_rejected": -1.574896216392517, + "step": 810 + }, + { + "epoch": 0.42920701387071447, + "grad_norm": 23.292619705200195, + "learning_rate": 7.061193938131396e-07, + "logits/chosen": -0.9266418218612671, + "logits/rejected": -0.9759277105331421, + "logps/chosen": -492.421875, + "logps/rejected": -497.2515563964844, + "logps/weighted_chosen": -3.4576172828674316, + "logps/weighted_rejected": -4.116650581359863, + "loss": 0.5666, + "rewards/accuracies": 0.578125, + "rewards/chosen": -192.64022827148438, + "rewards/margins": 39.222267150878906, + "rewards/rejected": -231.8562469482422, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": 
-0.797924816608429, + "rewards/weighted_margins": 0.775280773639679, + "rewards/weighted_rejected": -1.573089599609375, + "step": 820 + }, + { + "epoch": 0.4344412457471866, + "grad_norm": 23.763275146484375, + "learning_rate": 6.977601622699789e-07, + "logits/chosen": -0.9908691644668579, + "logits/rejected": -1.057653784751892, + "logps/chosen": -459.80157470703125, + "logps/rejected": -544.8687744140625, + "logps/weighted_chosen": -3.350512742996216, + "logps/weighted_rejected": -4.385839939117432, + "loss": 0.5077, + "rewards/accuracies": 0.6781250238418579, + "rewards/chosen": -175.025390625, + "rewards/margins": 90.8042984008789, + "rewards/rejected": -265.86798095703125, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -0.699688732624054, + "rewards/weighted_margins": 0.9197998046875, + "rewards/weighted_rejected": -1.61993408203125, + "step": 830 + }, + { + "epoch": 0.4396754776236587, + "grad_norm": 29.07372283935547, + "learning_rate": 6.893348804390882e-07, + "logits/chosen": -1.094964623451233, + "logits/rejected": -1.1045074462890625, + "logps/chosen": -521.2327880859375, + "logps/rejected": -545.9468994140625, + "logps/weighted_chosen": -3.6615967750549316, + "logps/weighted_rejected": -4.320361137390137, + "loss": 0.5747, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -225.86563110351562, + "rewards/margins": 60.388671875, + "rewards/rejected": -286.2398376464844, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -0.912017822265625, + "rewards/weighted_margins": 0.748242199420929, + "rewards/weighted_rejected": -1.660058617591858, + "step": 840 + }, + { + "epoch": 0.44490970950013087, + "grad_norm": 23.878381729125977, + "learning_rate": 6.808463622961578e-07, + "logits/chosen": -1.0891234874725342, + "logits/rejected": -1.1309936046600342, + "logps/chosen": -545.3046875, + "logps/rejected": -615.7484130859375, + "logps/weighted_chosen": -3.533984422683716, + 
"logps/weighted_rejected": -4.504004001617432, + "loss": 0.5472, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -252.8015594482422, + "rewards/margins": 81.0531234741211, + "rewards/rejected": -333.59295654296875, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -0.967456042766571, + "rewards/weighted_margins": 0.86865234375, + "rewards/weighted_rejected": -1.835351586341858, + "step": 850 + }, + { + "epoch": 0.45014394137660296, + "grad_norm": 101.535888671875, + "learning_rate": 6.722974429372925e-07, + "logits/chosen": -1.0688354969024658, + "logits/rejected": -1.1046874523162842, + "logps/chosen": -578.9781494140625, + "logps/rejected": -604.3499755859375, + "logps/weighted_chosen": -3.37939453125, + "logps/weighted_rejected": -5.040380954742432, + "loss": 0.5051, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -275.2124938964844, + "rewards/margins": 66.9710922241211, + "rewards/rejected": -342.20001220703125, + "rewards/weighted_accuracies": 0.7749999761581421, + "rewards/weighted_chosen": -1.086279273033142, + "rewards/weighted_margins": 1.05950927734375, + "rewards/weighted_rejected": -2.1461181640625, + "step": 860 + }, + { + "epoch": 0.4553781732530751, + "grad_norm": 66.56680297851562, + "learning_rate": 6.636909776321128e-07, + "logits/chosen": -1.1214478015899658, + "logits/rejected": -1.11016845703125, + "logps/chosen": -493.3843688964844, + "logps/rejected": -590.8125, + "logps/weighted_chosen": -3.670654296875, + "logps/weighted_rejected": -4.723730564117432, + "loss": 0.5107, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -231.6789093017578, + "rewards/margins": 94.42109680175781, + "rewards/rejected": -326.2250061035156, + "rewards/weighted_accuracies": 0.746874988079071, + "rewards/weighted_chosen": -1.0464904308319092, + "rewards/weighted_margins": 0.9440551996231079, + "rewards/weighted_rejected": -1.99102783203125, + "step": 870 + }, + { + "epoch": 
0.46061240512954726, + "grad_norm": 23.494997024536133, + "learning_rate": 6.550298408701174e-07, + "logits/chosen": -1.094885230064392, + "logits/rejected": -1.1415894031524658, + "logps/chosen": -534.1663818359375, + "logps/rejected": -621.9547119140625, + "logps/weighted_chosen": -3.794140577316284, + "logps/weighted_rejected": -5.148291110992432, + "loss": 0.5174, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -247.2595672607422, + "rewards/margins": 92.109375, + "rewards/rejected": -339.3968811035156, + "rewards/weighted_accuracies": 0.753125011920929, + "rewards/weighted_chosen": -0.9449707269668579, + "rewards/weighted_margins": 0.938586413860321, + "rewards/weighted_rejected": -1.8829224109649658, + "step": 880 + }, + { + "epoch": 0.46584663700601936, + "grad_norm": 27.359371185302734, + "learning_rate": 6.463169254006276e-07, + "logits/chosen": -1.1160705089569092, + "logits/rejected": -1.157629370689392, + "logps/chosen": -538.1109619140625, + "logps/rejected": -562.7062377929688, + "logps/weighted_chosen": -3.73779296875, + "logps/weighted_rejected": -4.817724704742432, + "loss": 0.5154, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -256.6499938964844, + "rewards/margins": 59.66523361206055, + "rewards/rejected": -316.37225341796875, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.019891381263733, + "rewards/weighted_margins": 1.008874535560608, + "rewards/weighted_rejected": -2.028430223464966, + "step": 890 + }, + { + "epoch": 0.4710808688824915, + "grad_norm": 20.988025665283203, + "learning_rate": 6.375551412666326e-07, + "logits/chosen": -1.0879943370819092, + "logits/rejected": -1.1199951171875, + "logps/chosen": -514.9187622070312, + "logps/rejected": -545.7906494140625, + "logps/weighted_chosen": -3.4129395484924316, + "logps/weighted_rejected": -4.592138767242432, + "loss": 0.6113, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -240.97891235351562, + "rewards/margins": 
48.454689025878906, + "rewards/rejected": -289.4117126464844, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -1.1000854969024658, + "rewards/weighted_margins": 0.7867187261581421, + "rewards/weighted_rejected": -1.887121558189392, + "step": 900 + }, + { + "epoch": 0.4763151007589636, + "grad_norm": 29.907148361206055, + "learning_rate": 6.287474148328583e-07, + "logits/chosen": -1.0193603038787842, + "logits/rejected": -0.9993133544921875, + "logps/chosen": -474.3374938964844, + "logps/rejected": -501.23748779296875, + "logps/weighted_chosen": -3.7416014671325684, + "logps/weighted_rejected": -5.238329887390137, + "loss": 0.5727, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -203.36563110351562, + "rewards/margins": 50.95781326293945, + "rewards/rejected": -254.2734375, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.043182373046875, + "rewards/weighted_margins": 0.8107665777206421, + "rewards/weighted_rejected": -1.8539307117462158, + "step": 910 + }, + { + "epoch": 0.48154933263543576, + "grad_norm": 36.32797622680664, + "learning_rate": 6.198966878083857e-07, + "logits/chosen": -1.0350799560546875, + "logits/rejected": -1.0523681640625, + "logps/chosen": -488.9765625, + "logps/rejected": -553.484375, + "logps/weighted_chosen": -3.7232666015625, + "logps/weighted_rejected": -4.598974704742432, + "loss": 0.5581, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -217.74844360351562, + "rewards/margins": 68.5667953491211, + "rewards/rejected": -286.3515625, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -1.01214599609375, + "rewards/weighted_margins": 0.775787353515625, + "rewards/weighted_rejected": -1.787988305091858, + "step": 920 + }, + { + "epoch": 0.48678356451190785, + "grad_norm": 40.16273880004883, + "learning_rate": 6.110059162641439e-07, + "logits/chosen": -1.0597412586212158, + "logits/rejected": -1.0781066417694092, + "logps/chosen": 
-513.3226318359375, + "logps/rejected": -559.4593505859375, + "logps/weighted_chosen": -3.147705078125, + "logps/weighted_rejected": -4.214404106140137, + "loss": 0.5481, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -231.1085968017578, + "rewards/margins": 61.108985900878906, + "rewards/rejected": -292.21954345703125, + "rewards/weighted_accuracies": 0.721875011920929, + "rewards/weighted_chosen": -0.8689819574356079, + "rewards/weighted_margins": 0.7522827386856079, + "rewards/weighted_rejected": -1.6212646961212158, + "step": 930 + }, + { + "epoch": 0.49201779638838, + "grad_norm": 29.65454864501953, + "learning_rate": 6.020780696456059e-07, + "logits/chosen": -1.072198510169983, + "logits/rejected": -1.104650855064392, + "logps/chosen": -511.18280029296875, + "logps/rejected": -601.0250244140625, + "logps/weighted_chosen": -3.1954102516174316, + "logps/weighted_rejected": -4.517724514007568, + "loss": 0.5407, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -240.52108764648438, + "rewards/margins": 93.419921875, + "rewards/rejected": -333.953125, + "rewards/weighted_accuracies": 0.731249988079071, + "rewards/weighted_chosen": -1.0927855968475342, + "rewards/weighted_margins": 0.8496459722518921, + "rewards/weighted_rejected": -1.942968726158142, + "step": 940 + }, + { + "epoch": 0.49725202826485215, + "grad_norm": 210.50332641601562, + "learning_rate": 5.931161297810185e-07, + "logits/chosen": -1.132635474205017, + "logits/rejected": -1.1451904773712158, + "logps/chosen": -574.6031494140625, + "logps/rejected": -629.4656372070312, + "logps/weighted_chosen": -4.126172065734863, + "logps/weighted_rejected": -5.016747951507568, + "loss": 0.5998, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -301.52264404296875, + "rewards/margins": 66.5218734741211, + "rewards/rejected": -368.080078125, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -1.307519555091858, + "rewards/weighted_margins": 
0.74249267578125, + "rewards/weighted_rejected": -2.0491180419921875, + "step": 950 + }, + { + "epoch": 0.5024862601413242, + "grad_norm": 113.20726013183594, + "learning_rate": 5.841230898854959e-07, + "logits/chosen": -1.070550560951233, + "logits/rejected": -1.0872802734375, + "logps/chosen": -652.0281372070312, + "logps/rejected": -711.1765747070312, + "logps/weighted_chosen": -3.9981932640075684, + "logps/weighted_rejected": -5.147070407867432, + "loss": 0.6329, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -351.828125, + "rewards/margins": 97.4195327758789, + "rewards/rejected": -449.2261657714844, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -1.564361572265625, + "rewards/weighted_margins": 0.872241199016571, + "rewards/weighted_rejected": -2.436718702316284, + "step": 960 + }, + { + "epoch": 0.5077204920177963, + "grad_norm": 22.200820922851562, + "learning_rate": 5.751019535613102e-07, + "logits/chosen": -0.987274169921875, + "logits/rejected": -1.0052611827850342, + "logps/chosen": -531.3624877929688, + "logps/rejected": -610.1218872070312, + "logps/weighted_chosen": -3.8460450172424316, + "logps/weighted_rejected": -5.256982326507568, + "loss": 0.5675, + "rewards/accuracies": 0.65625, + "rewards/chosen": -271.6910095214844, + "rewards/margins": 91.53633117675781, + "rewards/rejected": -363.22344970703125, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -1.357843041419983, + "rewards/weighted_margins": 1.01031494140625, + "rewards/weighted_rejected": -2.367443799972534, + "step": 970 + }, + { + "epoch": 0.5129547238942685, + "grad_norm": 17.362323760986328, + "learning_rate": 5.660557337947117e-07, + "logits/chosen": -0.9707549810409546, + "logits/rejected": -0.983325183391571, + "logps/chosen": -549.1953125, + "logps/rejected": -586.7874755859375, + "logps/weighted_chosen": -3.279223680496216, + "logps/weighted_rejected": -4.507519721984863, + "loss": 0.5466, + 
"rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -254.6687469482422, + "rewards/margins": 74.56758117675781, + "rewards/rejected": -329.2632751464844, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -1.0866820812225342, + "rewards/weighted_margins": 0.8655151128768921, + "rewards/weighted_rejected": -1.9528076648712158, + "step": 980 + }, + { + "epoch": 0.5181889557707406, + "grad_norm": 39.01738739013672, + "learning_rate": 5.569874519496174e-07, + "logits/chosen": -0.963134765625, + "logits/rejected": -1.01763916015625, + "logps/chosen": -488.7406311035156, + "logps/rejected": -554.3687744140625, + "logps/weighted_chosen": -3.665576219558716, + "logps/weighted_rejected": -4.876318454742432, + "loss": 0.5929, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -213.4460906982422, + "rewards/margins": 73.8890609741211, + "rewards/rejected": -287.3500061035156, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -0.959338366985321, + "rewards/weighted_margins": 0.7813965082168579, + "rewards/weighted_rejected": -1.740045189857483, + "step": 990 + }, + { + "epoch": 0.5234231876472127, + "grad_norm": 33.2608642578125, + "learning_rate": 5.47900136758499e-07, + "logits/chosen": -0.9298340082168579, + "logits/rejected": -0.989898681640625, + "logps/chosen": -527.16015625, + "logps/rejected": -566.2453002929688, + "logps/weighted_chosen": -3.71044921875, + "logps/weighted_rejected": -4.887304782867432, + "loss": 0.5395, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -259.56170654296875, + "rewards/margins": 64.66015625, + "rewards/rejected": -324.302734375, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -1.119836449623108, + "rewards/weighted_margins": 0.944445788860321, + "rewards/weighted_rejected": -2.063854932785034, + "step": 1000 + }, + { + "epoch": 0.5234231876472127, + "eval_logits/chosen": -1.0472733974456787, + 
"eval_logits/rejected": -1.0595996379852295, + "eval_logps/chosen": -573.6119995117188, + "eval_logps/rejected": -629.1840209960938, + "eval_logps/weighted_chosen": -3.815713405609131, + "eval_logps/weighted_rejected": -4.930161476135254, + "eval_loss": 0.5728335976600647, + "eval_rewards/accuracies": 0.5989999771118164, + "eval_rewards/chosen": -284.8971252441406, + "eval_rewards/margins": 76.59700012207031, + "eval_rewards/rejected": -361.5224914550781, + "eval_rewards/weighted_accuracies": 0.7070000171661377, + "eval_rewards/weighted_chosen": -1.2679998874664307, + "eval_rewards/weighted_margins": 0.8639541268348694, + "eval_rewards/weighted_rejected": -2.1319541931152344, + "eval_runtime": 1366.4223, + "eval_samples_per_second": 1.464, + "eval_steps_per_second": 0.366, + "step": 1000 + } + ], + "logging_steps": 10, + "max_steps": 1911, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000..f626c5c --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe50c447c403d41daaff89efed0e09938eb1d87a3da0072f9c7935887bef87f +size 8721 diff --git a/checkpoint-1000/zero_to_fp32.py b/checkpoint-1000/zero_to_fp32.py new file mode 100644 index 0000000..0e75914 --- /dev/null +++ b/checkpoint-1000/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. 
@dataclass
class zero_model_state:
    """Model-side state recovered from one ``*_model_states.pt`` checkpoint file."""
    # tensors from state_dict["module"] whose names are listed under BUFFER_NAMES, cast to fp32
    buffers: dict
    # value stored under PARAM_SHAPES
    # NOTE(review): annotated as dict, but looks like a sequence of name->shape mappings - confirm
    param_shapes: dict
    # [key, value] pairs copied from the checkpoint's "shared_params" mapping
    shared_params: list
    # value stored under DS_VERSION, or None when the key is absent
    ds_version: int
    # value stored under FROZEN_PARAM_SHAPES, or None when the key is absent
    frozen_param_shapes: dict
    # value stored under FROZEN_PARAM_FRAGMENTS, or None when the key is absent
    frozen_param_fragments: dict


debug = 0

# load to cpu
device = torch.device('cpu')


def atoi(text):
    """Return ``int(text)`` when ``text`` is a run of decimal digits, otherwise ``text`` unchanged."""
    # isdecimal() rather than isdigit(): isdigit() also accepts characters such as
    # superscript digits, which int() cannot parse and would raise ValueError on.
    return int(text) if text.isdecimal() else text


def natural_keys(text):
    """
    Sort key that orders embedded numbers numerically ("human order").

    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    """
    # the capturing group keeps the digit runs in the split result so atoi can convert them
    return [atoi(c) for c in re.split(r'(\d+)', text)]


def get_model_state_file(checkpoint_dir, zero_stage):
    """
    Return the path of the single model-states file expected in ``checkpoint_dir``.

    Args:
        checkpoint_dir: directory that holds the DeepSpeed checkpoint files.
        zero_stage: ZeRO stage of the checkpoint; values <= 2 and the value 3 are supported.

    Raises:
        FileNotFoundError: ``checkpoint_dir`` is not a directory, or the expected file is missing.
        ValueError: ``zero_stage`` is not a supported stage.
    """
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
    else:
        # previously fell through and failed later with UnboundLocalError on `file`
        raise ValueError(f"unknown zero stage {zero_stage}")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file


def get_checkpoint_files(checkpoint_dir, glob_pattern):
    """
    Return the files in ``checkpoint_dir`` matching ``glob_pattern``, in natural (human) order.

    Raises:
        FileNotFoundError: no file matches the pattern.
    """
    # XXX: need to test that this simple glob rule works for multi-node setup too
    ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)

    if len(ckpt_files) == 0:
        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")

    return ckpt_files


def get_optim_files(checkpoint_dir):
    """Return all ``*_optim_states.pt`` files in ``checkpoint_dir`` in natural order."""
    return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")


def get_model_state_files(checkpoint_dir):
    """Return all ``*_model_states.pt`` files in ``checkpoint_dir`` in natural order."""
    return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")


def parse_model_states(files):
    """
    Load every model-states file and collect the fields needed for weight reconstruction.

    Args:
        files: iterable of paths to ``*_model_states.pt`` files.

    Returns:
        list of ``zero_model_state``, one per input file, in input order.

    Raises:
        ValueError: a file does not contain the BUFFER_NAMES entry.
    """
    zero_model_states = []
    for file in files:
        # NOTE(review): weights_only=False unpickles arbitrary objects - only load trusted checkpoints
        state_dict = torch.load(file, map_location=device, weights_only=False)

        if BUFFER_NAMES not in state_dict:
            raise ValueError(f"{file} is not a model state checkpoint")
        buffer_names = state_dict[BUFFER_NAMES]
        if debug:
            print("Found buffers:", buffer_names)

        # recover just the buffers while restoring them to fp32 if they were saved in fp16
        buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
        param_shapes = state_dict[PARAM_SHAPES]

        # frozen parameters are optional in the checkpoint
        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
        if frozen_param_shapes is not None and debug:
            print(f"Found frozen_param_shapes: {frozen_param_shapes}")

        # handle shared params
        shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]

        ds_version = state_dict.get(DS_VERSION, None)

        frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)

        z_model_state = zero_model_state(buffers=buffers,
                                         param_shapes=param_shapes,
                                         shared_params=shared_params,
                                         ds_version=ds_version,
                                         frozen_param_shapes=frozen_param_shapes,
                                         frozen_param_fragments=frozen_param_fragments)
        zero_model_states.append(z_model_state)

    return zero_model_states
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-1500/chat_template.jinja b/checkpoint-1500/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/checkpoint-1500/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-1500/config.json b/checkpoint-1500/config.json new file mode 100644 index 0000000..3f8f5c0 --- /dev/null +++ b/checkpoint-1500/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 
128256 +} diff --git a/checkpoint-1500/generation_config.json b/checkpoint-1500/generation_config.json new file mode 100644 index 0000000..fc3c54a --- /dev/null +++ b/checkpoint-1500/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "eos_token_id": 128001, + "transformers_version": "4.54.1" +} diff --git a/checkpoint-1500/latest b/checkpoint-1500/latest new file mode 100644 index 0000000..c56ff77 --- /dev/null +++ b/checkpoint-1500/latest @@ -0,0 +1 @@ +global_step1500 \ No newline at end of file diff --git a/checkpoint-1500/model-00001-of-00004.safetensors b/checkpoint-1500/model-00001-of-00004.safetensors new file mode 100644 index 0000000..69d8285 --- /dev/null +++ b/checkpoint-1500/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c712bd791cac55fa81f69cd09bb762e5581e984bb7e4a38539476f683a06f3b +size 4976698672 diff --git a/checkpoint-1500/model-00002-of-00004.safetensors b/checkpoint-1500/model-00002-of-00004.safetensors new file mode 100644 index 0000000..5d0e87b --- /dev/null +++ b/checkpoint-1500/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6093493fabd21f9f37ca4ed0a5ea9e83d5901123965172e07c98b9556fa7d19a +size 4999802720 diff --git a/checkpoint-1500/model-00003-of-00004.safetensors b/checkpoint-1500/model-00003-of-00004.safetensors new file mode 100644 index 0000000..a3a4435 --- /dev/null +++ b/checkpoint-1500/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca3db568e34e7aa07729d1ebfa0e7a9e4538ffe7619560daf5647c0bf41c2a56 +size 4915916176 diff --git a/checkpoint-1500/model-00004-of-00004.safetensors b/checkpoint-1500/model-00004-of-00004.safetensors new file mode 100644 index 0000000..c114f99 --- /dev/null +++ b/checkpoint-1500/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a34100d445610b94926c5affaa35aff571dbbc9f797904dca416ff3c4a7befc4 +size 1168138808 diff --git a/checkpoint-1500/model.safetensors.index.json b/checkpoint-1500/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/checkpoint-1500/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": 
"model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": 
"model-00004-of-00004.safetensors" + } +} diff --git a/checkpoint-1500/rng_state_0.pth b/checkpoint-1500/rng_state_0.pth new file mode 100644 index 0000000..8d84687 --- /dev/null +++ b/checkpoint-1500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ea5dcfd1b4a49b41b4fa01a8b24bba6186957162c3fd555ebff28620c7268b +size 14917 diff --git a/checkpoint-1500/rng_state_1.pth b/checkpoint-1500/rng_state_1.pth new file mode 100644 index 0000000..54f0119 --- /dev/null +++ b/checkpoint-1500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a5af18bb5eae8b7fd6bdef66259014d98ba87ffb16d614bba38f2c32030798 +size 14917 diff --git a/checkpoint-1500/scheduler.pt b/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000..fabfbf2 --- /dev/null +++ b/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5360a9ca3afd27044b0e3751f896f0dd514aa42a145ec88e8857a2bb4c8588f4 +size 1465 diff --git a/checkpoint-1500/special_tokens_map.json b/checkpoint-1500/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/checkpoint-1500/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1500/tokenizer.json b/checkpoint-1500/tokenizer.json new file mode 100644 index 0000000..03aa64f --- /dev/null +++ b/checkpoint-1500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0968dcc0ee8e56c7dccd34a7f51f8065ea0cb9e2cc529e3243d1e5c0a4bdaa0c +size 17208754 diff --git 
a/checkpoint-1500/tokenizer_config.json b/checkpoint-1500/tokenizer_config.json new file mode 100644 index 0000000..877a9a9 --- /dev/null +++ b/checkpoint-1500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": 
"<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": 
"<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": 
"<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 32768, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1500/trainer_state.json b/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000..f84e638 --- /dev/null +++ b/checkpoint-1500/trainer_state.json @@ -0,0 +1,3271 @@ +{ + 
"best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7851347814708192, + "eval_steps": 500, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005234231876472127, + "grad_norm": 132.6717987060547, + "learning_rate": 0.0, + "logits/chosen": -0.40118408203125, + "logits/rejected": -0.41802978515625, + "logps/chosen": -297.609375, + "logps/rejected": -247.84375, + "logps/weighted_chosen": -4.7568359375, + "logps/weighted_rejected": -3.47998046875, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/weighted_accuracies": 0.0, + "rewards/weighted_chosen": 0.0, + "rewards/weighted_margins": 0.0, + "rewards/weighted_rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.005234231876472127, + "grad_norm": 226.00839233398438, + "learning_rate": 4.6875e-08, + "logits/chosen": -0.3175845742225647, + "logits/rejected": -0.3532341718673706, + "logps/chosen": -275.5841979980469, + "logps/rejected": -255.84548950195312, + "logps/weighted_chosen": -2.651665687561035, + "logps/weighted_rejected": -2.88427734375, + "loss": 0.6921, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0731336772441864, + "rewards/margins": -0.0670572891831398, + "rewards/rejected": -0.006076388992369175, + "rewards/weighted_accuracies": 0.2951388955116272, + "rewards/weighted_chosen": -0.0015835232334211469, + "rewards/weighted_margins": -0.0009324815473519266, + "rewards/weighted_rejected": -0.0006510416860692203, + "step": 10 + }, + { + "epoch": 0.010468463752944255, + "grad_norm": 14.726158142089844, + "learning_rate": 9.895833333333332e-08, + "logits/chosen": -0.29781341552734375, + "logits/rejected": -0.3136836886405945, + "logps/chosen": -294.4296875, + "logps/rejected": -272.58984375, + "logps/weighted_chosen": -2.458728075027466, + "logps/weighted_rejected": 
-2.455883741378784, + "loss": 0.6924, + "rewards/accuracies": 0.30000001192092896, + "rewards/chosen": -0.14013671875, + "rewards/margins": -0.13369140028953552, + "rewards/rejected": -0.0064453124068677425, + "rewards/weighted_accuracies": 0.34062498807907104, + "rewards/weighted_chosen": -0.0013603210682049394, + "rewards/weighted_margins": -0.0010925292735919356, + "rewards/weighted_rejected": -0.0002677917364053428, + "step": 20 + }, + { + "epoch": 0.015702695629416383, + "grad_norm": 77.6125259399414, + "learning_rate": 1.5104166666666664e-07, + "logits/chosen": -0.2918853759765625, + "logits/rejected": -0.3377639651298523, + "logps/chosen": -298.05859375, + "logps/rejected": -268.0132751464844, + "logps/weighted_chosen": -2.4350829124450684, + "logps/weighted_rejected": -2.7343993186950684, + "loss": 0.6926, + "rewards/accuracies": 0.24062499403953552, + "rewards/chosen": -0.06621094048023224, + "rewards/margins": -0.1640625, + "rewards/rejected": 0.09785155951976776, + "rewards/weighted_accuracies": 0.3187499940395355, + "rewards/weighted_chosen": 0.0014068603049963713, + "rewards/weighted_margins": -0.0015777588123455644, + "rewards/weighted_rejected": 0.0029846192337572575, + "step": 30 + }, + { + "epoch": 0.02093692750588851, + "grad_norm": 30.666196823120117, + "learning_rate": 2.03125e-07, + "logits/chosen": -0.30072021484375, + "logits/rejected": -0.3433845639228821, + "logps/chosen": -278.68829345703125, + "logps/rejected": -253.90780639648438, + "logps/weighted_chosen": -2.506396532058716, + "logps/weighted_rejected": -2.8416504859924316, + "loss": 0.6908, + "rewards/accuracies": 0.3062500059604645, + "rewards/chosen": 0.063232421875, + "rewards/margins": 0.04838867112994194, + "rewards/rejected": 0.014843749813735485, + "rewards/weighted_accuracies": 0.40312498807907104, + "rewards/weighted_chosen": 0.0042968750931322575, + "rewards/weighted_margins": 0.0019538879860192537, + "rewards/weighted_rejected": 0.0023429871071130037, + "step": 40 + }, + { 
+ "epoch": 0.02617115938236064, + "grad_norm": 18.60569953918457, + "learning_rate": 2.552083333333333e-07, + "logits/chosen": -0.2819870114326477, + "logits/rejected": -0.32059136033058167, + "logps/chosen": -280.31951904296875, + "logps/rejected": -267.4359436035156, + "logps/weighted_chosen": -2.4267334938049316, + "logps/weighted_rejected": -2.529711961746216, + "loss": 0.6891, + "rewards/accuracies": 0.3187499940395355, + "rewards/chosen": -0.03535156324505806, + "rewards/margins": -0.13984374701976776, + "rewards/rejected": 0.1044921875, + "rewards/weighted_accuracies": 0.3968749940395355, + "rewards/weighted_chosen": 0.0039031982887536287, + "rewards/weighted_margins": 0.005755615420639515, + "rewards/weighted_rejected": -0.0018524170154705644, + "step": 50 + }, + { + "epoch": 0.031405391258832765, + "grad_norm": 38.21036911010742, + "learning_rate": 3.0729166666666665e-07, + "logits/chosen": -0.31453245878219604, + "logits/rejected": -0.30809077620506287, + "logps/chosen": -277.66015625, + "logps/rejected": -261.7445373535156, + "logps/weighted_chosen": -2.8622069358825684, + "logps/weighted_rejected": -2.7553467750549316, + "loss": 0.6894, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": 0.04150390625, + "rewards/margins": 0.08027343451976776, + "rewards/rejected": -0.03876953199505806, + "rewards/weighted_accuracies": 0.4312500059604645, + "rewards/weighted_chosen": 0.0006561279296875, + "rewards/weighted_margins": 0.006243896670639515, + "rewards/weighted_rejected": -0.005587768740952015, + "step": 60 + }, + { + "epoch": 0.036639623135304895, + "grad_norm": 69.19047546386719, + "learning_rate": 3.59375e-07, + "logits/chosen": -0.3177490234375, + "logits/rejected": -0.3246749937534332, + "logps/chosen": -289.76251220703125, + "logps/rejected": -244.92578125, + "logps/weighted_chosen": -2.3438963890075684, + "logps/weighted_rejected": -2.7010498046875, + "loss": 0.6841, + "rewards/accuracies": 0.49687498807907104, + "rewards/chosen": 
0.29765623807907104, + "rewards/margins": 0.4546875059604645, + "rewards/rejected": -0.15703125298023224, + "rewards/weighted_accuracies": 0.5406249761581421, + "rewards/weighted_chosen": 0.01530532818287611, + "rewards/weighted_margins": 0.01918792724609375, + "rewards/weighted_rejected": -0.0038825988303869963, + "step": 70 + }, + { + "epoch": 0.04187385501177702, + "grad_norm": 51.98476791381836, + "learning_rate": 4.114583333333333e-07, + "logits/chosen": -0.2850998044013977, + "logits/rejected": -0.30662041902542114, + "logps/chosen": -289.234375, + "logps/rejected": -270.375, + "logps/weighted_chosen": -2.5325684547424316, + "logps/weighted_rejected": -2.796435594558716, + "loss": 0.6747, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.512499988079071, + "rewards/margins": 0.6001952886581421, + "rewards/rejected": -0.08769531548023224, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": 0.036380767822265625, + "rewards/weighted_margins": 0.04396667331457138, + "rewards/weighted_rejected": -0.007586670108139515, + "step": 80 + }, + { + "epoch": 0.04710808688824915, + "grad_norm": 30.52783203125, + "learning_rate": 4.6354166666666664e-07, + "logits/chosen": -0.3142959475517273, + "logits/rejected": -0.3075408935546875, + "logps/chosen": -280.11407470703125, + "logps/rejected": -257.95233154296875, + "logps/weighted_chosen": -2.719482421875, + "logps/weighted_rejected": -2.88037109375, + "loss": 0.6687, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.5205078125, + "rewards/margins": 0.737109363079071, + "rewards/rejected": -0.21660156548023224, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": 0.06780395656824112, + "rewards/weighted_margins": 0.07340697944164276, + "rewards/weighted_rejected": -0.0056396485306322575, + "step": 90 + }, + { + "epoch": 0.05234231876472128, + "grad_norm": 69.397705078125, + "learning_rate": 5.156249999999999e-07, + "logits/chosen": 
-0.28213196992874146, + "logits/rejected": -0.3543289303779602, + "logps/chosen": -290.71875, + "logps/rejected": -286.73126220703125, + "logps/weighted_chosen": -2.2228636741638184, + "logps/weighted_rejected": -2.8367552757263184, + "loss": 0.6848, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": 0.24521484971046448, + "rewards/margins": 1.0690429210662842, + "rewards/rejected": -0.8238281011581421, + "rewards/weighted_accuracies": 0.5843750238418579, + "rewards/weighted_chosen": 0.05242309719324112, + "rewards/weighted_margins": 0.05032653734087944, + "rewards/weighted_rejected": 0.0021240233909338713, + "step": 100 + }, + { + "epoch": 0.05757655064119341, + "grad_norm": 36.600040435791016, + "learning_rate": 5.677083333333333e-07, + "logits/chosen": -0.33063429594039917, + "logits/rejected": -0.319937139749527, + "logps/chosen": -296.82501220703125, + "logps/rejected": -262.2984313964844, + "logps/weighted_chosen": -2.8468017578125, + "logps/weighted_rejected": -2.9306397438049316, + "loss": 0.6773, + "rewards/accuracies": 0.578125, + "rewards/chosen": -0.474609375, + "rewards/margins": 1.053613305091858, + "rewards/rejected": -1.528222680091858, + "rewards/weighted_accuracies": 0.534375011920929, + "rewards/weighted_chosen": 0.013439941219985485, + "rewards/weighted_margins": 0.05541381984949112, + "rewards/weighted_rejected": -0.04198913648724556, + "step": 110 + }, + { + "epoch": 0.06281078251766553, + "grad_norm": 57.109580993652344, + "learning_rate": 6.197916666666666e-07, + "logits/chosen": -0.33633461594581604, + "logits/rejected": -0.36155110597610474, + "logps/chosen": -295.3687438964844, + "logps/rejected": -256.1953125, + "logps/weighted_chosen": -2.161865234375, + "logps/weighted_rejected": -2.4251465797424316, + "loss": 0.6791, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": -0.72900390625, + "rewards/margins": 1.641210913658142, + "rewards/rejected": -2.3702149391174316, + "rewards/weighted_accuracies": 
0.5562499761581421, + "rewards/weighted_chosen": 0.007176590152084827, + "rewards/weighted_margins": 0.05286560207605362, + "rewards/weighted_rejected": -0.04570160061120987, + "step": 120 + }, + { + "epoch": 0.06804501439413765, + "grad_norm": 39.176841735839844, + "learning_rate": 6.718749999999999e-07, + "logits/chosen": -0.29625242948532104, + "logits/rejected": -0.2914108335971832, + "logps/chosen": -306.6781311035156, + "logps/rejected": -280.15936279296875, + "logps/weighted_chosen": -2.188079833984375, + "logps/weighted_rejected": -2.5787596702575684, + "loss": 0.6659, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.644238293170929, + "rewards/margins": 1.972265601158142, + "rewards/rejected": -2.616406202316284, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": 0.01349639892578125, + "rewards/weighted_margins": 0.0841522216796875, + "rewards/weighted_rejected": -0.07064209133386612, + "step": 130 + }, + { + "epoch": 0.07327924627060979, + "grad_norm": 52.14993667602539, + "learning_rate": 7.239583333333333e-07, + "logits/chosen": -0.3304199278354645, + "logits/rejected": -0.3464847505092621, + "logps/chosen": -301.4390563964844, + "logps/rejected": -277.9515686035156, + "logps/weighted_chosen": -2.554003953933716, + "logps/weighted_rejected": -2.881591796875, + "loss": 0.6581, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -2.746875047683716, + "rewards/margins": 2.744921922683716, + "rewards/rejected": -5.491991996765137, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.02762756310403347, + "rewards/weighted_margins": 0.11510010063648224, + "rewards/weighted_rejected": -0.14276733994483948, + "step": 140 + }, + { + "epoch": 0.07851347814708191, + "grad_norm": 22.611814498901367, + "learning_rate": 7.760416666666666e-07, + "logits/chosen": -0.2870376706123352, + "logits/rejected": -0.2975311279296875, + "logps/chosen": -287.859375, + "logps/rejected": 
-257.54296875, + "logps/weighted_chosen": -3.089892625808716, + "logps/weighted_rejected": -3.1946043968200684, + "loss": 0.6544, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.7095704078674316, + "rewards/margins": 2.942578077316284, + "rewards/rejected": -6.652148246765137, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.005145263858139515, + "rewards/weighted_margins": 0.16416625678539276, + "rewards/weighted_rejected": -0.16951599717140198, + "step": 150 + }, + { + "epoch": 0.08374771002355404, + "grad_norm": 15.511767387390137, + "learning_rate": 8.28125e-07, + "logits/chosen": -0.3232177793979645, + "logits/rejected": -0.3726806640625, + "logps/chosen": -308.91796875, + "logps/rejected": -282.15704345703125, + "logps/weighted_chosen": -2.5903563499450684, + "logps/weighted_rejected": -2.742602586746216, + "loss": 0.6211, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -4.956835746765137, + "rewards/margins": 3.9961915016174316, + "rewards/rejected": -8.953222274780273, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -0.002410888671875, + "rewards/weighted_margins": 0.23797607421875, + "rewards/weighted_rejected": -0.24028320610523224, + "step": 160 + }, + { + "epoch": 0.08898194190002617, + "grad_norm": 167.33956909179688, + "learning_rate": 8.802083333333333e-07, + "logits/chosen": -0.36021536588668823, + "logits/rejected": -0.3597045838832855, + "logps/chosen": -311.03045654296875, + "logps/rejected": -270.46875, + "logps/weighted_chosen": -2.8318848609924316, + "logps/weighted_rejected": -3.139453172683716, + "loss": 0.6949, + "rewards/accuracies": 0.609375, + "rewards/chosen": -8.0087890625, + "rewards/margins": 4.345898628234863, + "rewards/rejected": -12.354199409484863, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -0.01859130896627903, + "rewards/weighted_margins": 0.20853272080421448, + "rewards/weighted_rejected": 
-0.22731323540210724, + "step": 170 + }, + { + "epoch": 0.0942161737764983, + "grad_norm": 64.57138061523438, + "learning_rate": 9.322916666666666e-07, + "logits/chosen": -0.33618468046188354, + "logits/rejected": -0.3534431457519531, + "logps/chosen": -284.2171936035156, + "logps/rejected": -272.12969970703125, + "logps/weighted_chosen": -2.694580078125, + "logps/weighted_rejected": -3.225878953933716, + "loss": 0.6814, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -10.43701171875, + "rewards/margins": 5.353320121765137, + "rewards/rejected": -15.7919921875, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.08297424018383026, + "rewards/weighted_margins": 0.26459962129592896, + "rewards/weighted_rejected": -0.347381591796875, + "step": 180 + }, + { + "epoch": 0.09945040565297043, + "grad_norm": 49.0852165222168, + "learning_rate": 9.84375e-07, + "logits/chosen": -0.354086309671402, + "logits/rejected": -0.38891831040382385, + "logps/chosen": -319.17498779296875, + "logps/rejected": -283.31561279296875, + "logps/weighted_chosen": -2.5078492164611816, + "logps/weighted_rejected": -3.016357421875, + "loss": 0.6496, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -13.561426162719727, + "rewards/margins": 5.937890529632568, + "rewards/rejected": -19.498828887939453, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.16942748427391052, + "rewards/weighted_margins": 0.24410399794578552, + "rewards/weighted_rejected": -0.41356199979782104, + "step": 190 + }, + { + "epoch": 0.10468463752944256, + "grad_norm": 53.46296691894531, + "learning_rate": 9.99959085414323e-07, + "logits/chosen": -0.37868577241897583, + "logits/rejected": -0.4114578366279602, + "logps/chosen": -324.7124938964844, + "logps/rejected": -279.72967529296875, + "logps/weighted_chosen": -2.8757567405700684, + "logps/weighted_rejected": -3.3623046875, + "loss": 0.639, + "rewards/accuracies": 0.609375, + 
"rewards/chosen": -15.428125381469727, + "rewards/margins": 6.552148342132568, + "rewards/rejected": -21.975000381469727, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.18135985732078552, + "rewards/weighted_margins": 0.29008787870407104, + "rewards/weighted_rejected": -0.471527099609375, + "step": 200 + }, + { + "epoch": 0.10991886940591468, + "grad_norm": 24.815481185913086, + "learning_rate": 9.997587035630105e-07, + "logits/chosen": -0.3853309750556946, + "logits/rejected": -0.4257049560546875, + "logps/chosen": -302.82891845703125, + "logps/rejected": -308.671875, + "logps/weighted_chosen": -2.632519483566284, + "logps/weighted_rejected": -3.3669190406799316, + "loss": 0.6558, + "rewards/accuracies": 0.65625, + "rewards/chosen": -15.814453125, + "rewards/margins": 8.331445693969727, + "rewards/rejected": -24.146093368530273, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.225901797413826, + "rewards/weighted_margins": 0.23236694931983948, + "rewards/weighted_rejected": -0.45829468965530396, + "step": 210 + }, + { + "epoch": 0.11515310128238682, + "grad_norm": 24.175745010375977, + "learning_rate": 9.99391406364405e-07, + "logits/chosen": -0.37365952134132385, + "logits/rejected": -0.3758789002895355, + "logps/chosen": -309.34686279296875, + "logps/rejected": -293.98126220703125, + "logps/weighted_chosen": -3.002514600753784, + "logps/weighted_rejected": -3.453906297683716, + "loss": 0.6732, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -17.203418731689453, + "rewards/margins": 7.933203220367432, + "rewards/rejected": -25.137109756469727, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.259225457906723, + "rewards/weighted_margins": 0.29540252685546875, + "rewards/weighted_rejected": -0.5546798706054688, + "step": 220 + }, + { + "epoch": 0.12038733315885894, + "grad_norm": 85.15988159179688, + "learning_rate": 9.988573164927884e-07, + 
"logits/chosen": -0.3097473084926605, + "logits/rejected": -0.3477935791015625, + "logps/chosen": -286.5078125, + "logps/rejected": -281.8453063964844, + "logps/weighted_chosen": -2.66943359375, + "logps/weighted_rejected": -3.1229491233825684, + "loss": 0.6646, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -17.690723419189453, + "rewards/margins": 12.424609184265137, + "rewards/rejected": -30.110157012939453, + "rewards/weighted_accuracies": 0.65625, + "rewards/weighted_chosen": -0.2716217041015625, + "rewards/weighted_margins": 0.32661741971969604, + "rewards/weighted_rejected": -0.5983597040176392, + "step": 230 + }, + { + "epoch": 0.12562156503533106, + "grad_norm": 26.17377471923828, + "learning_rate": 9.98156612329838e-07, + "logits/chosen": -0.39516907930374146, + "logits/rejected": -0.44511109590530396, + "logps/chosen": -286.74884033203125, + "logps/rejected": -318.22735595703125, + "logps/weighted_chosen": -2.6696534156799316, + "logps/weighted_rejected": -3.4151854515075684, + "loss": 0.643, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -21.990428924560547, + "rewards/margins": 14.028905868530273, + "rewards/rejected": -36.013282775878906, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.2329559326171875, + "rewards/weighted_margins": 0.3950134217739105, + "rewards/weighted_rejected": -0.6281493902206421, + "step": 240 + }, + { + "epoch": 0.13085579691180318, + "grad_norm": 56.73057174682617, + "learning_rate": 9.97289527905053e-07, + "logits/chosen": -0.40631332993507385, + "logits/rejected": -0.4203124940395355, + "logps/chosen": -290.1703186035156, + "logps/rejected": -291.6328125, + "logps/weighted_chosen": -3.051513671875, + "logps/weighted_rejected": -3.3163819313049316, + "loss": 0.677, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -25.742870330810547, + "rewards/margins": 9.973828315734863, + "rewards/rejected": -35.72148513793945, + 
"rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.2856689393520355, + "rewards/weighted_margins": 0.253326416015625, + "rewards/weighted_rejected": -0.5388733148574829, + "step": 250 + }, + { + "epoch": 0.1360900287882753, + "grad_norm": 17.766258239746094, + "learning_rate": 9.962563528175875e-07, + "logits/chosen": -0.3611465394496918, + "logits/rejected": -0.39628905057907104, + "logps/chosen": -324.36639404296875, + "logps/rejected": -297.765625, + "logps/weighted_chosen": -2.652392625808716, + "logps/weighted_rejected": -3.535571336746216, + "loss": 0.6414, + "rewards/accuracies": 0.578125, + "rewards/chosen": -25.621288299560547, + "rewards/margins": 11.306055068969727, + "rewards/rejected": -36.93359375, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -0.2533508241176605, + "rewards/weighted_margins": 0.2956604063510895, + "rewards/weighted_rejected": -0.5490142703056335, + "step": 260 + }, + { + "epoch": 0.14132426066474746, + "grad_norm": 17.552453994750977, + "learning_rate": 9.950574321395277e-07, + "logits/chosen": -0.41735154390335083, + "logits/rejected": -0.441476434469223, + "logps/chosen": -314.5093688964844, + "logps/rejected": -295.7093811035156, + "logps/weighted_chosen": -2.864941358566284, + "logps/weighted_rejected": -3.25732421875, + "loss": 0.661, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -25.1123046875, + "rewards/margins": 7.519726753234863, + "rewards/rejected": -32.62890625, + "rewards/weighted_accuracies": 0.609375, + "rewards/weighted_chosen": -0.3035888671875, + "rewards/weighted_margins": 0.2833190858364105, + "rewards/weighted_rejected": -0.5868393182754517, + "step": 270 + }, + { + "epoch": 0.14655849254121958, + "grad_norm": 47.66518020629883, + "learning_rate": 9.936931663006413e-07, + "logits/chosen": -0.4760284423828125, + "logits/rejected": -0.46795654296875, + "logps/chosen": -323.48126220703125, + "logps/rejected": -313.2875061035156, + 
"logps/weighted_chosen": -2.794970750808716, + "logps/weighted_rejected": -3.3581910133361816, + "loss": 0.6169, + "rewards/accuracies": 0.690625011920929, + "rewards/chosen": -20.707616806030273, + "rewards/margins": 13.166601181030273, + "rewards/rejected": -33.86640548706055, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.10174255073070526, + "rewards/weighted_margins": 0.34544676542282104, + "rewards/weighted_rejected": -0.447021484375, + "step": 280 + }, + { + "epoch": 0.1517927244176917, + "grad_norm": 32.503883361816406, + "learning_rate": 9.921640109546357e-07, + "logits/chosen": -0.44742050766944885, + "logits/rejected": -0.5166229009628296, + "logps/chosen": -292.1796875, + "logps/rejected": -289.6234436035156, + "logps/weighted_chosen": -2.7469239234924316, + "logps/weighted_rejected": -3.9541258811950684, + "loss": 0.6249, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -25.293359756469727, + "rewards/margins": 12.698633193969727, + "rewards/rejected": -37.994529724121094, + "rewards/weighted_accuracies": 0.628125011920929, + "rewards/weighted_chosen": -0.15215758979320526, + "rewards/weighted_margins": 0.4393859803676605, + "rewards/weighted_rejected": -0.5915588140487671, + "step": 290 + }, + { + "epoch": 0.15702695629416383, + "grad_norm": 17.32170867919922, + "learning_rate": 9.90470476826975e-07, + "logits/chosen": -0.5146636962890625, + "logits/rejected": -0.515917956829071, + "logps/chosen": -302.3570251464844, + "logps/rejected": -313.68438720703125, + "logps/weighted_chosen": -2.6830201148986816, + "logps/weighted_rejected": -3.202099561691284, + "loss": 0.6526, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -32.978126525878906, + "rewards/margins": 13.435937881469727, + "rewards/rejected": -46.408203125, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.23505249619483948, + "rewards/weighted_margins": 0.33623045682907104, + 
"rewards/weighted_rejected": -0.5710296630859375, + "step": 300 + }, + { + "epoch": 0.16226118817063595, + "grad_norm": 25.855854034423828, + "learning_rate": 9.886131295443002e-07, + "logits/chosen": -0.6332122683525085, + "logits/rejected": -0.6879852414131165, + "logps/chosen": -315.02264404296875, + "logps/rejected": -296.54998779296875, + "logps/weighted_chosen": -2.8891844749450684, + "logps/weighted_rejected": -3.3497071266174316, + "loss": 0.6099, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -33.83808517456055, + "rewards/margins": 12.542577743530273, + "rewards/rejected": -46.39081954956055, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.20775146782398224, + "rewards/weighted_margins": 0.507794201374054, + "rewards/weighted_rejected": -0.715728759765625, + "step": 310 + }, + { + "epoch": 0.16749542004710807, + "grad_norm": 19.11484718322754, + "learning_rate": 9.865925894455166e-07, + "logits/chosen": -0.730267345905304, + "logits/rejected": -0.746167004108429, + "logps/chosen": -338.2242126464844, + "logps/rejected": -307.18280029296875, + "logps/weighted_chosen": -2.9883790016174316, + "logps/weighted_rejected": -3.5892090797424316, + "loss": 0.6942, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -38.233009338378906, + "rewards/margins": 11.564062118530273, + "rewards/rejected": -49.80937576293945, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.3507080078125, + "rewards/weighted_margins": 0.3366760313510895, + "rewards/weighted_rejected": -0.6871337890625, + "step": 320 + }, + { + "epoch": 0.17272965192358022, + "grad_norm": 57.19697570800781, + "learning_rate": 9.84409531374603e-07, + "logits/chosen": -0.6843910217285156, + "logits/rejected": -0.6659576296806335, + "logps/chosen": -345.46875, + "logps/rejected": -316.2515563964844, + "logps/weighted_chosen": -3.05517578125, + "logps/weighted_rejected": -3.5519776344299316, + "loss": 0.6569, + 
"rewards/accuracies": 0.668749988079071, + "rewards/chosen": -40.732032775878906, + "rewards/margins": 12.651952743530273, + "rewards/rejected": -53.38984298706055, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.3262878358364105, + "rewards/weighted_margins": 0.346893310546875, + "rewards/weighted_rejected": -0.6730865240097046, + "step": 330 + }, + { + "epoch": 0.17796388380005235, + "grad_norm": 52.49288558959961, + "learning_rate": 9.820646844552219e-07, + "logits/chosen": -0.6993133425712585, + "logits/rejected": -0.7529846429824829, + "logps/chosen": -313.59295654296875, + "logps/rejected": -322.1499938964844, + "logps/weighted_chosen": -3.0488524436950684, + "logps/weighted_rejected": -3.440136671066284, + "loss": 0.6287, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -37.06660079956055, + "rewards/margins": 19.494531631469727, + "rewards/rejected": -56.556640625, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.30719298124313354, + "rewards/weighted_margins": 0.448944091796875, + "rewards/weighted_rejected": -0.755999743938446, + "step": 340 + }, + { + "epoch": 0.18319811567652447, + "grad_norm": 15.657389640808105, + "learning_rate": 9.795588318471964e-07, + "logits/chosen": -0.7813507318496704, + "logits/rejected": -0.7874206304550171, + "logps/chosen": -299.80157470703125, + "logps/rejected": -331.4375, + "logps/weighted_chosen": -2.84619140625, + "logps/weighted_rejected": -3.315380811691284, + "loss": 0.6405, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -40.32304763793945, + "rewards/margins": 14.830663681030273, + "rewards/rejected": -55.15625, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.315826416015625, + "rewards/weighted_margins": 0.386627197265625, + "rewards/weighted_rejected": -0.702471911907196, + "step": 350 + }, + { + "epoch": 0.1884323475529966, + "grad_norm": 16.19976806640625, + 
"learning_rate": 9.768928104849415e-07, + "logits/chosen": -0.801177978515625, + "logits/rejected": -0.799664318561554, + "logps/chosen": -323.5171813964844, + "logps/rejected": -305.046875, + "logps/weighted_chosen": -3.1164307594299316, + "logps/weighted_rejected": -3.3475098609924316, + "loss": 0.6865, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -40.823829650878906, + "rewards/margins": 15.389843940734863, + "rewards/rejected": -56.216407775878906, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.31828004121780396, + "rewards/weighted_margins": 0.3831420838832855, + "rewards/weighted_rejected": -0.7014526128768921, + "step": 360 + }, + { + "epoch": 0.19366657942946872, + "grad_norm": 89.87427520751953, + "learning_rate": 9.740675107979355e-07, + "logits/chosen": -0.7640800476074219, + "logits/rejected": -0.7867538332939148, + "logps/chosen": -361.13751220703125, + "logps/rejected": -334.97967529296875, + "logps/weighted_chosen": -2.5084471702575684, + "logps/weighted_rejected": -3.4689698219299316, + "loss": 0.6531, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -46.098045349121094, + "rewards/margins": 14.188085556030273, + "rewards/rejected": -60.26640701293945, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -0.36387938261032104, + "rewards/weighted_margins": 0.3567260801792145, + "rewards/weighted_rejected": -0.720538318157196, + "step": 370 + }, + { + "epoch": 0.19890081130594087, + "grad_norm": 22.484216690063477, + "learning_rate": 9.71083876413323e-07, + "logits/chosen": -0.7209137082099915, + "logits/rejected": -0.7318176031112671, + "logps/chosen": -353.6031188964844, + "logps/rejected": -339.16485595703125, + "logps/weighted_chosen": -2.70361328125, + "logps/weighted_rejected": -3.5843749046325684, + "loss": 0.6589, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -51.237892150878906, + "rewards/margins": 18.424999237060547, + 
"rewards/rejected": -69.64530944824219, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.4475158751010895, + "rewards/weighted_margins": 0.3267761170864105, + "rewards/weighted_rejected": -0.7747405767440796, + "step": 380 + }, + { + "epoch": 0.204135043182413, + "grad_norm": 21.885372161865234, + "learning_rate": 9.67942903840751e-07, + "logits/chosen": -0.7708206176757812, + "logits/rejected": -0.8207153081893921, + "logps/chosen": -355.18438720703125, + "logps/rejected": -350.47186279296875, + "logps/weighted_chosen": -2.8836669921875, + "logps/weighted_rejected": -3.5904297828674316, + "loss": 0.6028, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -50.973045349121094, + "rewards/margins": 25.190038681030273, + "rewards/rejected": -76.1617202758789, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.39097899198532104, + "rewards/weighted_margins": 0.4941650331020355, + "rewards/weighted_rejected": -0.884967029094696, + "step": 390 + }, + { + "epoch": 0.2093692750588851, + "grad_norm": 26.357742309570312, + "learning_rate": 9.646456421395447e-07, + "logits/chosen": -0.805267333984375, + "logits/rejected": -0.8178039789199829, + "logps/chosen": -377.52813720703125, + "logps/rejected": -392.0296936035156, + "logps/weighted_chosen": -2.7947998046875, + "logps/weighted_rejected": -3.697582960128784, + "loss": 0.6296, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -59.2109375, + "rewards/margins": 29.121875762939453, + "rewards/rejected": -88.32890319824219, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -0.41761475801467896, + "rewards/weighted_margins": 0.38171082735061646, + "rewards/weighted_rejected": -0.7994705438613892, + "step": 400 + }, + { + "epoch": 0.21460350693535724, + "grad_norm": 21.382999420166016, + "learning_rate": 9.611931925683266e-07, + "logits/chosen": -0.7703964114189148, + "logits/rejected": 
-0.808850109577179, + "logps/chosen": -367.3140563964844, + "logps/rejected": -348.0687561035156, + "logps/weighted_chosen": -2.711962938308716, + "logps/weighted_rejected": -3.4615721702575684, + "loss": 0.5758, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -61.02734375, + "rewards/margins": 23.316797256469727, + "rewards/rejected": -84.34687805175781, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.40519410371780396, + "rewards/weighted_margins": 0.521441638469696, + "rewards/weighted_rejected": -0.9261535406112671, + "step": 410 + }, + { + "epoch": 0.21983773881182936, + "grad_norm": 23.030996322631836, + "learning_rate": 9.575867082172085e-07, + "logits/chosen": -0.7789466977119446, + "logits/rejected": -0.8260132074356079, + "logps/chosen": -372.22344970703125, + "logps/rejected": -367.0171813964844, + "logps/weighted_chosen": -3.114550828933716, + "logps/weighted_rejected": -3.364208936691284, + "loss": 0.6211, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -67.16816711425781, + "rewards/margins": 29.731639862060547, + "rewards/rejected": -96.90156555175781, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.5122925043106079, + "rewards/weighted_margins": 0.5218566656112671, + "rewards/weighted_rejected": -1.0339782238006592, + "step": 420 + }, + { + "epoch": 0.22507197068830148, + "grad_norm": 16.442333221435547, + "learning_rate": 9.538273936226673e-07, + "logits/chosen": -0.830523669719696, + "logits/rejected": -0.8667358160018921, + "logps/chosen": -328.4546813964844, + "logps/rejected": -347.9593811035156, + "logps/weighted_chosen": -3.373584032058716, + "logps/weighted_rejected": -3.832958936691284, + "loss": 0.6425, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -62.920310974121094, + "rewards/margins": 20.668750762939453, + "rewards/rejected": -83.5894546508789, + "rewards/weighted_accuracies": 0.612500011920929, + 
"rewards/weighted_chosen": -0.459890753030777, + "rewards/weighted_margins": 0.39284056425094604, + "rewards/weighted_rejected": -0.8525451421737671, + "step": 430 + }, + { + "epoch": 0.23030620256477363, + "grad_norm": 21.955875396728516, + "learning_rate": 9.499165043652391e-07, + "logits/chosen": -0.8598114252090454, + "logits/rejected": -0.868182361125946, + "logps/chosen": -358.21563720703125, + "logps/rejected": -356.26251220703125, + "logps/weighted_chosen": -3.4171142578125, + "logps/weighted_rejected": -3.6997313499450684, + "loss": 0.624, + "rewards/accuracies": 0.625, + "rewards/chosen": -66.96504211425781, + "rewards/margins": 22.563282012939453, + "rewards/rejected": -89.5503921508789, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.5841079950332642, + "rewards/weighted_margins": 0.434326171875, + "rewards/weighted_rejected": -1.018707275390625, + "step": 440 + }, + { + "epoch": 0.23554043444124576, + "grad_norm": 75.56902313232422, + "learning_rate": 9.458553466501665e-07, + "logits/chosen": -0.9330536127090454, + "logits/rejected": -0.9642333984375, + "logps/chosen": -352.6187438964844, + "logps/rejected": -336.0218811035156, + "logps/weighted_chosen": -3.4129395484924316, + "logps/weighted_rejected": -3.74462890625, + "loss": 0.6566, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -66.412109375, + "rewards/margins": 25.757617950439453, + "rewards/rejected": -92.181640625, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.689007580280304, + "rewards/weighted_margins": 0.4539245665073395, + "rewards/weighted_rejected": -1.143212914466858, + "step": 450 + }, + { + "epoch": 0.24077466631771788, + "grad_norm": 19.516427993774414, + "learning_rate": 9.416452768711366e-07, + "logits/chosen": -0.945111095905304, + "logits/rejected": -0.9787231683731079, + "logps/chosen": -369.3671875, + "logps/rejected": -358.9624938964844, + "logps/weighted_chosen": -3.1959471702575684, + 
"logps/weighted_rejected": -3.948193311691284, + "loss": 0.6392, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -77.17265319824219, + "rewards/margins": 21.916015625, + "rewards/rejected": -99.0796890258789, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.67822265625, + "rewards/weighted_margins": 0.532788097858429, + "rewards/weighted_rejected": -1.2112305164337158, + "step": 460 + }, + { + "epoch": 0.24600889819419, + "grad_norm": 19.182979583740234, + "learning_rate": 9.372877011572557e-07, + "logits/chosen": -0.9224609136581421, + "logits/rejected": -0.9388214349746704, + "logps/chosen": -391.6937561035156, + "logps/rejected": -377.0625, + "logps/weighted_chosen": -3.224560499191284, + "logps/weighted_rejected": -3.783252000808716, + "loss": 0.6162, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -80.95976257324219, + "rewards/margins": 21.617578506469727, + "rewards/rejected": -102.59883117675781, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.684155285358429, + "rewards/weighted_margins": 0.5555480718612671, + "rewards/weighted_rejected": -1.23956298828125, + "step": 470 + }, + { + "epoch": 0.2512431300706621, + "grad_norm": 31.75469970703125, + "learning_rate": 9.327840749034141e-07, + "logits/chosen": -0.969561755657196, + "logits/rejected": -0.998791515827179, + "logps/chosen": -362.1859436035156, + "logps/rejected": -385.29998779296875, + "logps/weighted_chosen": -3.0771241188049316, + "logps/weighted_rejected": -4.388257026672363, + "loss": 0.6296, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -75.1123046875, + "rewards/margins": 33.66425704956055, + "rewards/rejected": -108.75, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.606555163860321, + "rewards/weighted_margins": 0.612231433391571, + "rewards/weighted_rejected": -1.2182190418243408, + "step": 480 + }, + { + "epoch": 0.2564773619471343, + 
"grad_norm": 37.024818420410156, + "learning_rate": 9.281359022841965e-07, + "logits/chosen": -0.846588134765625, + "logits/rejected": -0.859790027141571, + "logps/chosen": -352.46405029296875, + "logps/rejected": -355.24688720703125, + "logps/weighted_chosen": -3.219531297683716, + "logps/weighted_rejected": -4.648681640625, + "loss": 0.5897, + "rewards/accuracies": 0.640625, + "rewards/chosen": -77.3949203491211, + "rewards/margins": 32.93046951293945, + "rewards/rejected": -110.32890319824219, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.7288268804550171, + "rewards/weighted_margins": 0.6741577386856079, + "rewards/weighted_rejected": -1.403161644935608, + "step": 490 + }, + { + "epoch": 0.26171159382360637, + "grad_norm": 17.740766525268555, + "learning_rate": 9.233447357514989e-07, + "logits/chosen": -0.8205505609512329, + "logits/rejected": -0.863543689250946, + "logps/chosen": -375.52032470703125, + "logps/rejected": -378.3500061035156, + "logps/weighted_chosen": -3.53125, + "logps/weighted_rejected": -4.106689453125, + "loss": 0.6305, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -77.591796875, + "rewards/margins": 30.978906631469727, + "rewards/rejected": -108.54609680175781, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.6407378911972046, + "rewards/weighted_margins": 0.6662231683731079, + "rewards/weighted_rejected": -1.30645751953125, + "step": 500 + }, + { + "epoch": 0.26171159382360637, + "eval_logits/chosen": -0.9400458931922913, + "eval_logits/rejected": -0.955981433391571, + "eval_logps/chosen": -371.72900390625, + "eval_logps/rejected": -379.6419982910156, + "eval_logps/weighted_chosen": -3.214712381362915, + "eval_logps/weighted_rejected": -4.0158867835998535, + "eval_loss": 0.6316163539886475, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -82.98784637451172, + "eval_rewards/margins": 28.939437866210938, + "eval_rewards/rejected": 
-111.93875122070312, + "eval_rewards/weighted_accuracies": 0.6725000143051147, + "eval_rewards/weighted_chosen": -0.6669993996620178, + "eval_rewards/weighted_margins": 0.5506796836853027, + "eval_rewards/weighted_rejected": -1.2176789045333862, + "eval_runtime": 1162.5522, + "eval_samples_per_second": 1.72, + "eval_steps_per_second": 0.43, + "step": 500 + }, + { + "epoch": 0.2669458257000785, + "grad_norm": 59.14344787597656, + "learning_rate": 9.184121755160232e-07, + "logits/chosen": -0.9093383550643921, + "logits/rejected": -0.9390915036201477, + "logps/chosen": -378.3890686035156, + "logps/rejected": -401.62811279296875, + "logps/weighted_chosen": -3.4715576171875, + "logps/weighted_rejected": -4.080712795257568, + "loss": 0.6505, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -77.12968444824219, + "rewards/margins": 35.939842224121094, + "rewards/rejected": -113.0718765258789, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.6525024175643921, + "rewards/weighted_margins": 0.52886962890625, + "rewards/weighted_rejected": -1.1813843250274658, + "step": 510 + }, + { + "epoch": 0.2721800575765506, + "grad_norm": 33.64823913574219, + "learning_rate": 9.133398690128193e-07, + "logits/chosen": -0.942626953125, + "logits/rejected": -0.965716540813446, + "logps/chosen": -400.1890563964844, + "logps/rejected": -403.63592529296875, + "logps/weighted_chosen": -3.2196044921875, + "logps/weighted_rejected": -4.1656494140625, + "loss": 0.6062, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -78.64530944824219, + "rewards/margins": 44.30859375, + "rewards/rejected": -122.9917984008789, + "rewards/weighted_accuracies": 0.7406250238418579, + "rewards/weighted_chosen": -0.48695677518844604, + "rewards/weighted_margins": 0.6421569585800171, + "rewards/weighted_rejected": -1.129034399986267, + "step": 520 + }, + { + "epoch": 0.27741428945302277, + "grad_norm": 36.97419738769531, + "learning_rate": 
9.081295103510554e-07, + "logits/chosen": -0.9060531854629517, + "logits/rejected": -0.954547107219696, + "logps/chosen": -377.2640686035156, + "logps/rejected": -402.17498779296875, + "logps/weighted_chosen": -2.8717284202575684, + "logps/weighted_rejected": -4.353662014007568, + "loss": 0.5138, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -93.2210922241211, + "rewards/margins": 39.227149963378906, + "rewards/rejected": -132.44140625, + "rewards/weighted_accuracies": 0.7250000238418579, + "rewards/weighted_chosen": -0.5675109624862671, + "rewards/weighted_margins": 0.93292236328125, + "rewards/weighted_rejected": -1.501245141029358, + "step": 530 + }, + { + "epoch": 0.2826485213294949, + "grad_norm": 23.371498107910156, + "learning_rate": 9.027828397481989e-07, + "logits/chosen": -0.8855453729629517, + "logits/rejected": -0.911120593547821, + "logps/chosen": -357.30078125, + "logps/rejected": -385.8531188964844, + "logps/weighted_chosen": -3.6192626953125, + "logps/weighted_rejected": -4.450634956359863, + "loss": 0.6284, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -96.7691421508789, + "rewards/margins": 33.722267150878906, + "rewards/rejected": -130.5457000732422, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.788897693157196, + "rewards/weighted_margins": 0.5829833745956421, + "rewards/weighted_rejected": -1.3713562488555908, + "step": 540 + }, + { + "epoch": 0.287882753205967, + "grad_norm": 21.5579833984375, + "learning_rate": 8.973016429487988e-07, + "logits/chosen": -0.9223998785018921, + "logits/rejected": -0.9320526123046875, + "logps/chosen": -376.43438720703125, + "logps/rejected": -393.18438720703125, + "logps/weighted_chosen": -3.503002882003784, + "logps/weighted_rejected": -3.9817872047424316, + "loss": 0.6093, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -94.5347671508789, + "rewards/margins": 44.30546951293945, + "rewards/rejected": -138.84414672851562, + 
"rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.7487853765487671, + "rewards/weighted_margins": 0.62567138671875, + "rewards/weighted_rejected": -1.3746826648712158, + "step": 550 + }, + { + "epoch": 0.29311698508243916, + "grad_norm": 55.23554992675781, + "learning_rate": 8.916877506280601e-07, + "logits/chosen": -0.969989001750946, + "logits/rejected": -0.9665802121162415, + "logps/chosen": -383.078125, + "logps/rejected": -397.75, + "logps/weighted_chosen": -3.5821290016174316, + "logps/weighted_rejected": -4.008593559265137, + "loss": 0.6282, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -102.19023132324219, + "rewards/margins": 41.46562576293945, + "rewards/rejected": -143.642578125, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.7835754156112671, + "rewards/weighted_margins": 0.644238293170929, + "rewards/weighted_rejected": -1.427978515625, + "step": 560 + }, + { + "epoch": 0.29835121695891126, + "grad_norm": 18.939470291137695, + "learning_rate": 8.85943037780415e-07, + "logits/chosen": -1.0394058227539062, + "logits/rejected": -1.042639136314392, + "logps/chosen": -384.078125, + "logps/rejected": -366.9984436035156, + "logps/weighted_chosen": -3.3567872047424316, + "logps/weighted_rejected": -3.9019775390625, + "loss": 0.5908, + "rewards/accuracies": 0.625, + "rewards/chosen": -97.7894515991211, + "rewards/margins": 27.4404296875, + "rewards/rejected": -125.2328109741211, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.7109375, + "rewards/weighted_margins": 0.566577136516571, + "rewards/weighted_rejected": -1.2770659923553467, + "step": 570 + }, + { + "epoch": 0.3035854488353834, + "grad_norm": 54.97072982788086, + "learning_rate": 8.800694230932884e-07, + "logits/chosen": -0.958050549030304, + "logits/rejected": -0.9729766845703125, + "logps/chosen": -381.4140625, + "logps/rejected": -385.4296875, + "logps/weighted_chosen": 
-2.9930176734924316, + "logps/weighted_rejected": -3.6538329124450684, + "loss": 0.6283, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -91.8843765258789, + "rewards/margins": 28.43359375, + "rewards/rejected": -120.30078125, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.5365799069404602, + "rewards/weighted_margins": 0.4689392149448395, + "rewards/weighted_rejected": -1.005883812904358, + "step": 580 + }, + { + "epoch": 0.30881968071185556, + "grad_norm": 187.63499450683594, + "learning_rate": 8.740688683062723e-07, + "logits/chosen": -1.0116729736328125, + "logits/rejected": -1.0273834466934204, + "logps/chosen": -427.6859436035156, + "logps/rejected": -400.0640563964844, + "logps/weighted_chosen": -2.997143507003784, + "logps/weighted_rejected": -3.7496094703674316, + "loss": 0.6363, + "rewards/accuracies": 0.578125, + "rewards/chosen": -98.6539077758789, + "rewards/margins": 25.619726181030273, + "rewards/rejected": -124.2671890258789, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.539746105670929, + "rewards/weighted_margins": 0.48472291231155396, + "rewards/weighted_rejected": -1.023950219154358, + "step": 590 + }, + { + "epoch": 0.31405391258832765, + "grad_norm": 17.906530380249023, + "learning_rate": 8.679433775559215e-07, + "logits/chosen": -0.991503894329071, + "logits/rejected": -1.0299193859100342, + "logps/chosen": -428.88592529296875, + "logps/rejected": -425.1625061035156, + "logps/weighted_chosen": -2.877368211746216, + "logps/weighted_rejected": -3.985302686691284, + "loss": 0.5932, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -104.1353530883789, + "rewards/margins": 35.59746170043945, + "rewards/rejected": -139.7765655517578, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.6103607416152954, + "rewards/weighted_margins": 0.5564330816268921, + "rewards/weighted_rejected": -1.166479468345642, + "step": 600 
+ }, + { + "epoch": 0.3192881444647998, + "grad_norm": 33.20716094970703, + "learning_rate": 8.616949967063871e-07, + "logits/chosen": -0.9755920171737671, + "logits/rejected": -1.0198791027069092, + "logps/chosen": -370.45001220703125, + "logps/rejected": -400.4906311035156, + "logps/weighted_chosen": -3.3270506858825684, + "logps/weighted_rejected": -3.9315428733825684, + "loss": 0.6822, + "rewards/accuracies": 0.65625, + "rewards/chosen": -102.12422180175781, + "rewards/margins": 37.60078048706055, + "rewards/rejected": -139.7734375, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -0.7267090082168579, + "rewards/weighted_margins": 0.43719482421875, + "rewards/weighted_rejected": -1.163793921470642, + "step": 610 + }, + { + "epoch": 0.3245223763412719, + "grad_norm": 13.562949180603027, + "learning_rate": 8.553258126661154e-07, + "logits/chosen": -1.00177001953125, + "logits/rejected": -1.010014295578003, + "logps/chosen": -390.65234375, + "logps/rejected": -402.65155029296875, + "logps/weighted_chosen": -3.4473876953125, + "logps/weighted_rejected": -4.26806640625, + "loss": 0.6605, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -110.80390930175781, + "rewards/margins": 35.68242263793945, + "rewards/rejected": -146.5031280517578, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.7691070437431335, + "rewards/weighted_margins": 0.534271240234375, + "rewards/weighted_rejected": -1.3035888671875, + "step": 620 + }, + { + "epoch": 0.32975660821774405, + "grad_norm": 29.253734588623047, + "learning_rate": 8.488379526908368e-07, + "logits/chosen": -0.978869616985321, + "logits/rejected": -0.9867492914199829, + "logps/chosen": -406.9437561035156, + "logps/rejected": -431.9906311035156, + "logps/weighted_chosen": -3.346386671066284, + "logps/weighted_rejected": -4.080664157867432, + "loss": 0.5991, + "rewards/accuracies": 0.65625, + "rewards/chosen": -116.3648452758789, + "rewards/margins": 
46.68730545043945, + "rewards/rejected": -163.06405639648438, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -0.757556140422821, + "rewards/weighted_margins": 0.597582995891571, + "rewards/weighted_rejected": -1.3551514148712158, + "step": 630 + }, + { + "epoch": 0.33499084009421615, + "grad_norm": 28.59862518310547, + "learning_rate": 8.422335836730802e-07, + "logits/chosen": -0.983142077922821, + "logits/rejected": -0.9791107177734375, + "logps/chosen": -378.4984436035156, + "logps/rejected": -432.71563720703125, + "logps/weighted_chosen": -3.094311475753784, + "logps/weighted_rejected": -3.8768067359924316, + "loss": 0.6061, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -104.8597640991211, + "rewards/margins": 50.2001953125, + "rewards/rejected": -155.0695343017578, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -0.6736419796943665, + "rewards/weighted_margins": 0.5768188238143921, + "rewards/weighted_rejected": -1.250451683998108, + "step": 640 + }, + { + "epoch": 0.3402250719706883, + "grad_norm": 20.690876007080078, + "learning_rate": 8.355149114184485e-07, + "logits/chosen": -1.030615210533142, + "logits/rejected": -1.0146636962890625, + "logps/chosen": -416.39373779296875, + "logps/rejected": -445.0625, + "logps/weighted_chosen": -3.255859375, + "logps/weighted_rejected": -3.76806640625, + "loss": 0.6048, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -108.90547180175781, + "rewards/margins": 53.869140625, + "rewards/rejected": -162.74258422851562, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.700115978717804, + "rewards/weighted_margins": 0.5948852300643921, + "rewards/weighted_rejected": -1.2950623035430908, + "step": 650 + }, + { + "epoch": 0.34545930384716045, + "grad_norm": 33.73557662963867, + "learning_rate": 8.286841799088963e-07, + "logits/chosen": -1.0694351196289062, + "logits/rejected": -1.0623047351837158, 
+ "logps/chosen": -395.19219970703125, + "logps/rejected": -412.45001220703125, + "logps/weighted_chosen": -2.8808836936950684, + "logps/weighted_rejected": -3.636523485183716, + "loss": 0.6207, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -111.27070617675781, + "rewards/margins": 38.04765701293945, + "rewards/rejected": -149.3312530517578, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.649340808391571, + "rewards/weighted_margins": 0.5112365484237671, + "rewards/weighted_rejected": -1.1605103015899658, + "step": 660 + }, + { + "epoch": 0.35069353572363254, + "grad_norm": 18.913761138916016, + "learning_rate": 8.217436705532599e-07, + "logits/chosen": -1.0736572742462158, + "logits/rejected": -1.090576171875, + "logps/chosen": -431.78125, + "logps/rejected": -421.4312438964844, + "logps/weighted_chosen": -3.0259766578674316, + "logps/weighted_rejected": -3.81298828125, + "loss": 0.6004, + "rewards/accuracies": 0.640625, + "rewards/chosen": -129.1144561767578, + "rewards/margins": 28.649608612060547, + "rewards/rejected": -157.74844360351562, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -0.6614929437637329, + "rewards/weighted_margins": 0.636364758014679, + "rewards/weighted_rejected": -1.2976195812225342, + "step": 670 + }, + { + "epoch": 0.3559277676001047, + "grad_norm": 22.967056274414062, + "learning_rate": 8.14695701425284e-07, + "logits/chosen": -1.051629662513733, + "logits/rejected": -1.0860717296600342, + "logps/chosen": -432.98126220703125, + "logps/rejected": -426.5953063964844, + "logps/weighted_chosen": -3.1195311546325684, + "logps/weighted_rejected": -3.96337890625, + "loss": 0.5827, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -127.5308609008789, + "rewards/margins": 36.908592224121094, + "rewards/rejected": -164.46484375, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.686236560344696, + "rewards/weighted_margins": 
0.6052306890487671, + "rewards/weighted_rejected": -1.2908813953399658, + "step": 680 + }, + { + "epoch": 0.3611619994765768, + "grad_norm": 15.409049034118652, + "learning_rate": 8.075426264894046e-07, + "logits/chosen": -1.006170630455017, + "logits/rejected": -1.0271179676055908, + "logps/chosen": -434.3828125, + "logps/rejected": -452.90625, + "logps/weighted_chosen": -3.187304735183716, + "logps/weighted_rejected": -4.466699123382568, + "loss": 0.5501, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -128.9619140625, + "rewards/margins": 46.51679611206055, + "rewards/rejected": -175.45468139648438, + "rewards/weighted_accuracies": 0.737500011920929, + "rewards/weighted_chosen": -0.6887573003768921, + "rewards/weighted_margins": 0.76141357421875, + "rewards/weighted_rejected": -1.4498474597930908, + "step": 690 + }, + { + "epoch": 0.36639623135304894, + "grad_norm": 23.13039207458496, + "learning_rate": 8.002868348145435e-07, + "logits/chosen": -0.9920509457588196, + "logits/rejected": -0.996777355670929, + "logps/chosen": -439.54998779296875, + "logps/rejected": -444.30780029296875, + "logps/weighted_chosen": -3.31005859375, + "logps/weighted_rejected": -3.691967725753784, + "loss": 0.6102, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -139.38632202148438, + "rewards/margins": 36.769142150878906, + "rewards/rejected": -176.1867218017578, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.8051727414131165, + "rewards/weighted_margins": 0.5639587640762329, + "rewards/weighted_rejected": -1.3695800304412842, + "step": 700 + }, + { + "epoch": 0.3716304632295211, + "grad_norm": 38.38508224487305, + "learning_rate": 7.92930749776179e-07, + "logits/chosen": -1.0172607898712158, + "logits/rejected": -1.042083740234375, + "logps/chosen": -422.10546875, + "logps/rejected": -444.6812438964844, + "logps/weighted_chosen": -3.483569383621216, + "logps/weighted_rejected": -4.216650485992432, + "loss": 0.6316, + 
"rewards/accuracies": 0.621874988079071, + "rewards/chosen": -150.42578125, + "rewards/margins": 33.904685974121094, + "rewards/rejected": -184.33438110351562, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.72515869140625, + "rewards/weighted_margins": 0.590161144733429, + "rewards/weighted_rejected": -1.3153502941131592, + "step": 710 + }, + { + "epoch": 0.3768646951059932, + "grad_norm": 20.4711971282959, + "learning_rate": 7.854768282469582e-07, + "logits/chosen": -1.0770995616912842, + "logits/rejected": -1.114935278892517, + "logps/chosen": -401.88592529296875, + "logps/rejected": -458.3374938964844, + "logps/weighted_chosen": -3.099194288253784, + "logps/weighted_rejected": -3.990966796875, + "loss": 0.5978, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -124.43046569824219, + "rewards/margins": 56.217384338378906, + "rewards/rejected": -180.6015625, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.6303802728652954, + "rewards/weighted_margins": 0.679125964641571, + "rewards/weighted_rejected": -1.3104156255722046, + "step": 720 + }, + { + "epoch": 0.38209892698246534, + "grad_norm": 15.2982177734375, + "learning_rate": 7.779275597761215e-07, + "logits/chosen": -1.0406615734100342, + "logits/rejected": -1.097131371498108, + "logps/chosen": -414.5625, + "logps/rejected": -462.5453186035156, + "logps/weighted_chosen": -3.3689942359924316, + "logps/weighted_rejected": -4.090185642242432, + "loss": 0.5689, + "rewards/accuracies": 0.6781250238418579, + "rewards/chosen": -138.33358764648438, + "rewards/margins": 55.842185974121094, + "rewards/rejected": -194.16171264648438, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -0.6730102300643921, + "rewards/weighted_margins": 0.699902355670929, + "rewards/weighted_rejected": -1.373620629310608, + "step": 730 + }, + { + "epoch": 0.38733315885893743, + "grad_norm": 37.01581954956055, + "learning_rate": 
7.702854657580126e-07, + "logits/chosen": -1.1022522449493408, + "logits/rejected": -1.1134154796600342, + "logps/chosen": -459.6156311035156, + "logps/rejected": -462.4125061035156, + "logps/weighted_chosen": -3.382946729660034, + "logps/weighted_rejected": -4.401709079742432, + "loss": 0.6148, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -164.8722686767578, + "rewards/margins": 43.28515625, + "rewards/rejected": -208.21133422851562, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.8088958859443665, + "rewards/weighted_margins": 0.698986828327179, + "rewards/weighted_rejected": -1.5074951648712158, + "step": 740 + }, + { + "epoch": 0.3925673907354096, + "grad_norm": 26.924480438232422, + "learning_rate": 7.625530985899547e-07, + "logits/chosen": -1.0611861944198608, + "logits/rejected": -1.075714111328125, + "logps/chosen": -430.6312561035156, + "logps/rejected": -461.9828186035156, + "logps/weighted_chosen": -3.210217237472534, + "logps/weighted_rejected": -4.483691215515137, + "loss": 0.5815, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -163.78555297851562, + "rewards/margins": 53.288673400878906, + "rewards/rejected": -216.96054077148438, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.876666247844696, + "rewards/weighted_margins": 0.7388671636581421, + "rewards/weighted_rejected": -1.6160767078399658, + "step": 750 + }, + { + "epoch": 0.39780162261188173, + "grad_norm": 18.056201934814453, + "learning_rate": 7.547330408197694e-07, + "logits/chosen": -1.0437713861465454, + "logits/rejected": -1.075250267982483, + "logps/chosen": -460.1875, + "logps/rejected": -450.8109436035156, + "logps/weighted_chosen": -3.229296922683716, + "logps/weighted_rejected": -4.191064357757568, + "loss": 0.6146, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -164.56640625, + "rewards/margins": 29.973241806030273, + "rewards/rejected": -194.6171875, + 
"rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.7951415777206421, + "rewards/weighted_margins": 0.599578857421875, + "rewards/weighted_rejected": -1.395105004310608, + "step": 760 + }, + { + "epoch": 0.40303585448835383, + "grad_norm": 16.393836975097656, + "learning_rate": 7.468279042832271e-07, + "logits/chosen": -1.0454833507537842, + "logits/rejected": -1.0705687999725342, + "logps/chosen": -416.57421875, + "logps/rejected": -483.1390686035156, + "logps/weighted_chosen": -3.134448289871216, + "logps/weighted_rejected": -3.8697752952575684, + "loss": 0.6132, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -136.8953094482422, + "rewards/margins": 56.886329650878906, + "rewards/rejected": -193.72305297851562, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.7508605718612671, + "rewards/weighted_margins": 0.5269104242324829, + "rewards/weighted_rejected": -1.277490258216858, + "step": 770 + }, + { + "epoch": 0.408270086364826, + "grad_norm": 16.254776000976562, + "learning_rate": 7.388403292317154e-07, + "logits/chosen": -1.0213134288787842, + "logits/rejected": -1.072851538658142, + "logps/chosen": -445.00469970703125, + "logps/rejected": -451.1468811035156, + "logps/weighted_chosen": -3.063525438308716, + "logps/weighted_rejected": -3.897021532058716, + "loss": 0.6134, + "rewards/accuracies": 0.609375, + "rewards/chosen": -144.0207061767578, + "rewards/margins": 41.165626525878906, + "rewards/rejected": -185.15585327148438, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.752685546875, + "rewards/weighted_margins": 0.5895751714706421, + "rewards/weighted_rejected": -1.341894507408142, + "step": 780 + }, + { + "epoch": 0.4135043182412981, + "grad_norm": 19.472450256347656, + "learning_rate": 7.307729834504154e-07, + "logits/chosen": -1.052435278892517, + "logits/rejected": -1.10076904296875, + "logps/chosen": -457.2640686035156, + "logps/rejected": 
-488.16876220703125, + "logps/weighted_chosen": -3.143115282058716, + "logps/weighted_rejected": -4.254638671875, + "loss": 0.6137, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -175.60116577148438, + "rewards/margins": 46.61640548706055, + "rewards/rejected": -222.21328735351562, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -0.878021240234375, + "rewards/weighted_margins": 0.650500476360321, + "rewards/weighted_rejected": -1.5286986827850342, + "step": 790 + }, + { + "epoch": 0.4187385501177702, + "grad_norm": 24.911523818969727, + "learning_rate": 7.226285613672847e-07, + "logits/chosen": -1.0021483898162842, + "logits/rejected": -1.031951904296875, + "logps/chosen": -466.4765625, + "logps/rejected": -547.578125, + "logps/weighted_chosen": -3.3023438453674316, + "logps/weighted_rejected": -4.483691215515137, + "loss": 0.6142, + "rewards/accuracies": 0.640625, + "rewards/chosen": -189.1164093017578, + "rewards/margins": 79.8050765991211, + "rewards/rejected": -268.85467529296875, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -0.978710949420929, + "rewards/weighted_margins": 0.6761840581893921, + "rewards/weighted_rejected": -1.6549804210662842, + "step": 800 + }, + { + "epoch": 0.4239727819942423, + "grad_norm": 16.060869216918945, + "learning_rate": 7.144097831531398e-07, + "logits/chosen": -0.978619396686554, + "logits/rejected": -1.0038635730743408, + "logps/chosen": -456.95623779296875, + "logps/rejected": -505.2093811035156, + "logps/weighted_chosen": -3.171826124191284, + "logps/weighted_rejected": -4.205761909484863, + "loss": 0.5645, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -186.97421264648438, + "rewards/margins": 54.0078125, + "rewards/rejected": -240.85311889648438, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.788342297077179, + "rewards/weighted_margins": 0.7865844964981079, + "rewards/weighted_rejected": 
-1.574896216392517, + "step": 810 + }, + { + "epoch": 0.42920701387071447, + "grad_norm": 23.292619705200195, + "learning_rate": 7.061193938131396e-07, + "logits/chosen": -0.9266418218612671, + "logits/rejected": -0.9759277105331421, + "logps/chosen": -492.421875, + "logps/rejected": -497.2515563964844, + "logps/weighted_chosen": -3.4576172828674316, + "logps/weighted_rejected": -4.116650581359863, + "loss": 0.5666, + "rewards/accuracies": 0.578125, + "rewards/chosen": -192.64022827148438, + "rewards/margins": 39.222267150878906, + "rewards/rejected": -231.8562469482422, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.797924816608429, + "rewards/weighted_margins": 0.775280773639679, + "rewards/weighted_rejected": -1.573089599609375, + "step": 820 + }, + { + "epoch": 0.4344412457471866, + "grad_norm": 23.763275146484375, + "learning_rate": 6.977601622699789e-07, + "logits/chosen": -0.9908691644668579, + "logits/rejected": -1.057653784751892, + "logps/chosen": -459.80157470703125, + "logps/rejected": -544.8687744140625, + "logps/weighted_chosen": -3.350512742996216, + "logps/weighted_rejected": -4.385839939117432, + "loss": 0.5077, + "rewards/accuracies": 0.6781250238418579, + "rewards/chosen": -175.025390625, + "rewards/margins": 90.8042984008789, + "rewards/rejected": -265.86798095703125, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -0.699688732624054, + "rewards/weighted_margins": 0.9197998046875, + "rewards/weighted_rejected": -1.61993408203125, + "step": 830 + }, + { + "epoch": 0.4396754776236587, + "grad_norm": 29.07372283935547, + "learning_rate": 6.893348804390882e-07, + "logits/chosen": -1.094964623451233, + "logits/rejected": -1.1045074462890625, + "logps/chosen": -521.2327880859375, + "logps/rejected": -545.9468994140625, + "logps/weighted_chosen": -3.6615967750549316, + "logps/weighted_rejected": -4.320361137390137, + "loss": 0.5747, + "rewards/accuracies": 0.5843750238418579, + 
"rewards/chosen": -225.86563110351562, + "rewards/margins": 60.388671875, + "rewards/rejected": -286.2398376464844, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -0.912017822265625, + "rewards/weighted_margins": 0.748242199420929, + "rewards/weighted_rejected": -1.660058617591858, + "step": 840 + }, + { + "epoch": 0.44490970950013087, + "grad_norm": 23.878381729125977, + "learning_rate": 6.808463622961578e-07, + "logits/chosen": -1.0891234874725342, + "logits/rejected": -1.1309936046600342, + "logps/chosen": -545.3046875, + "logps/rejected": -615.7484130859375, + "logps/weighted_chosen": -3.533984422683716, + "logps/weighted_rejected": -4.504004001617432, + "loss": 0.5472, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -252.8015594482422, + "rewards/margins": 81.0531234741211, + "rewards/rejected": -333.59295654296875, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -0.967456042766571, + "rewards/weighted_margins": 0.86865234375, + "rewards/weighted_rejected": -1.835351586341858, + "step": 850 + }, + { + "epoch": 0.45014394137660296, + "grad_norm": 101.535888671875, + "learning_rate": 6.722974429372925e-07, + "logits/chosen": -1.0688354969024658, + "logits/rejected": -1.1046874523162842, + "logps/chosen": -578.9781494140625, + "logps/rejected": -604.3499755859375, + "logps/weighted_chosen": -3.37939453125, + "logps/weighted_rejected": -5.040380954742432, + "loss": 0.5051, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -275.2124938964844, + "rewards/margins": 66.9710922241211, + "rewards/rejected": -342.20001220703125, + "rewards/weighted_accuracies": 0.7749999761581421, + "rewards/weighted_chosen": -1.086279273033142, + "rewards/weighted_margins": 1.05950927734375, + "rewards/weighted_rejected": -2.1461181640625, + "step": 860 + }, + { + "epoch": 0.4553781732530751, + "grad_norm": 66.56680297851562, + "learning_rate": 6.636909776321128e-07, + "logits/chosen": 
-1.1214478015899658, + "logits/rejected": -1.11016845703125, + "logps/chosen": -493.3843688964844, + "logps/rejected": -590.8125, + "logps/weighted_chosen": -3.670654296875, + "logps/weighted_rejected": -4.723730564117432, + "loss": 0.5107, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -231.6789093017578, + "rewards/margins": 94.42109680175781, + "rewards/rejected": -326.2250061035156, + "rewards/weighted_accuracies": 0.746874988079071, + "rewards/weighted_chosen": -1.0464904308319092, + "rewards/weighted_margins": 0.9440551996231079, + "rewards/weighted_rejected": -1.99102783203125, + "step": 870 + }, + { + "epoch": 0.46061240512954726, + "grad_norm": 23.494997024536133, + "learning_rate": 6.550298408701174e-07, + "logits/chosen": -1.094885230064392, + "logits/rejected": -1.1415894031524658, + "logps/chosen": -534.1663818359375, + "logps/rejected": -621.9547119140625, + "logps/weighted_chosen": -3.794140577316284, + "logps/weighted_rejected": -5.148291110992432, + "loss": 0.5174, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -247.2595672607422, + "rewards/margins": 92.109375, + "rewards/rejected": -339.3968811035156, + "rewards/weighted_accuracies": 0.753125011920929, + "rewards/weighted_chosen": -0.9449707269668579, + "rewards/weighted_margins": 0.938586413860321, + "rewards/weighted_rejected": -1.8829224109649658, + "step": 880 + }, + { + "epoch": 0.46584663700601936, + "grad_norm": 27.359371185302734, + "learning_rate": 6.463169254006276e-07, + "logits/chosen": -1.1160705089569092, + "logits/rejected": -1.157629370689392, + "logps/chosen": -538.1109619140625, + "logps/rejected": -562.7062377929688, + "logps/weighted_chosen": -3.73779296875, + "logps/weighted_rejected": -4.817724704742432, + "loss": 0.5154, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -256.6499938964844, + "rewards/margins": 59.66523361206055, + "rewards/rejected": -316.37225341796875, + "rewards/weighted_accuracies": 0.71875, + 
"rewards/weighted_chosen": -1.019891381263733, + "rewards/weighted_margins": 1.008874535560608, + "rewards/weighted_rejected": -2.028430223464966, + "step": 890 + }, + { + "epoch": 0.4710808688824915, + "grad_norm": 20.988025665283203, + "learning_rate": 6.375551412666326e-07, + "logits/chosen": -1.0879943370819092, + "logits/rejected": -1.1199951171875, + "logps/chosen": -514.9187622070312, + "logps/rejected": -545.7906494140625, + "logps/weighted_chosen": -3.4129395484924316, + "logps/weighted_rejected": -4.592138767242432, + "loss": 0.6113, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -240.97891235351562, + "rewards/margins": 48.454689025878906, + "rewards/rejected": -289.4117126464844, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -1.1000854969024658, + "rewards/weighted_margins": 0.7867187261581421, + "rewards/weighted_rejected": -1.887121558189392, + "step": 900 + }, + { + "epoch": 0.4763151007589636, + "grad_norm": 29.907148361206055, + "learning_rate": 6.287474148328583e-07, + "logits/chosen": -1.0193603038787842, + "logits/rejected": -0.9993133544921875, + "logps/chosen": -474.3374938964844, + "logps/rejected": -501.23748779296875, + "logps/weighted_chosen": -3.7416014671325684, + "logps/weighted_rejected": -5.238329887390137, + "loss": 0.5727, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -203.36563110351562, + "rewards/margins": 50.95781326293945, + "rewards/rejected": -254.2734375, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.043182373046875, + "rewards/weighted_margins": 0.8107665777206421, + "rewards/weighted_rejected": -1.8539307117462158, + "step": 910 + }, + { + "epoch": 0.48154933263543576, + "grad_norm": 36.32797622680664, + "learning_rate": 6.198966878083857e-07, + "logits/chosen": -1.0350799560546875, + "logits/rejected": -1.0523681640625, + "logps/chosen": -488.9765625, + "logps/rejected": -553.484375, + "logps/weighted_chosen": 
-3.7232666015625, + "logps/weighted_rejected": -4.598974704742432, + "loss": 0.5581, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -217.74844360351562, + "rewards/margins": 68.5667953491211, + "rewards/rejected": -286.3515625, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -1.01214599609375, + "rewards/weighted_margins": 0.775787353515625, + "rewards/weighted_rejected": -1.787988305091858, + "step": 920 + }, + { + "epoch": 0.48678356451190785, + "grad_norm": 40.16273880004883, + "learning_rate": 6.110059162641439e-07, + "logits/chosen": -1.0597412586212158, + "logits/rejected": -1.0781066417694092, + "logps/chosen": -513.3226318359375, + "logps/rejected": -559.4593505859375, + "logps/weighted_chosen": -3.147705078125, + "logps/weighted_rejected": -4.214404106140137, + "loss": 0.5481, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -231.1085968017578, + "rewards/margins": 61.108985900878906, + "rewards/rejected": -292.21954345703125, + "rewards/weighted_accuracies": 0.721875011920929, + "rewards/weighted_chosen": -0.8689819574356079, + "rewards/weighted_margins": 0.7522827386856079, + "rewards/weighted_rejected": -1.6212646961212158, + "step": 930 + }, + { + "epoch": 0.49201779638838, + "grad_norm": 29.65454864501953, + "learning_rate": 6.020780696456059e-07, + "logits/chosen": -1.072198510169983, + "logits/rejected": -1.104650855064392, + "logps/chosen": -511.18280029296875, + "logps/rejected": -601.0250244140625, + "logps/weighted_chosen": -3.1954102516174316, + "logps/weighted_rejected": -4.517724514007568, + "loss": 0.5407, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -240.52108764648438, + "rewards/margins": 93.419921875, + "rewards/rejected": -333.953125, + "rewards/weighted_accuracies": 0.731249988079071, + "rewards/weighted_chosen": -1.0927855968475342, + "rewards/weighted_margins": 0.8496459722518921, + "rewards/weighted_rejected": -1.942968726158142, + "step": 940 + }, + { + 
"epoch": 0.49725202826485215, + "grad_norm": 210.50332641601562, + "learning_rate": 5.931161297810185e-07, + "logits/chosen": -1.132635474205017, + "logits/rejected": -1.1451904773712158, + "logps/chosen": -574.6031494140625, + "logps/rejected": -629.4656372070312, + "logps/weighted_chosen": -4.126172065734863, + "logps/weighted_rejected": -5.016747951507568, + "loss": 0.5998, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -301.52264404296875, + "rewards/margins": 66.5218734741211, + "rewards/rejected": -368.080078125, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -1.307519555091858, + "rewards/weighted_margins": 0.74249267578125, + "rewards/weighted_rejected": -2.0491180419921875, + "step": 950 + }, + { + "epoch": 0.5024862601413242, + "grad_norm": 113.20726013183594, + "learning_rate": 5.841230898854959e-07, + "logits/chosen": -1.070550560951233, + "logits/rejected": -1.0872802734375, + "logps/chosen": -652.0281372070312, + "logps/rejected": -711.1765747070312, + "logps/weighted_chosen": -3.9981932640075684, + "logps/weighted_rejected": -5.147070407867432, + "loss": 0.6329, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -351.828125, + "rewards/margins": 97.4195327758789, + "rewards/rejected": -449.2261657714844, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -1.564361572265625, + "rewards/weighted_margins": 0.872241199016571, + "rewards/weighted_rejected": -2.436718702316284, + "step": 960 + }, + { + "epoch": 0.5077204920177963, + "grad_norm": 22.200820922851562, + "learning_rate": 5.751019535613102e-07, + "logits/chosen": -0.987274169921875, + "logits/rejected": -1.0052611827850342, + "logps/chosen": -531.3624877929688, + "logps/rejected": -610.1218872070312, + "logps/weighted_chosen": -3.8460450172424316, + "logps/weighted_rejected": -5.256982326507568, + "loss": 0.5675, + "rewards/accuracies": 0.65625, + "rewards/chosen": -271.6910095214844, + 
"rewards/margins": 91.53633117675781, + "rewards/rejected": -363.22344970703125, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -1.357843041419983, + "rewards/weighted_margins": 1.01031494140625, + "rewards/weighted_rejected": -2.367443799972534, + "step": 970 + }, + { + "epoch": 0.5129547238942685, + "grad_norm": 17.362323760986328, + "learning_rate": 5.660557337947117e-07, + "logits/chosen": -0.9707549810409546, + "logits/rejected": -0.983325183391571, + "logps/chosen": -549.1953125, + "logps/rejected": -586.7874755859375, + "logps/weighted_chosen": -3.279223680496216, + "logps/weighted_rejected": -4.507519721984863, + "loss": 0.5466, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -254.6687469482422, + "rewards/margins": 74.56758117675781, + "rewards/rejected": -329.2632751464844, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -1.0866820812225342, + "rewards/weighted_margins": 0.8655151128768921, + "rewards/weighted_rejected": -1.9528076648712158, + "step": 980 + }, + { + "epoch": 0.5181889557707406, + "grad_norm": 39.01738739013672, + "learning_rate": 5.569874519496174e-07, + "logits/chosen": -0.963134765625, + "logits/rejected": -1.01763916015625, + "logps/chosen": -488.7406311035156, + "logps/rejected": -554.3687744140625, + "logps/weighted_chosen": -3.665576219558716, + "logps/weighted_rejected": -4.876318454742432, + "loss": 0.5929, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -213.4460906982422, + "rewards/margins": 73.8890609741211, + "rewards/rejected": -287.3500061035156, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -0.959338366985321, + "rewards/weighted_margins": 0.7813965082168579, + "rewards/weighted_rejected": -1.740045189857483, + "step": 990 + }, + { + "epoch": 0.5234231876472127, + "grad_norm": 33.2608642578125, + "learning_rate": 5.47900136758499e-07, + "logits/chosen": -0.9298340082168579, + 
"logits/rejected": -0.989898681640625, + "logps/chosen": -527.16015625, + "logps/rejected": -566.2453002929688, + "logps/weighted_chosen": -3.71044921875, + "logps/weighted_rejected": -4.887304782867432, + "loss": 0.5395, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -259.56170654296875, + "rewards/margins": 64.66015625, + "rewards/rejected": -324.302734375, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -1.119836449623108, + "rewards/weighted_margins": 0.944445788860321, + "rewards/weighted_rejected": -2.063854932785034, + "step": 1000 + }, + { + "epoch": 0.5234231876472127, + "eval_logits/chosen": -1.0472733974456787, + "eval_logits/rejected": -1.0595996379852295, + "eval_logps/chosen": -573.6119995117188, + "eval_logps/rejected": -629.1840209960938, + "eval_logps/weighted_chosen": -3.815713405609131, + "eval_logps/weighted_rejected": -4.930161476135254, + "eval_loss": 0.5728335976600647, + "eval_rewards/accuracies": 0.5989999771118164, + "eval_rewards/chosen": -284.8971252441406, + "eval_rewards/margins": 76.59700012207031, + "eval_rewards/rejected": -361.5224914550781, + "eval_rewards/weighted_accuracies": 0.7070000171661377, + "eval_rewards/weighted_chosen": -1.2679998874664307, + "eval_rewards/weighted_margins": 0.8639541268348694, + "eval_rewards/weighted_rejected": -2.1319541931152344, + "eval_runtime": 1366.4223, + "eval_samples_per_second": 1.464, + "eval_steps_per_second": 0.366, + "step": 1000 + }, + { + "epoch": 0.528657419523685, + "grad_norm": 30.763290405273438, + "learning_rate": 5.387968233108113e-07, + "logits/chosen": -0.9412124752998352, + "logits/rejected": -0.9331512451171875, + "logps/chosen": -583.2468872070312, + "logps/rejected": -633.9749755859375, + "logps/weighted_chosen": -4.168408393859863, + "logps/weighted_rejected": -5.263281345367432, + "loss": 0.5622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -298.86407470703125, + "rewards/margins": 80.30000305175781, + 
"rewards/rejected": -379.27032470703125, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -1.356286644935608, + "rewards/weighted_margins": 0.906384289264679, + "rewards/weighted_rejected": -2.262927293777466, + "step": 1010 + }, + { + "epoch": 0.533891651400157, + "grad_norm": 18.650068283081055, + "learning_rate": 5.296805520392962e-07, + "logits/chosen": -1.010156273841858, + "logits/rejected": -1.0419880151748657, + "logps/chosen": -606.2078247070312, + "logps/rejected": -638.6656494140625, + "logps/weighted_chosen": -3.4315428733825684, + "logps/weighted_rejected": -4.852490425109863, + "loss": 0.6285, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -293.66796875, + "rewards/margins": 62.296485900878906, + "rewards/rejected": -355.8941345214844, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -1.4029419422149658, + "rewards/weighted_margins": 0.724365234375, + "rewards/weighted_rejected": -2.1269164085388184, + "step": 1020 + }, + { + "epoch": 0.5391258832766291, + "grad_norm": 28.17354393005371, + "learning_rate": 5.205543677045049e-07, + "logits/chosen": -0.9372314214706421, + "logits/rejected": -0.974993884563446, + "logps/chosen": -493.52655029296875, + "logps/rejected": -522.546875, + "logps/weighted_chosen": -3.700146436691284, + "logps/weighted_rejected": -4.827466011047363, + "loss": 0.5313, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -219.2234344482422, + "rewards/margins": 58.09453201293945, + "rewards/rejected": -277.33203125, + "rewards/weighted_accuracies": 0.7593749761581421, + "rewards/weighted_chosen": -1.0529053211212158, + "rewards/weighted_margins": 0.86126708984375, + "rewards/weighted_rejected": -1.9149185419082642, + "step": 1030 + }, + { + "epoch": 0.5443601151531012, + "grad_norm": 40.029666900634766, + "learning_rate": 5.114213183778697e-07, + "logits/chosen": -1.017327904701233, + "logits/rejected": -1.0485351085662842, + "logps/chosen": 
-514.8624877929688, + "logps/rejected": -569.7859497070312, + "logps/weighted_chosen": -4.155713081359863, + "logps/weighted_rejected": -4.990136623382568, + "loss": 0.5441, + "rewards/accuracies": 0.609375, + "rewards/chosen": -234.3312530517578, + "rewards/margins": 78.56758117675781, + "rewards/rejected": -312.8812561035156, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -1.097131371498108, + "rewards/weighted_margins": 0.8939269781112671, + "rewards/weighted_rejected": -1.9906127452850342, + "step": 1040 + }, + { + "epoch": 0.5495943470295734, + "grad_norm": 37.25775146484375, + "learning_rate": 5.022844544236754e-07, + "logits/chosen": -0.9515380859375, + "logits/rejected": -0.961352527141571, + "logps/chosen": -573.2562255859375, + "logps/rejected": -641.7312622070312, + "logps/weighted_chosen": -4.061865329742432, + "logps/weighted_rejected": -5.167675971984863, + "loss": 0.5774, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -293.25311279296875, + "rewards/margins": 93.615234375, + "rewards/rejected": -386.7679748535156, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -1.2718932628631592, + "rewards/weighted_margins": 0.8445068597793579, + "rewards/weighted_rejected": -2.1155028343200684, + "step": 1050 + }, + { + "epoch": 0.5548285789060455, + "grad_norm": 26.58415985107422, + "learning_rate": 4.931468274802608e-07, + "logits/chosen": -0.9689911007881165, + "logits/rejected": -0.9828445315361023, + "logps/chosen": -585.3031005859375, + "logps/rejected": -649.8265380859375, + "logps/weighted_chosen": -3.440234422683716, + "logps/weighted_rejected": -4.7862548828125, + "loss": 0.5493, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -303.04412841796875, + "rewards/margins": 85.91679382324219, + "rewards/rejected": -388.83087158203125, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -1.2489440441131592, + 
"rewards/weighted_margins": 0.8478637933731079, + "rewards/weighted_rejected": -2.095629930496216, + "step": 1060 + }, + { + "epoch": 0.5600628107825176, + "grad_norm": 25.14666175842285, + "learning_rate": 4.840114894407974e-07, + "logits/chosen": -0.9988906979560852, + "logits/rejected": -1.0262877941131592, + "logps/chosen": -564.2750244140625, + "logps/rejected": -604.7640380859375, + "logps/weighted_chosen": -3.8533082008361816, + "logps/weighted_rejected": -4.584790229797363, + "loss": 0.5612, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -287.8531188964844, + "rewards/margins": 65.25312805175781, + "rewards/rejected": -353.18670654296875, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.2451751232147217, + "rewards/weighted_margins": 0.882397472858429, + "rewards/weighted_rejected": -2.1273193359375, + "step": 1070 + }, + { + "epoch": 0.5652970426589898, + "grad_norm": 34.41138458251953, + "learning_rate": 4.748814914339811e-07, + "logits/chosen": -0.9615001678466797, + "logits/rejected": -0.990710437297821, + "logps/chosen": -606.1984252929688, + "logps/rejected": -647.2062377929688, + "logps/weighted_chosen": -3.8893065452575684, + "logps/weighted_rejected": -4.611474514007568, + "loss": 0.5687, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -321.95098876953125, + "rewards/margins": 69.9859390258789, + "rewards/rejected": -391.935546875, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -1.2852051258087158, + "rewards/weighted_margins": 0.868273913860321, + "rewards/weighted_rejected": -2.153552293777466, + "step": 1080 + }, + { + "epoch": 0.5705312745354619, + "grad_norm": 20.902027130126953, + "learning_rate": 4.657598828049801e-07, + "logits/chosen": -1.0034713745117188, + "logits/rejected": -1.0612213611602783, + "logps/chosen": -613.8250122070312, + "logps/rejected": -699.54296875, + "logps/weighted_chosen": -3.7084593772888184, + 
"logps/weighted_rejected": -4.5335693359375, + "loss": 0.537, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -323.48028564453125, + "rewards/margins": 95.4664077758789, + "rewards/rejected": -418.94842529296875, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -1.2582886219024658, + "rewards/weighted_margins": 0.9008544683456421, + "rewards/weighted_rejected": -2.15960693359375, + "step": 1090 + }, + { + "epoch": 0.575765506411934, + "grad_norm": 40.39773178100586, + "learning_rate": 4.566497100969792e-07, + "logits/chosen": -0.9749755859375, + "logits/rejected": -0.9959548711776733, + "logps/chosen": -720.859375, + "logps/rejected": -785.3062744140625, + "logps/weighted_chosen": -4.180810451507568, + "logps/weighted_rejected": -5.250244140625, + "loss": 0.5644, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -398.22674560546875, + "rewards/margins": 88.7249984741211, + "rewards/rejected": -486.95782470703125, + "rewards/weighted_accuracies": 0.778124988079071, + "rewards/weighted_chosen": -1.482934594154358, + "rewards/weighted_margins": 0.999176025390625, + "rewards/weighted_rejected": -2.483081102371216, + "step": 1100 + }, + { + "epoch": 0.5809997382884062, + "grad_norm": 31.201040267944336, + "learning_rate": 4.475540160336576e-07, + "logits/chosen": -0.992321789264679, + "logits/rejected": -1.0310242176055908, + "logps/chosen": -624.4796752929688, + "logps/rejected": -671.109375, + "logps/weighted_chosen": -4.22021484375, + "logps/weighted_rejected": -5.428515434265137, + "loss": 0.5427, + "rewards/accuracies": 0.609375, + "rewards/chosen": -326.1617126464844, + "rewards/margins": 86.7378921508789, + "rewards/rejected": -412.96405029296875, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -1.3425171375274658, + "rewards/weighted_margins": 1.0147826671600342, + "rewards/weighted_rejected": -2.3572998046875, + "step": 1110 + }, + { + "epoch": 0.5862339701648783, + 
"grad_norm": 47.25414276123047, + "learning_rate": 4.3847583850294565e-07, + "logits/chosen": -0.9623962640762329, + "logits/rejected": -0.9765838384628296, + "logps/chosen": -671.4148559570312, + "logps/rejected": -715.609375, + "logps/weighted_chosen": -4.4444580078125, + "logps/weighted_rejected": -5.09375, + "loss": 0.5772, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -380.1949157714844, + "rewards/margins": 75.4898452758789, + "rewards/rejected": -455.4906311035156, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.4877197742462158, + "rewards/weighted_margins": 0.929028332233429, + "rewards/weighted_rejected": -2.4179930686950684, + "step": 1120 + }, + { + "epoch": 0.5914682020413504, + "grad_norm": 30.387371063232422, + "learning_rate": 4.294182095423934e-07, + "logits/chosen": -0.939868152141571, + "logits/rejected": -0.9976135492324829, + "logps/chosen": -623.6375122070312, + "logps/rejected": -687.7578125, + "logps/weighted_chosen": -3.7587890625, + "logps/weighted_rejected": -4.962597846984863, + "loss": 0.5553, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -338.00079345703125, + "rewards/margins": 88.7320327758789, + "rewards/rejected": -426.6976623535156, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -1.33770751953125, + "rewards/weighted_margins": 0.9156738519668579, + "rewards/weighted_rejected": -2.252673387527466, + "step": 1130 + }, + { + "epoch": 0.5967024339178225, + "grad_norm": 15.42784309387207, + "learning_rate": 4.20384154326496e-07, + "logits/chosen": -0.9435394406318665, + "logits/rejected": -0.9906860589981079, + "logps/chosen": -516.5921630859375, + "logps/rejected": -537.7835693359375, + "logps/weighted_chosen": -3.5862059593200684, + "logps/weighted_rejected": -4.702197074890137, + "loss": 0.5867, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -255.84805297851562, + "rewards/margins": 46.25468826293945, + "rewards/rejected": 
-302.1802673339844, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.0679458379745483, + "rewards/weighted_margins": 0.756103515625, + "rewards/weighted_rejected": -1.824121117591858, + "step": 1140 + }, + { + "epoch": 0.6019366657942947, + "grad_norm": 27.973642349243164, + "learning_rate": 4.1137669015630863e-07, + "logits/chosen": -0.9399688839912415, + "logits/rejected": -0.9874938726425171, + "logps/chosen": -538.5843505859375, + "logps/rejected": -606.5062255859375, + "logps/weighted_chosen": -3.3528809547424316, + "logps/weighted_rejected": -4.451220512390137, + "loss": 0.5538, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -242.4329071044922, + "rewards/margins": 80.2808609008789, + "rewards/rejected": -322.6656188964844, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.035125732421875, + "rewards/weighted_margins": 0.787548840045929, + "rewards/weighted_rejected": -1.822839379310608, + "step": 1150 + }, + { + "epoch": 0.6071708976707668, + "grad_norm": 16.80686378479004, + "learning_rate": 4.023988254516943e-07, + "logits/chosen": -0.9526001214981079, + "logits/rejected": -1.002233862876892, + "logps/chosen": -565.6570434570312, + "logps/rejected": -599.0374755859375, + "logps/weighted_chosen": -3.925537109375, + "logps/weighted_rejected": -4.567919731140137, + "loss": 0.4945, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -264.87188720703125, + "rewards/margins": 67.05058288574219, + "rewards/rejected": -331.87774658203125, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -0.928997814655304, + "rewards/weighted_margins": 0.8846801519393921, + "rewards/weighted_rejected": -1.813256859779358, + "step": 1160 + }, + { + "epoch": 0.6124051295472389, + "grad_norm": 26.579771041870117, + "learning_rate": 3.9345355874653366e-07, + "logits/chosen": -0.964202880859375, + "logits/rejected": -0.984423816204071, + "logps/chosen": -594.9468994140625, + 
"logps/rejected": -598.2398681640625, + "logps/weighted_chosen": -3.7232666015625, + "logps/weighted_rejected": -4.643334865570068, + "loss": 0.6137, + "rewards/accuracies": 0.578125, + "rewards/chosen": -297.5835876464844, + "rewards/margins": 50.535545349121094, + "rewards/rejected": -348.0869140625, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -1.2086670398712158, + "rewards/weighted_margins": 0.742016613483429, + "rewards/weighted_rejected": -1.9512207508087158, + "step": 1170 + }, + { + "epoch": 0.6176393614237111, + "grad_norm": 30.812177658081055, + "learning_rate": 3.8454387768724157e-07, + "logits/chosen": -1.005767822265625, + "logits/rejected": -1.008856177330017, + "logps/chosen": -506.8109436035156, + "logps/rejected": -517.0929565429688, + "logps/weighted_chosen": -3.8594727516174316, + "logps/weighted_rejected": -4.857763767242432, + "loss": 0.5536, + "rewards/accuracies": 0.559374988079071, + "rewards/chosen": -233.98828125, + "rewards/margins": 52.480857849121094, + "rewards/rejected": -286.2953186035156, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -1.0302002429962158, + "rewards/weighted_margins": 0.875408947467804, + "rewards/weighted_rejected": -1.905310034751892, + "step": 1180 + }, + { + "epoch": 0.6228735933001832, + "grad_norm": 21.219039916992188, + "learning_rate": 3.7567275803491525e-07, + "logits/chosen": -1.0049774646759033, + "logits/rejected": -1.033941626548767, + "logps/chosen": -568.109375, + "logps/rejected": -577.7015380859375, + "logps/weighted_chosen": -3.424023389816284, + "logps/weighted_rejected": -4.737890720367432, + "loss": 0.5183, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -265.75665283203125, + "rewards/margins": 58.340232849121094, + "rewards/rejected": -324.1148376464844, + "rewards/weighted_accuracies": 0.737500011920929, + "rewards/weighted_chosen": -0.9457031488418579, + "rewards/weighted_margins": 0.9256957769393921, + 
"rewards/weighted_rejected": -1.871618628501892, + "step": 1190 + }, + { + "epoch": 0.6281078251766553, + "grad_norm": 24.416122436523438, + "learning_rate": 3.66843162671456e-07, + "logits/chosen": -0.992877185344696, + "logits/rejected": -1.0040404796600342, + "logps/chosen": -542.8703002929688, + "logps/rejected": -650.1124877929688, + "logps/weighted_chosen": -4.203027248382568, + "logps/weighted_rejected": -4.642626762390137, + "loss": 0.6415, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -268.76171875, + "rewards/margins": 109.0687484741211, + "rewards/rejected": -377.92889404296875, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -1.240747094154358, + "rewards/weighted_margins": 0.7083190679550171, + "rewards/weighted_rejected": -1.9493834972381592, + "step": 1200 + }, + { + "epoch": 0.6333420570531274, + "grad_norm": 30.961528778076172, + "learning_rate": 3.5805804060998924e-07, + "logits/chosen": -0.988171398639679, + "logits/rejected": -1.0112731456756592, + "logps/chosen": -576.0609130859375, + "logps/rejected": -664.0671997070312, + "logps/weighted_chosen": -3.3628907203674316, + "logps/weighted_rejected": -4.699365139007568, + "loss": 0.5317, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -275.33905029296875, + "rewards/margins": 118.02656555175781, + "rewards/rejected": -393.25311279296875, + "rewards/weighted_accuracies": 0.75, + "rewards/weighted_chosen": -1.125707983970642, + "rewards/weighted_margins": 0.971728503704071, + "rewards/weighted_rejected": -2.0982666015625, + "step": 1210 + }, + { + "epoch": 0.6385762889295996, + "grad_norm": 41.25253677368164, + "learning_rate": 3.493203260099197e-07, + "logits/chosen": -0.99005126953125, + "logits/rejected": -1.052459716796875, + "logps/chosen": -616.2062377929688, + "logps/rejected": -658.71875, + "logps/weighted_chosen": -3.477587938308716, + "logps/weighted_rejected": -4.777050971984863, + "loss": 0.565, + "rewards/accuracies": 0.578125, + 
"rewards/chosen": -328.61328125, + "rewards/margins": 57.30156326293945, + "rewards/rejected": -385.90899658203125, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -1.273229956626892, + "rewards/weighted_margins": 0.817840576171875, + "rewards/weighted_rejected": -2.0914306640625, + "step": 1220 + }, + { + "epoch": 0.6438105208060717, + "grad_norm": 44.952823638916016, + "learning_rate": 3.4063293719694407e-07, + "logits/chosen": -0.9899932742118835, + "logits/rejected": -1.038726806640625, + "logps/chosen": -558.2008056640625, + "logps/rejected": -622.3187255859375, + "logps/weighted_chosen": -3.8162598609924316, + "logps/weighted_rejected": -4.878759860992432, + "loss": 0.5962, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -281.5263671875, + "rewards/margins": 83.4378890991211, + "rewards/rejected": -365.0884704589844, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.238305687904358, + "rewards/weighted_margins": 0.806610107421875, + "rewards/weighted_rejected": -2.045300245285034, + "step": 1230 + }, + { + "epoch": 0.6490447526825438, + "grad_norm": 17.675373077392578, + "learning_rate": 3.319987756883559e-07, + "logits/chosen": -1.0293700695037842, + "logits/rejected": -1.05963134765625, + "logps/chosen": -563.71875, + "logps/rejected": -646.96875, + "logps/weighted_chosen": -3.506591796875, + "logps/weighted_rejected": -4.776757717132568, + "loss": 0.5093, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -286.01483154296875, + "rewards/margins": 91.8824234008789, + "rewards/rejected": -377.9115295410156, + "rewards/weighted_accuracies": 0.75, + "rewards/weighted_chosen": -1.059393286705017, + "rewards/weighted_margins": 1.0341796875, + "rewards/weighted_rejected": -2.09356689453125, + "step": 1240 + }, + { + "epoch": 0.654278984559016, + "grad_norm": 30.464399337768555, + "learning_rate": 3.234207252239607e-07, + "logits/chosen": -1.0212494134902954, + 
"logits/rejected": -1.053070068359375, + "logps/chosen": -624.4281005859375, + "logps/rejected": -653.6124877929688, + "logps/weighted_chosen": -4.078759670257568, + "logps/weighted_rejected": -4.910693168640137, + "loss": 0.5845, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -331.91796875, + "rewards/margins": 68.6617202758789, + "rewards/rejected": -400.6312561035156, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -1.3435547351837158, + "rewards/weighted_margins": 0.7761596441268921, + "rewards/weighted_rejected": -2.1199707984924316, + "step": 1250 + }, + { + "epoch": 0.6595132164354881, + "grad_norm": 22.67099380493164, + "learning_rate": 3.1490165080293175e-07, + "logits/chosen": -1.032771348953247, + "logits/rejected": -1.0862915515899658, + "logps/chosen": -546.2867431640625, + "logps/rejected": -636.9609375, + "logps/weighted_chosen": -3.594482421875, + "logps/weighted_rejected": -4.584668159484863, + "loss": 0.5515, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -278.97149658203125, + "rewards/margins": 94.80000305175781, + "rewards/rejected": -373.8335876464844, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.1841598749160767, + "rewards/weighted_margins": 0.892077624797821, + "rewards/weighted_rejected": -2.0762572288513184, + "step": 1260 + }, + { + "epoch": 0.6647474483119602, + "grad_norm": 18.550798416137695, + "learning_rate": 3.06444397726922e-07, + "logits/chosen": -1.000738501548767, + "logits/rejected": -1.0697616338729858, + "logps/chosen": -617.4578247070312, + "logps/rejected": -667.5968627929688, + "logps/weighted_chosen": -3.587646484375, + "logps/weighted_rejected": -5.254980564117432, + "loss": 0.5184, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -321.51385498046875, + "rewards/margins": 83.83515930175781, + "rewards/rejected": -405.3265686035156, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": 
-1.1221191883087158, + "rewards/weighted_margins": 1.162841796875, + "rewards/weighted_rejected": -2.2856812477111816, + "step": 1270 + }, + { + "epoch": 0.6699816801884323, + "grad_norm": 22.748411178588867, + "learning_rate": 2.980517906497586e-07, + "logits/chosen": -1.0525604486465454, + "logits/rejected": -1.106359839439392, + "logps/chosen": -608.9031372070312, + "logps/rejected": -702.9249877929688, + "logps/weighted_chosen": -3.8238282203674316, + "logps/weighted_rejected": -5.245898246765137, + "loss": 0.5269, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -316.0335998535156, + "rewards/margins": 114.6685562133789, + "rewards/rejected": -430.79608154296875, + "rewards/weighted_accuracies": 0.7406250238418579, + "rewards/weighted_chosen": -1.237799048423767, + "rewards/weighted_margins": 0.95733642578125, + "rewards/weighted_rejected": -2.19537353515625, + "step": 1280 + }, + { + "epoch": 0.6752159120649045, + "grad_norm": 25.510196685791016, + "learning_rate": 2.89726632634029e-07, + "logits/chosen": -1.055084228515625, + "logits/rejected": -1.07818603515625, + "logps/chosen": -630.3406372070312, + "logps/rejected": -696.7406005859375, + "logps/weighted_chosen": -3.6622557640075684, + "logps/weighted_rejected": -4.793408393859863, + "loss": 0.5721, + "rewards/accuracies": 0.59375, + "rewards/chosen": -356.0171813964844, + "rewards/margins": 85.5914077758789, + "rewards/rejected": -441.40625, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -1.368402123451233, + "rewards/weighted_margins": 0.8181518316268921, + "rewards/weighted_rejected": -2.1871337890625, + "step": 1290 + }, + { + "epoch": 0.6804501439413766, + "grad_norm": 3403.25146484375, + "learning_rate": 2.814717042148827e-07, + "logits/chosen": -1.0733153820037842, + "logits/rejected": -1.1039886474609375, + "logps/chosen": -579.2171630859375, + "logps/rejected": -664.2179565429688, + "logps/weighted_chosen": -4.351758003234863, + 
"logps/weighted_rejected": -5.040478706359863, + "loss": 0.5988, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": -305.47967529296875, + "rewards/margins": 97.9154281616211, + "rewards/rejected": -403.4019470214844, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -1.3027832508087158, + "rewards/weighted_margins": 0.817980945110321, + "rewards/weighted_rejected": -2.1208739280700684, + "step": 1300 + }, + { + "epoch": 0.6856843758178487, + "grad_norm": 17.00541877746582, + "learning_rate": 2.7328976247135416e-07, + "logits/chosen": -1.098138451576233, + "logits/rejected": -1.1229279041290283, + "logps/chosen": -568.8577880859375, + "logps/rejected": -613.6781005859375, + "logps/weighted_chosen": -3.7420411109924316, + "logps/weighted_rejected": -4.709765434265137, + "loss": 0.6077, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -279.4331970214844, + "rewards/margins": 82.03047180175781, + "rewards/rejected": -361.5140686035156, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -1.1976807117462158, + "rewards/weighted_margins": 0.7601562738418579, + "rewards/weighted_rejected": -1.9588134288787842, + "step": 1310 + }, + { + "epoch": 0.6909186076943209, + "grad_norm": 19.76185417175293, + "learning_rate": 2.651835401055217e-07, + "logits/chosen": -1.06744384765625, + "logits/rejected": -1.0995299816131592, + "logps/chosen": -551.1812744140625, + "logps/rejected": -621.4281005859375, + "logps/weighted_chosen": -3.578369140625, + "logps/weighted_rejected": -4.4482421875, + "loss": 0.6022, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -273.7007751464844, + "rewards/margins": 82.255859375, + "rewards/rejected": -355.9320373535156, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -1.109167456626892, + "rewards/weighted_margins": 0.736828625202179, + "rewards/weighted_rejected": -1.8462402820587158, + "step": 1320 + }, + { + 
"epoch": 0.696152839570793, + "grad_norm": 38.951107025146484, + "learning_rate": 2.571557445298055e-07, + "logits/chosen": -1.060308814048767, + "logits/rejected": -1.1077148914337158, + "logps/chosen": -517.7781372070312, + "logps/rejected": -573.0328369140625, + "logps/weighted_chosen": -3.490673780441284, + "logps/weighted_rejected": -4.511181831359863, + "loss": 0.5684, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -239.5070343017578, + "rewards/margins": 69.423828125, + "rewards/rejected": -308.96209716796875, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -0.9827636480331421, + "rewards/weighted_margins": 0.767047107219696, + "rewards/weighted_rejected": -1.749639868736267, + "step": 1330 + }, + { + "epoch": 0.7013870714472651, + "grad_norm": 24.909101486206055, + "learning_rate": 2.49209056962716e-07, + "logits/chosen": -1.0824463367462158, + "logits/rejected": -1.101318359375, + "logps/chosen": -593.8343505859375, + "logps/rejected": -619.0203247070312, + "logps/weighted_chosen": -3.7747559547424316, + "logps/weighted_rejected": -4.873144626617432, + "loss": 0.5711, + "rewards/accuracies": 0.59375, + "rewards/chosen": -279.1812438964844, + "rewards/margins": 72.595703125, + "rewards/rejected": -351.7855529785156, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -1.0869140625, + "rewards/weighted_margins": 0.7144775390625, + "rewards/weighted_rejected": -1.8008911609649658, + "step": 1340 + }, + { + "epoch": 0.7066213033237373, + "grad_norm": 24.351770401000977, + "learning_rate": 2.41346131533347e-07, + "logits/chosen": -1.13226318359375, + "logits/rejected": -1.141271948814392, + "logps/chosen": -624.3922119140625, + "logps/rejected": -672.2625122070312, + "logps/weighted_chosen": -3.3324952125549316, + "logps/weighted_rejected": -4.617163181304932, + "loss": 0.5572, + "rewards/accuracies": 0.609375, + "rewards/chosen": -317.93731689453125, + "rewards/margins": 
79.771484375, + "rewards/rejected": -397.5132751464844, + "rewards/weighted_accuracies": 0.721875011920929, + "rewards/weighted_chosen": -1.1239502429962158, + "rewards/weighted_margins": 0.762219250202179, + "rewards/weighted_rejected": -1.88616943359375, + "step": 1350 + }, + { + "epoch": 0.7118555352002094, + "grad_norm": 51.18987274169922, + "learning_rate": 2.3356959439491898e-07, + "logits/chosen": -1.053808569908142, + "logits/rejected": -1.1220916509628296, + "logps/chosen": -584.7609252929688, + "logps/rejected": -641.2109375, + "logps/weighted_chosen": -4.107861518859863, + "logps/weighted_rejected": -4.883447170257568, + "loss": 0.5525, + "rewards/accuracies": 0.609375, + "rewards/chosen": -314.8609313964844, + "rewards/margins": 84.24922180175781, + "rewards/rejected": -399.08319091796875, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -1.143286108970642, + "rewards/weighted_margins": 0.9397217035293579, + "rewards/weighted_rejected": -2.082202196121216, + "step": 1360 + }, + { + "epoch": 0.7170897670766815, + "grad_norm": 25.061872482299805, + "learning_rate": 2.258820428476645e-07, + "logits/chosen": -1.083398461341858, + "logits/rejected": -1.124755859375, + "logps/chosen": -615.3109130859375, + "logps/rejected": -711.2015380859375, + "logps/weighted_chosen": -3.571972608566284, + "logps/weighted_rejected": -4.434179782867432, + "loss": 0.5486, + "rewards/accuracies": 0.625, + "rewards/chosen": -336.1929626464844, + "rewards/margins": 105.92304992675781, + "rewards/rejected": -442.1929626464844, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -1.143713355064392, + "rewards/weighted_margins": 0.7840820550918579, + "rewards/weighted_rejected": -1.9280884265899658, + "step": 1370 + }, + { + "epoch": 0.7223239989531536, + "grad_norm": 50.347843170166016, + "learning_rate": 2.1828604447135245e-07, + "logits/chosen": -1.015539526939392, + "logits/rejected": -1.0683166980743408, + 
"logps/chosen": -660.08203125, + "logps/rejected": -700.8531494140625, + "logps/weighted_chosen": -4.080639839172363, + "logps/weighted_rejected": -5.272363185882568, + "loss": 0.5525, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -390.23712158203125, + "rewards/margins": 54.01288986206055, + "rewards/rejected": -444.32110595703125, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.2563965320587158, + "rewards/weighted_margins": 0.91107177734375, + "rewards/weighted_rejected": -2.1673583984375, + "step": 1380 + }, + { + "epoch": 0.7275582308296258, + "grad_norm": 29.76629638671875, + "learning_rate": 2.1078413626773545e-07, + "logits/chosen": -1.0745728015899658, + "logits/rejected": -1.095086693763733, + "logps/chosen": -615.3843994140625, + "logps/rejected": -720.0593872070312, + "logps/weighted_chosen": -3.6241729259490967, + "logps/weighted_rejected": -5.315381050109863, + "loss": 0.5552, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -335.61053466796875, + "rewards/margins": 115.576171875, + "rewards/rejected": -451.1439514160156, + "rewards/weighted_accuracies": 0.7250000238418579, + "rewards/weighted_chosen": -1.17816162109375, + "rewards/weighted_margins": 0.8593689203262329, + "rewards/weighted_rejected": -2.037463426589966, + "step": 1390 + }, + { + "epoch": 0.7327924627060979, + "grad_norm": 78.16152954101562, + "learning_rate": 2.0337882381321347e-07, + "logits/chosen": -1.062066674232483, + "logits/rejected": -1.0702636241912842, + "logps/chosen": -643.6328125, + "logps/rejected": -690.3914184570312, + "logps/weighted_chosen": -3.64990234375, + "logps/weighted_rejected": -4.711035251617432, + "loss": 0.5461, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -352.107421875, + "rewards/margins": 84.32890319824219, + "rewards/rejected": -436.3828125, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -1.304632544517517, + 
"rewards/weighted_margins": 0.9240967035293579, + "rewards/weighted_rejected": -2.2285399436950684, + "step": 1400 + }, + { + "epoch": 0.73802669458257, + "grad_norm": 30.649791717529297, + "learning_rate": 1.960725804219905e-07, + "logits/chosen": -1.016119360923767, + "logits/rejected": -1.067724585533142, + "logps/chosen": -629.0554809570312, + "logps/rejected": -716.56640625, + "logps/weighted_chosen": -4.213110446929932, + "logps/weighted_rejected": -4.397546291351318, + "loss": 0.5731, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -329.6851501464844, + "rewards/margins": 103.38749694824219, + "rewards/rejected": -433.10235595703125, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -1.220544457435608, + "rewards/weighted_margins": 0.807751476764679, + "rewards/weighted_rejected": -2.0281982421875, + "step": 1410 + }, + { + "epoch": 0.7432609264590422, + "grad_norm": 22.40865707397461, + "learning_rate": 1.8886784632000824e-07, + "logits/chosen": -1.037255883216858, + "logits/rejected": -1.0631592273712158, + "logps/chosen": -600.8796997070312, + "logps/rejected": -739.5546875, + "logps/weighted_chosen": -3.5133299827575684, + "logps/weighted_rejected": -5.098974704742432, + "loss": 0.5074, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -314.95819091796875, + "rewards/margins": 156.93203735351562, + "rewards/rejected": -471.8910217285156, + "rewards/weighted_accuracies": 0.746874988079071, + "rewards/weighted_chosen": -1.09588623046875, + "rewards/weighted_margins": 1.1151854991912842, + "rewards/weighted_rejected": -2.209277391433716, + "step": 1420 + }, + { + "epoch": 0.7484951583355143, + "grad_norm": 33.5097541809082, + "learning_rate": 1.8176702782993025e-07, + "logits/chosen": -1.0573241710662842, + "logits/rejected": -1.0565185546875, + "logps/chosen": -581.8117065429688, + "logps/rejected": -670.4046630859375, + "logps/weighted_chosen": -3.524365186691284, + "logps/weighted_rejected": 
-4.905322074890137, + "loss": 0.5604, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -320.2757873535156, + "rewards/margins": 95.93476867675781, + "rewards/rejected": -416.09686279296875, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -1.2260253429412842, + "rewards/weighted_margins": 0.9058593511581421, + "rewards/weighted_rejected": -2.131915330886841, + "step": 1430 + }, + { + "epoch": 0.7537293902119864, + "grad_norm": 25.396400451660156, + "learning_rate": 1.7477249656745034e-07, + "logits/chosen": -0.9870361089706421, + "logits/rejected": -1.025244116783142, + "logps/chosen": -535.6265869140625, + "logps/rejected": -582.4281005859375, + "logps/weighted_chosen": -3.7501220703125, + "logps/weighted_rejected": -5.042870998382568, + "loss": 0.5337, + "rewards/accuracies": 0.609375, + "rewards/chosen": -284.6830139160156, + "rewards/margins": 71.47187805175781, + "rewards/rejected": -356.1968688964844, + "rewards/weighted_accuracies": 0.7562500238418579, + "rewards/weighted_chosen": -1.081658959388733, + "rewards/weighted_margins": 1.011804223060608, + "rewards/weighted_rejected": -2.094250440597534, + "step": 1440 + }, + { + "epoch": 0.7589636220884585, + "grad_norm": 46.104244232177734, + "learning_rate": 1.6788658864919118e-07, + "logits/chosen": -0.998852550983429, + "logits/rejected": -1.0865967273712158, + "logps/chosen": -691.5070190429688, + "logps/rejected": -765.0437622070312, + "logps/weighted_chosen": -3.697497606277466, + "logps/weighted_rejected": -4.519140720367432, + "loss": 0.5339, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -376.79376220703125, + "rewards/margins": 104.693359375, + "rewards/rejected": -481.35467529296875, + "rewards/weighted_accuracies": 0.75, + "rewards/weighted_chosen": -1.15362548828125, + "rewards/weighted_margins": 0.9962402582168579, + "rewards/weighted_rejected": -2.150378465652466, + "step": 1450 + }, + { + "epoch": 0.7641978539649307, + "grad_norm": 
29.475303649902344, + "learning_rate": 1.611116039124613e-07, + "logits/chosen": -0.993756115436554, + "logits/rejected": -1.0471680164337158, + "logps/chosen": -612.1336059570312, + "logps/rejected": -652.9812622070312, + "logps/weighted_chosen": -4.120263576507568, + "logps/weighted_rejected": -5.172119140625, + "loss": 0.5626, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -349.39178466796875, + "rewards/margins": 74.2535171508789, + "rewards/rejected": -423.4437561035156, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.174108862876892, + "rewards/weighted_margins": 0.957659900188446, + "rewards/weighted_rejected": -2.132617235183716, + "step": 1460 + }, + { + "epoch": 0.7694320858414028, + "grad_norm": 40.777061462402344, + "learning_rate": 1.5444980514712723e-07, + "logits/chosen": -1.0843079090118408, + "logits/rejected": -1.1043212413787842, + "logps/chosen": -677.7750244140625, + "logps/rejected": -797.6781005859375, + "logps/weighted_chosen": -3.82275390625, + "logps/weighted_rejected": -4.651171684265137, + "loss": 0.6086, + "rewards/accuracies": 0.625, + "rewards/chosen": -366.34765625, + "rewards/margins": 125.67655944824219, + "rewards/rejected": -492.0234375, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -1.312963843345642, + "rewards/weighted_margins": 0.756854236125946, + "rewards/weighted_rejected": -2.069854736328125, + "step": 1470 + }, + { + "epoch": 0.7746663177178749, + "grad_norm": 29.14368438720703, + "learning_rate": 1.4790341733986083e-07, + "logits/chosen": -1.0463683605194092, + "logits/rejected": -1.0748412609100342, + "logps/chosen": -621.4453125, + "logps/rejected": -694.0281372070312, + "logps/weighted_chosen": -4.119336128234863, + "logps/weighted_rejected": -4.574511528015137, + "loss": 0.5669, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -334.2621154785156, + "rewards/margins": 97.23515319824219, + "rewards/rejected": 
-431.4808654785156, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -1.203649878501892, + "rewards/weighted_margins": 0.847003161907196, + "rewards/weighted_rejected": -2.0507445335388184, + "step": 1480 + }, + { + "epoch": 0.7799005495943471, + "grad_norm": 95.21968841552734, + "learning_rate": 1.4147462693101108e-07, + "logits/chosen": -1.0290710926055908, + "logits/rejected": -1.058990478515625, + "logps/chosen": -640.1702880859375, + "logps/rejected": -748.3312377929688, + "logps/weighted_chosen": -3.6262450218200684, + "logps/weighted_rejected": -4.870263576507568, + "loss": 0.5476, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -353.05682373046875, + "rewards/margins": 124.0137710571289, + "rewards/rejected": -476.9703063964844, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -1.14178466796875, + "rewards/weighted_margins": 1.012396216392517, + "rewards/weighted_rejected": -2.1542115211486816, + "step": 1490 + }, + { + "epoch": 0.7851347814708192, + "grad_norm": 29.23267936706543, + "learning_rate": 1.3516558108435177e-07, + "logits/chosen": -1.0289306640625, + "logits/rejected": -1.0320098400115967, + "logps/chosen": -576.7921752929688, + "logps/rejected": -693.203125, + "logps/weighted_chosen": -3.387939453125, + "logps/weighted_rejected": -5.066210746765137, + "loss": 0.538, + "rewards/accuracies": 0.609375, + "rewards/chosen": -297.62811279296875, + "rewards/margins": 130.5636749267578, + "rewards/rejected": -428.2289123535156, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -0.9833618402481079, + "rewards/weighted_margins": 0.922503650188446, + "rewards/weighted_rejected": -1.9057738780975342, + "step": 1500 + }, + { + "epoch": 0.7851347814708192, + "eval_logits/chosen": -1.1175518035888672, + "eval_logits/rejected": -1.1384687423706055, + "eval_logps/chosen": -604.7919921875, + "eval_logps/rejected": -676.4500122070312, + 
"eval_logps/weighted_chosen": -3.6202943325042725, + "eval_logps/weighted_rejected": -4.712391376495361, + "eval_loss": 0.5499775409698486, + "eval_rewards/accuracies": 0.593500018119812, + "eval_rewards/chosen": -316.02337646484375, + "eval_rewards/margins": 92.78912353515625, + "eval_rewards/rejected": -408.82000732421875, + "eval_rewards/weighted_accuracies": 0.7145000100135803, + "eval_rewards/weighted_chosen": -1.0725815296173096, + "eval_rewards/weighted_margins": 0.8416025638580322, + "eval_rewards/weighted_rejected": -1.9141839742660522, + "eval_runtime": 1154.371, + "eval_samples_per_second": 1.733, + "eval_steps_per_second": 0.433, + "step": 1500 + } + ], + "logging_steps": 10, + "max_steps": 1911, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1500/training_args.bin b/checkpoint-1500/training_args.bin new file mode 100644 index 0000000..f626c5c --- /dev/null +++ b/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe50c447c403d41daaff89efed0e09938eb1d87a3da0072f9c7935887bef87f +size 8721 diff --git a/checkpoint-1500/zero_to_fp32.py b/checkpoint-1500/zero_to_fp32.py new file mode 100644 index 0000000..0e75914 --- /dev/null +++ b/checkpoint-1500/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. 
Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def 
get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + 
frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensors instead of torch tensors, which is more memory efficient. + Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint.
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint.
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-1911/chat_template.jinja b/checkpoint-1911/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/checkpoint-1911/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-1911/config.json b/checkpoint-1911/config.json new file mode 100644 index 0000000..3f8f5c0 --- /dev/null +++ b/checkpoint-1911/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 
128256 +} diff --git a/checkpoint-1911/generation_config.json b/checkpoint-1911/generation_config.json new file mode 100644 index 0000000..fc3c54a --- /dev/null +++ b/checkpoint-1911/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "eos_token_id": 128001, + "transformers_version": "4.54.1" +} diff --git a/checkpoint-1911/latest b/checkpoint-1911/latest new file mode 100644 index 0000000..44970e7 --- /dev/null +++ b/checkpoint-1911/latest @@ -0,0 +1 @@ +global_step1910 \ No newline at end of file diff --git a/checkpoint-1911/model-00001-of-00004.safetensors b/checkpoint-1911/model-00001-of-00004.safetensors new file mode 100644 index 0000000..1048745 --- /dev/null +++ b/checkpoint-1911/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62cda7039fdb68297f19cd6e4ff6231bf4276e8d1490bee26d47a248a93bca06 +size 4976698672 diff --git a/checkpoint-1911/model-00002-of-00004.safetensors b/checkpoint-1911/model-00002-of-00004.safetensors new file mode 100644 index 0000000..256655f --- /dev/null +++ b/checkpoint-1911/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da90fd805e109a8d8e42902c0608f6f49b9c7d23d089b483e31e2815ab5a9561 +size 4999802720 diff --git a/checkpoint-1911/model-00003-of-00004.safetensors b/checkpoint-1911/model-00003-of-00004.safetensors new file mode 100644 index 0000000..d648b73 --- /dev/null +++ b/checkpoint-1911/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8adc12e04e3b30a33fde52b873d7fc71c439b78f45dd662efc85ee25dc84bfa +size 4915916176 diff --git a/checkpoint-1911/model-00004-of-00004.safetensors b/checkpoint-1911/model-00004-of-00004.safetensors new file mode 100644 index 0000000..441141d --- /dev/null +++ b/checkpoint-1911/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9955deefb62c1bb8725dfa07a4c3c4649abc3b65dac0f0295c2e906f5b27c6e1 +size 1168138808 diff --git a/checkpoint-1911/model.safetensors.index.json b/checkpoint-1911/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/checkpoint-1911/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": 
"model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": 
"model-00004-of-00004.safetensors" + } +} diff --git a/checkpoint-1911/rng_state_0.pth b/checkpoint-1911/rng_state_0.pth new file mode 100644 index 0000000..8d84687 --- /dev/null +++ b/checkpoint-1911/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ea5dcfd1b4a49b41b4fa01a8b24bba6186957162c3fd555ebff28620c7268b +size 14917 diff --git a/checkpoint-1911/rng_state_1.pth b/checkpoint-1911/rng_state_1.pth new file mode 100644 index 0000000..54f0119 --- /dev/null +++ b/checkpoint-1911/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a5af18bb5eae8b7fd6bdef66259014d98ba87ffb16d614bba38f2c32030798 +size 14917 diff --git a/checkpoint-1911/scheduler.pt b/checkpoint-1911/scheduler.pt new file mode 100644 index 0000000..b593d50 --- /dev/null +++ b/checkpoint-1911/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5b59ab434923fa38180a582743116cb648692f61b72d4eddecc5e7980e6087d +size 1465 diff --git a/checkpoint-1911/special_tokens_map.json b/checkpoint-1911/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/checkpoint-1911/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1911/tokenizer.json b/checkpoint-1911/tokenizer.json new file mode 100644 index 0000000..03aa64f --- /dev/null +++ b/checkpoint-1911/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0968dcc0ee8e56c7dccd34a7f51f8065ea0cb9e2cc529e3243d1e5c0a4bdaa0c +size 17208754 diff --git 
a/checkpoint-1911/tokenizer_config.json b/checkpoint-1911/tokenizer_config.json new file mode 100644 index 0000000..877a9a9 --- /dev/null +++ b/checkpoint-1911/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": 
"<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": 
"<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": 
"<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 32768, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1911/trainer_state.json b/checkpoint-1911/trainer_state.json new file mode 100644 index 0000000..e2b5341 --- /dev/null +++ b/checkpoint-1911/trainer_state.json @@ -0,0 +1,4132 @@ +{ + 
"best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 1911, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005234231876472127, + "grad_norm": 132.6717987060547, + "learning_rate": 0.0, + "logits/chosen": -0.40118408203125, + "logits/rejected": -0.41802978515625, + "logps/chosen": -297.609375, + "logps/rejected": -247.84375, + "logps/weighted_chosen": -4.7568359375, + "logps/weighted_rejected": -3.47998046875, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/weighted_accuracies": 0.0, + "rewards/weighted_chosen": 0.0, + "rewards/weighted_margins": 0.0, + "rewards/weighted_rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.005234231876472127, + "grad_norm": 226.00839233398438, + "learning_rate": 4.6875e-08, + "logits/chosen": -0.3175845742225647, + "logits/rejected": -0.3532341718673706, + "logps/chosen": -275.5841979980469, + "logps/rejected": -255.84548950195312, + "logps/weighted_chosen": -2.651665687561035, + "logps/weighted_rejected": -2.88427734375, + "loss": 0.6921, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0731336772441864, + "rewards/margins": -0.0670572891831398, + "rewards/rejected": -0.006076388992369175, + "rewards/weighted_accuracies": 0.2951388955116272, + "rewards/weighted_chosen": -0.0015835232334211469, + "rewards/weighted_margins": -0.0009324815473519266, + "rewards/weighted_rejected": -0.0006510416860692203, + "step": 10 + }, + { + "epoch": 0.010468463752944255, + "grad_norm": 14.726158142089844, + "learning_rate": 9.895833333333332e-08, + "logits/chosen": -0.29781341552734375, + "logits/rejected": -0.3136836886405945, + "logps/chosen": -294.4296875, + "logps/rejected": -272.58984375, + "logps/weighted_chosen": -2.458728075027466, + "logps/weighted_rejected": -2.455883741378784, + 
"loss": 0.6924, + "rewards/accuracies": 0.30000001192092896, + "rewards/chosen": -0.14013671875, + "rewards/margins": -0.13369140028953552, + "rewards/rejected": -0.0064453124068677425, + "rewards/weighted_accuracies": 0.34062498807907104, + "rewards/weighted_chosen": -0.0013603210682049394, + "rewards/weighted_margins": -0.0010925292735919356, + "rewards/weighted_rejected": -0.0002677917364053428, + "step": 20 + }, + { + "epoch": 0.015702695629416383, + "grad_norm": 77.6125259399414, + "learning_rate": 1.5104166666666664e-07, + "logits/chosen": -0.2918853759765625, + "logits/rejected": -0.3377639651298523, + "logps/chosen": -298.05859375, + "logps/rejected": -268.0132751464844, + "logps/weighted_chosen": -2.4350829124450684, + "logps/weighted_rejected": -2.7343993186950684, + "loss": 0.6926, + "rewards/accuracies": 0.24062499403953552, + "rewards/chosen": -0.06621094048023224, + "rewards/margins": -0.1640625, + "rewards/rejected": 0.09785155951976776, + "rewards/weighted_accuracies": 0.3187499940395355, + "rewards/weighted_chosen": 0.0014068603049963713, + "rewards/weighted_margins": -0.0015777588123455644, + "rewards/weighted_rejected": 0.0029846192337572575, + "step": 30 + }, + { + "epoch": 0.02093692750588851, + "grad_norm": 30.666196823120117, + "learning_rate": 2.03125e-07, + "logits/chosen": -0.30072021484375, + "logits/rejected": -0.3433845639228821, + "logps/chosen": -278.68829345703125, + "logps/rejected": -253.90780639648438, + "logps/weighted_chosen": -2.506396532058716, + "logps/weighted_rejected": -2.8416504859924316, + "loss": 0.6908, + "rewards/accuracies": 0.3062500059604645, + "rewards/chosen": 0.063232421875, + "rewards/margins": 0.04838867112994194, + "rewards/rejected": 0.014843749813735485, + "rewards/weighted_accuracies": 0.40312498807907104, + "rewards/weighted_chosen": 0.0042968750931322575, + "rewards/weighted_margins": 0.0019538879860192537, + "rewards/weighted_rejected": 0.0023429871071130037, + "step": 40 + }, + { + "epoch": 
0.02617115938236064, + "grad_norm": 18.60569953918457, + "learning_rate": 2.552083333333333e-07, + "logits/chosen": -0.2819870114326477, + "logits/rejected": -0.32059136033058167, + "logps/chosen": -280.31951904296875, + "logps/rejected": -267.4359436035156, + "logps/weighted_chosen": -2.4267334938049316, + "logps/weighted_rejected": -2.529711961746216, + "loss": 0.6891, + "rewards/accuracies": 0.3187499940395355, + "rewards/chosen": -0.03535156324505806, + "rewards/margins": -0.13984374701976776, + "rewards/rejected": 0.1044921875, + "rewards/weighted_accuracies": 0.3968749940395355, + "rewards/weighted_chosen": 0.0039031982887536287, + "rewards/weighted_margins": 0.005755615420639515, + "rewards/weighted_rejected": -0.0018524170154705644, + "step": 50 + }, + { + "epoch": 0.031405391258832765, + "grad_norm": 38.21036911010742, + "learning_rate": 3.0729166666666665e-07, + "logits/chosen": -0.31453245878219604, + "logits/rejected": -0.30809077620506287, + "logps/chosen": -277.66015625, + "logps/rejected": -261.7445373535156, + "logps/weighted_chosen": -2.8622069358825684, + "logps/weighted_rejected": -2.7553467750549316, + "loss": 0.6894, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": 0.04150390625, + "rewards/margins": 0.08027343451976776, + "rewards/rejected": -0.03876953199505806, + "rewards/weighted_accuracies": 0.4312500059604645, + "rewards/weighted_chosen": 0.0006561279296875, + "rewards/weighted_margins": 0.006243896670639515, + "rewards/weighted_rejected": -0.005587768740952015, + "step": 60 + }, + { + "epoch": 0.036639623135304895, + "grad_norm": 69.19047546386719, + "learning_rate": 3.59375e-07, + "logits/chosen": -0.3177490234375, + "logits/rejected": -0.3246749937534332, + "logps/chosen": -289.76251220703125, + "logps/rejected": -244.92578125, + "logps/weighted_chosen": -2.3438963890075684, + "logps/weighted_rejected": -2.7010498046875, + "loss": 0.6841, + "rewards/accuracies": 0.49687498807907104, + "rewards/chosen": 
0.29765623807907104, + "rewards/margins": 0.4546875059604645, + "rewards/rejected": -0.15703125298023224, + "rewards/weighted_accuracies": 0.5406249761581421, + "rewards/weighted_chosen": 0.01530532818287611, + "rewards/weighted_margins": 0.01918792724609375, + "rewards/weighted_rejected": -0.0038825988303869963, + "step": 70 + }, + { + "epoch": 0.04187385501177702, + "grad_norm": 51.98476791381836, + "learning_rate": 4.114583333333333e-07, + "logits/chosen": -0.2850998044013977, + "logits/rejected": -0.30662041902542114, + "logps/chosen": -289.234375, + "logps/rejected": -270.375, + "logps/weighted_chosen": -2.5325684547424316, + "logps/weighted_rejected": -2.796435594558716, + "loss": 0.6747, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.512499988079071, + "rewards/margins": 0.6001952886581421, + "rewards/rejected": -0.08769531548023224, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": 0.036380767822265625, + "rewards/weighted_margins": 0.04396667331457138, + "rewards/weighted_rejected": -0.007586670108139515, + "step": 80 + }, + { + "epoch": 0.04710808688824915, + "grad_norm": 30.52783203125, + "learning_rate": 4.6354166666666664e-07, + "logits/chosen": -0.3142959475517273, + "logits/rejected": -0.3075408935546875, + "logps/chosen": -280.11407470703125, + "logps/rejected": -257.95233154296875, + "logps/weighted_chosen": -2.719482421875, + "logps/weighted_rejected": -2.88037109375, + "loss": 0.6687, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.5205078125, + "rewards/margins": 0.737109363079071, + "rewards/rejected": -0.21660156548023224, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": 0.06780395656824112, + "rewards/weighted_margins": 0.07340697944164276, + "rewards/weighted_rejected": -0.0056396485306322575, + "step": 90 + }, + { + "epoch": 0.05234231876472128, + "grad_norm": 69.397705078125, + "learning_rate": 5.156249999999999e-07, + "logits/chosen": 
-0.28213196992874146, + "logits/rejected": -0.3543289303779602, + "logps/chosen": -290.71875, + "logps/rejected": -286.73126220703125, + "logps/weighted_chosen": -2.2228636741638184, + "logps/weighted_rejected": -2.8367552757263184, + "loss": 0.6848, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": 0.24521484971046448, + "rewards/margins": 1.0690429210662842, + "rewards/rejected": -0.8238281011581421, + "rewards/weighted_accuracies": 0.5843750238418579, + "rewards/weighted_chosen": 0.05242309719324112, + "rewards/weighted_margins": 0.05032653734087944, + "rewards/weighted_rejected": 0.0021240233909338713, + "step": 100 + }, + { + "epoch": 0.05757655064119341, + "grad_norm": 36.600040435791016, + "learning_rate": 5.677083333333333e-07, + "logits/chosen": -0.33063429594039917, + "logits/rejected": -0.319937139749527, + "logps/chosen": -296.82501220703125, + "logps/rejected": -262.2984313964844, + "logps/weighted_chosen": -2.8468017578125, + "logps/weighted_rejected": -2.9306397438049316, + "loss": 0.6773, + "rewards/accuracies": 0.578125, + "rewards/chosen": -0.474609375, + "rewards/margins": 1.053613305091858, + "rewards/rejected": -1.528222680091858, + "rewards/weighted_accuracies": 0.534375011920929, + "rewards/weighted_chosen": 0.013439941219985485, + "rewards/weighted_margins": 0.05541381984949112, + "rewards/weighted_rejected": -0.04198913648724556, + "step": 110 + }, + { + "epoch": 0.06281078251766553, + "grad_norm": 57.109580993652344, + "learning_rate": 6.197916666666666e-07, + "logits/chosen": -0.33633461594581604, + "logits/rejected": -0.36155110597610474, + "logps/chosen": -295.3687438964844, + "logps/rejected": -256.1953125, + "logps/weighted_chosen": -2.161865234375, + "logps/weighted_rejected": -2.4251465797424316, + "loss": 0.6791, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": -0.72900390625, + "rewards/margins": 1.641210913658142, + "rewards/rejected": -2.3702149391174316, + "rewards/weighted_accuracies": 
0.5562499761581421, + "rewards/weighted_chosen": 0.007176590152084827, + "rewards/weighted_margins": 0.05286560207605362, + "rewards/weighted_rejected": -0.04570160061120987, + "step": 120 + }, + { + "epoch": 0.06804501439413765, + "grad_norm": 39.176841735839844, + "learning_rate": 6.718749999999999e-07, + "logits/chosen": -0.29625242948532104, + "logits/rejected": -0.2914108335971832, + "logps/chosen": -306.6781311035156, + "logps/rejected": -280.15936279296875, + "logps/weighted_chosen": -2.188079833984375, + "logps/weighted_rejected": -2.5787596702575684, + "loss": 0.6659, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.644238293170929, + "rewards/margins": 1.972265601158142, + "rewards/rejected": -2.616406202316284, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": 0.01349639892578125, + "rewards/weighted_margins": 0.0841522216796875, + "rewards/weighted_rejected": -0.07064209133386612, + "step": 130 + }, + { + "epoch": 0.07327924627060979, + "grad_norm": 52.14993667602539, + "learning_rate": 7.239583333333333e-07, + "logits/chosen": -0.3304199278354645, + "logits/rejected": -0.3464847505092621, + "logps/chosen": -301.4390563964844, + "logps/rejected": -277.9515686035156, + "logps/weighted_chosen": -2.554003953933716, + "logps/weighted_rejected": -2.881591796875, + "loss": 0.6581, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -2.746875047683716, + "rewards/margins": 2.744921922683716, + "rewards/rejected": -5.491991996765137, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.02762756310403347, + "rewards/weighted_margins": 0.11510010063648224, + "rewards/weighted_rejected": -0.14276733994483948, + "step": 140 + }, + { + "epoch": 0.07851347814708191, + "grad_norm": 22.611814498901367, + "learning_rate": 7.760416666666666e-07, + "logits/chosen": -0.2870376706123352, + "logits/rejected": -0.2975311279296875, + "logps/chosen": -287.859375, + "logps/rejected": 
-257.54296875, + "logps/weighted_chosen": -3.089892625808716, + "logps/weighted_rejected": -3.1946043968200684, + "loss": 0.6544, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.7095704078674316, + "rewards/margins": 2.942578077316284, + "rewards/rejected": -6.652148246765137, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.005145263858139515, + "rewards/weighted_margins": 0.16416625678539276, + "rewards/weighted_rejected": -0.16951599717140198, + "step": 150 + }, + { + "epoch": 0.08374771002355404, + "grad_norm": 15.511767387390137, + "learning_rate": 8.28125e-07, + "logits/chosen": -0.3232177793979645, + "logits/rejected": -0.3726806640625, + "logps/chosen": -308.91796875, + "logps/rejected": -282.15704345703125, + "logps/weighted_chosen": -2.5903563499450684, + "logps/weighted_rejected": -2.742602586746216, + "loss": 0.6211, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -4.956835746765137, + "rewards/margins": 3.9961915016174316, + "rewards/rejected": -8.953222274780273, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -0.002410888671875, + "rewards/weighted_margins": 0.23797607421875, + "rewards/weighted_rejected": -0.24028320610523224, + "step": 160 + }, + { + "epoch": 0.08898194190002617, + "grad_norm": 167.33956909179688, + "learning_rate": 8.802083333333333e-07, + "logits/chosen": -0.36021536588668823, + "logits/rejected": -0.3597045838832855, + "logps/chosen": -311.03045654296875, + "logps/rejected": -270.46875, + "logps/weighted_chosen": -2.8318848609924316, + "logps/weighted_rejected": -3.139453172683716, + "loss": 0.6949, + "rewards/accuracies": 0.609375, + "rewards/chosen": -8.0087890625, + "rewards/margins": 4.345898628234863, + "rewards/rejected": -12.354199409484863, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -0.01859130896627903, + "rewards/weighted_margins": 0.20853272080421448, + "rewards/weighted_rejected": 
-0.22731323540210724, + "step": 170 + }, + { + "epoch": 0.0942161737764983, + "grad_norm": 64.57138061523438, + "learning_rate": 9.322916666666666e-07, + "logits/chosen": -0.33618468046188354, + "logits/rejected": -0.3534431457519531, + "logps/chosen": -284.2171936035156, + "logps/rejected": -272.12969970703125, + "logps/weighted_chosen": -2.694580078125, + "logps/weighted_rejected": -3.225878953933716, + "loss": 0.6814, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -10.43701171875, + "rewards/margins": 5.353320121765137, + "rewards/rejected": -15.7919921875, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.08297424018383026, + "rewards/weighted_margins": 0.26459962129592896, + "rewards/weighted_rejected": -0.347381591796875, + "step": 180 + }, + { + "epoch": 0.09945040565297043, + "grad_norm": 49.0852165222168, + "learning_rate": 9.84375e-07, + "logits/chosen": -0.354086309671402, + "logits/rejected": -0.38891831040382385, + "logps/chosen": -319.17498779296875, + "logps/rejected": -283.31561279296875, + "logps/weighted_chosen": -2.5078492164611816, + "logps/weighted_rejected": -3.016357421875, + "loss": 0.6496, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -13.561426162719727, + "rewards/margins": 5.937890529632568, + "rewards/rejected": -19.498828887939453, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.16942748427391052, + "rewards/weighted_margins": 0.24410399794578552, + "rewards/weighted_rejected": -0.41356199979782104, + "step": 190 + }, + { + "epoch": 0.10468463752944256, + "grad_norm": 53.46296691894531, + "learning_rate": 9.99959085414323e-07, + "logits/chosen": -0.37868577241897583, + "logits/rejected": -0.4114578366279602, + "logps/chosen": -324.7124938964844, + "logps/rejected": -279.72967529296875, + "logps/weighted_chosen": -2.8757567405700684, + "logps/weighted_rejected": -3.3623046875, + "loss": 0.639, + "rewards/accuracies": 0.609375, + 
"rewards/chosen": -15.428125381469727, + "rewards/margins": 6.552148342132568, + "rewards/rejected": -21.975000381469727, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.18135985732078552, + "rewards/weighted_margins": 0.29008787870407104, + "rewards/weighted_rejected": -0.471527099609375, + "step": 200 + }, + { + "epoch": 0.10991886940591468, + "grad_norm": 24.815481185913086, + "learning_rate": 9.997587035630105e-07, + "logits/chosen": -0.3853309750556946, + "logits/rejected": -0.4257049560546875, + "logps/chosen": -302.82891845703125, + "logps/rejected": -308.671875, + "logps/weighted_chosen": -2.632519483566284, + "logps/weighted_rejected": -3.3669190406799316, + "loss": 0.6558, + "rewards/accuracies": 0.65625, + "rewards/chosen": -15.814453125, + "rewards/margins": 8.331445693969727, + "rewards/rejected": -24.146093368530273, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.225901797413826, + "rewards/weighted_margins": 0.23236694931983948, + "rewards/weighted_rejected": -0.45829468965530396, + "step": 210 + }, + { + "epoch": 0.11515310128238682, + "grad_norm": 24.175745010375977, + "learning_rate": 9.99391406364405e-07, + "logits/chosen": -0.37365952134132385, + "logits/rejected": -0.3758789002895355, + "logps/chosen": -309.34686279296875, + "logps/rejected": -293.98126220703125, + "logps/weighted_chosen": -3.002514600753784, + "logps/weighted_rejected": -3.453906297683716, + "loss": 0.6732, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -17.203418731689453, + "rewards/margins": 7.933203220367432, + "rewards/rejected": -25.137109756469727, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.259225457906723, + "rewards/weighted_margins": 0.29540252685546875, + "rewards/weighted_rejected": -0.5546798706054688, + "step": 220 + }, + { + "epoch": 0.12038733315885894, + "grad_norm": 85.15988159179688, + "learning_rate": 9.988573164927884e-07, + 
"logits/chosen": -0.3097473084926605, + "logits/rejected": -0.3477935791015625, + "logps/chosen": -286.5078125, + "logps/rejected": -281.8453063964844, + "logps/weighted_chosen": -2.66943359375, + "logps/weighted_rejected": -3.1229491233825684, + "loss": 0.6646, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -17.690723419189453, + "rewards/margins": 12.424609184265137, + "rewards/rejected": -30.110157012939453, + "rewards/weighted_accuracies": 0.65625, + "rewards/weighted_chosen": -0.2716217041015625, + "rewards/weighted_margins": 0.32661741971969604, + "rewards/weighted_rejected": -0.5983597040176392, + "step": 230 + }, + { + "epoch": 0.12562156503533106, + "grad_norm": 26.17377471923828, + "learning_rate": 9.98156612329838e-07, + "logits/chosen": -0.39516907930374146, + "logits/rejected": -0.44511109590530396, + "logps/chosen": -286.74884033203125, + "logps/rejected": -318.22735595703125, + "logps/weighted_chosen": -2.6696534156799316, + "logps/weighted_rejected": -3.4151854515075684, + "loss": 0.643, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -21.990428924560547, + "rewards/margins": 14.028905868530273, + "rewards/rejected": -36.013282775878906, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.2329559326171875, + "rewards/weighted_margins": 0.3950134217739105, + "rewards/weighted_rejected": -0.6281493902206421, + "step": 240 + }, + { + "epoch": 0.13085579691180318, + "grad_norm": 56.73057174682617, + "learning_rate": 9.97289527905053e-07, + "logits/chosen": -0.40631332993507385, + "logits/rejected": -0.4203124940395355, + "logps/chosen": -290.1703186035156, + "logps/rejected": -291.6328125, + "logps/weighted_chosen": -3.051513671875, + "logps/weighted_rejected": -3.3163819313049316, + "loss": 0.677, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -25.742870330810547, + "rewards/margins": 9.973828315734863, + "rewards/rejected": -35.72148513793945, + 
"rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.2856689393520355, + "rewards/weighted_margins": 0.253326416015625, + "rewards/weighted_rejected": -0.5388733148574829, + "step": 250 + }, + { + "epoch": 0.1360900287882753, + "grad_norm": 17.766258239746094, + "learning_rate": 9.962563528175875e-07, + "logits/chosen": -0.3611465394496918, + "logits/rejected": -0.39628905057907104, + "logps/chosen": -324.36639404296875, + "logps/rejected": -297.765625, + "logps/weighted_chosen": -2.652392625808716, + "logps/weighted_rejected": -3.535571336746216, + "loss": 0.6414, + "rewards/accuracies": 0.578125, + "rewards/chosen": -25.621288299560547, + "rewards/margins": 11.306055068969727, + "rewards/rejected": -36.93359375, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -0.2533508241176605, + "rewards/weighted_margins": 0.2956604063510895, + "rewards/weighted_rejected": -0.5490142703056335, + "step": 260 + }, + { + "epoch": 0.14132426066474746, + "grad_norm": 17.552453994750977, + "learning_rate": 9.950574321395277e-07, + "logits/chosen": -0.41735154390335083, + "logits/rejected": -0.441476434469223, + "logps/chosen": -314.5093688964844, + "logps/rejected": -295.7093811035156, + "logps/weighted_chosen": -2.864941358566284, + "logps/weighted_rejected": -3.25732421875, + "loss": 0.661, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -25.1123046875, + "rewards/margins": 7.519726753234863, + "rewards/rejected": -32.62890625, + "rewards/weighted_accuracies": 0.609375, + "rewards/weighted_chosen": -0.3035888671875, + "rewards/weighted_margins": 0.2833190858364105, + "rewards/weighted_rejected": -0.5868393182754517, + "step": 270 + }, + { + "epoch": 0.14655849254121958, + "grad_norm": 47.66518020629883, + "learning_rate": 9.936931663006413e-07, + "logits/chosen": -0.4760284423828125, + "logits/rejected": -0.46795654296875, + "logps/chosen": -323.48126220703125, + "logps/rejected": -313.2875061035156, + 
"logps/weighted_chosen": -2.794970750808716, + "logps/weighted_rejected": -3.3581910133361816, + "loss": 0.6169, + "rewards/accuracies": 0.690625011920929, + "rewards/chosen": -20.707616806030273, + "rewards/margins": 13.166601181030273, + "rewards/rejected": -33.86640548706055, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.10174255073070526, + "rewards/weighted_margins": 0.34544676542282104, + "rewards/weighted_rejected": -0.447021484375, + "step": 280 + }, + { + "epoch": 0.1517927244176917, + "grad_norm": 32.503883361816406, + "learning_rate": 9.921640109546357e-07, + "logits/chosen": -0.44742050766944885, + "logits/rejected": -0.5166229009628296, + "logps/chosen": -292.1796875, + "logps/rejected": -289.6234436035156, + "logps/weighted_chosen": -2.7469239234924316, + "logps/weighted_rejected": -3.9541258811950684, + "loss": 0.6249, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -25.293359756469727, + "rewards/margins": 12.698633193969727, + "rewards/rejected": -37.994529724121094, + "rewards/weighted_accuracies": 0.628125011920929, + "rewards/weighted_chosen": -0.15215758979320526, + "rewards/weighted_margins": 0.4393859803676605, + "rewards/weighted_rejected": -0.5915588140487671, + "step": 290 + }, + { + "epoch": 0.15702695629416383, + "grad_norm": 17.32170867919922, + "learning_rate": 9.90470476826975e-07, + "logits/chosen": -0.5146636962890625, + "logits/rejected": -0.515917956829071, + "logps/chosen": -302.3570251464844, + "logps/rejected": -313.68438720703125, + "logps/weighted_chosen": -2.6830201148986816, + "logps/weighted_rejected": -3.202099561691284, + "loss": 0.6526, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -32.978126525878906, + "rewards/margins": 13.435937881469727, + "rewards/rejected": -46.408203125, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.23505249619483948, + "rewards/weighted_margins": 0.33623045682907104, + 
"rewards/weighted_rejected": -0.5710296630859375, + "step": 300 + }, + { + "epoch": 0.16226118817063595, + "grad_norm": 25.855854034423828, + "learning_rate": 9.886131295443002e-07, + "logits/chosen": -0.6332122683525085, + "logits/rejected": -0.6879852414131165, + "logps/chosen": -315.02264404296875, + "logps/rejected": -296.54998779296875, + "logps/weighted_chosen": -2.8891844749450684, + "logps/weighted_rejected": -3.3497071266174316, + "loss": 0.6099, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -33.83808517456055, + "rewards/margins": 12.542577743530273, + "rewards/rejected": -46.39081954956055, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.20775146782398224, + "rewards/weighted_margins": 0.507794201374054, + "rewards/weighted_rejected": -0.715728759765625, + "step": 310 + }, + { + "epoch": 0.16749542004710807, + "grad_norm": 19.11484718322754, + "learning_rate": 9.865925894455166e-07, + "logits/chosen": -0.730267345905304, + "logits/rejected": -0.746167004108429, + "logps/chosen": -338.2242126464844, + "logps/rejected": -307.18280029296875, + "logps/weighted_chosen": -2.9883790016174316, + "logps/weighted_rejected": -3.5892090797424316, + "loss": 0.6942, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -38.233009338378906, + "rewards/margins": 11.564062118530273, + "rewards/rejected": -49.80937576293945, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.3507080078125, + "rewards/weighted_margins": 0.3366760313510895, + "rewards/weighted_rejected": -0.6871337890625, + "step": 320 + }, + { + "epoch": 0.17272965192358022, + "grad_norm": 57.19697570800781, + "learning_rate": 9.84409531374603e-07, + "logits/chosen": -0.6843910217285156, + "logits/rejected": -0.6659576296806335, + "logps/chosen": -345.46875, + "logps/rejected": -316.2515563964844, + "logps/weighted_chosen": -3.05517578125, + "logps/weighted_rejected": -3.5519776344299316, + "loss": 0.6569, + 
"rewards/accuracies": 0.668749988079071, + "rewards/chosen": -40.732032775878906, + "rewards/margins": 12.651952743530273, + "rewards/rejected": -53.38984298706055, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.3262878358364105, + "rewards/weighted_margins": 0.346893310546875, + "rewards/weighted_rejected": -0.6730865240097046, + "step": 330 + }, + { + "epoch": 0.17796388380005235, + "grad_norm": 52.49288558959961, + "learning_rate": 9.820646844552219e-07, + "logits/chosen": -0.6993133425712585, + "logits/rejected": -0.7529846429824829, + "logps/chosen": -313.59295654296875, + "logps/rejected": -322.1499938964844, + "logps/weighted_chosen": -3.0488524436950684, + "logps/weighted_rejected": -3.440136671066284, + "loss": 0.6287, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -37.06660079956055, + "rewards/margins": 19.494531631469727, + "rewards/rejected": -56.556640625, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.30719298124313354, + "rewards/weighted_margins": 0.448944091796875, + "rewards/weighted_rejected": -0.755999743938446, + "step": 340 + }, + { + "epoch": 0.18319811567652447, + "grad_norm": 15.657389640808105, + "learning_rate": 9.795588318471964e-07, + "logits/chosen": -0.7813507318496704, + "logits/rejected": -0.7874206304550171, + "logps/chosen": -299.80157470703125, + "logps/rejected": -331.4375, + "logps/weighted_chosen": -2.84619140625, + "logps/weighted_rejected": -3.315380811691284, + "loss": 0.6405, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -40.32304763793945, + "rewards/margins": 14.830663681030273, + "rewards/rejected": -55.15625, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.315826416015625, + "rewards/weighted_margins": 0.386627197265625, + "rewards/weighted_rejected": -0.702471911907196, + "step": 350 + }, + { + "epoch": 0.1884323475529966, + "grad_norm": 16.19976806640625, + 
"learning_rate": 9.768928104849415e-07, + "logits/chosen": -0.801177978515625, + "logits/rejected": -0.799664318561554, + "logps/chosen": -323.5171813964844, + "logps/rejected": -305.046875, + "logps/weighted_chosen": -3.1164307594299316, + "logps/weighted_rejected": -3.3475098609924316, + "loss": 0.6865, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -40.823829650878906, + "rewards/margins": 15.389843940734863, + "rewards/rejected": -56.216407775878906, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.31828004121780396, + "rewards/weighted_margins": 0.3831420838832855, + "rewards/weighted_rejected": -0.7014526128768921, + "step": 360 + }, + { + "epoch": 0.19366657942946872, + "grad_norm": 89.87427520751953, + "learning_rate": 9.740675107979355e-07, + "logits/chosen": -0.7640800476074219, + "logits/rejected": -0.7867538332939148, + "logps/chosen": -361.13751220703125, + "logps/rejected": -334.97967529296875, + "logps/weighted_chosen": -2.5084471702575684, + "logps/weighted_rejected": -3.4689698219299316, + "loss": 0.6531, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -46.098045349121094, + "rewards/margins": 14.188085556030273, + "rewards/rejected": -60.26640701293945, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -0.36387938261032104, + "rewards/weighted_margins": 0.3567260801792145, + "rewards/weighted_rejected": -0.720538318157196, + "step": 370 + }, + { + "epoch": 0.19890081130594087, + "grad_norm": 22.484216690063477, + "learning_rate": 9.71083876413323e-07, + "logits/chosen": -0.7209137082099915, + "logits/rejected": -0.7318176031112671, + "logps/chosen": -353.6031188964844, + "logps/rejected": -339.16485595703125, + "logps/weighted_chosen": -2.70361328125, + "logps/weighted_rejected": -3.5843749046325684, + "loss": 0.6589, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -51.237892150878906, + "rewards/margins": 18.424999237060547, + 
"rewards/rejected": -69.64530944824219, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.4475158751010895, + "rewards/weighted_margins": 0.3267761170864105, + "rewards/weighted_rejected": -0.7747405767440796, + "step": 380 + }, + { + "epoch": 0.204135043182413, + "grad_norm": 21.885372161865234, + "learning_rate": 9.67942903840751e-07, + "logits/chosen": -0.7708206176757812, + "logits/rejected": -0.8207153081893921, + "logps/chosen": -355.18438720703125, + "logps/rejected": -350.47186279296875, + "logps/weighted_chosen": -2.8836669921875, + "logps/weighted_rejected": -3.5904297828674316, + "loss": 0.6028, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -50.973045349121094, + "rewards/margins": 25.190038681030273, + "rewards/rejected": -76.1617202758789, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.39097899198532104, + "rewards/weighted_margins": 0.4941650331020355, + "rewards/weighted_rejected": -0.884967029094696, + "step": 390 + }, + { + "epoch": 0.2093692750588851, + "grad_norm": 26.357742309570312, + "learning_rate": 9.646456421395447e-07, + "logits/chosen": -0.805267333984375, + "logits/rejected": -0.8178039789199829, + "logps/chosen": -377.52813720703125, + "logps/rejected": -392.0296936035156, + "logps/weighted_chosen": -2.7947998046875, + "logps/weighted_rejected": -3.697582960128784, + "loss": 0.6296, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -59.2109375, + "rewards/margins": 29.121875762939453, + "rewards/rejected": -88.32890319824219, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -0.41761475801467896, + "rewards/weighted_margins": 0.38171082735061646, + "rewards/weighted_rejected": -0.7994705438613892, + "step": 400 + }, + { + "epoch": 0.21460350693535724, + "grad_norm": 21.382999420166016, + "learning_rate": 9.611931925683266e-07, + "logits/chosen": -0.7703964114189148, + "logits/rejected": 
-0.808850109577179, + "logps/chosen": -367.3140563964844, + "logps/rejected": -348.0687561035156, + "logps/weighted_chosen": -2.711962938308716, + "logps/weighted_rejected": -3.4615721702575684, + "loss": 0.5758, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -61.02734375, + "rewards/margins": 23.316797256469727, + "rewards/rejected": -84.34687805175781, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.40519410371780396, + "rewards/weighted_margins": 0.521441638469696, + "rewards/weighted_rejected": -0.9261535406112671, + "step": 410 + }, + { + "epoch": 0.21983773881182936, + "grad_norm": 23.030996322631836, + "learning_rate": 9.575867082172085e-07, + "logits/chosen": -0.7789466977119446, + "logits/rejected": -0.8260132074356079, + "logps/chosen": -372.22344970703125, + "logps/rejected": -367.0171813964844, + "logps/weighted_chosen": -3.114550828933716, + "logps/weighted_rejected": -3.364208936691284, + "loss": 0.6211, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -67.16816711425781, + "rewards/margins": 29.731639862060547, + "rewards/rejected": -96.90156555175781, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.5122925043106079, + "rewards/weighted_margins": 0.5218566656112671, + "rewards/weighted_rejected": -1.0339782238006592, + "step": 420 + }, + { + "epoch": 0.22507197068830148, + "grad_norm": 16.442333221435547, + "learning_rate": 9.538273936226673e-07, + "logits/chosen": -0.830523669719696, + "logits/rejected": -0.8667358160018921, + "logps/chosen": -328.4546813964844, + "logps/rejected": -347.9593811035156, + "logps/weighted_chosen": -3.373584032058716, + "logps/weighted_rejected": -3.832958936691284, + "loss": 0.6425, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -62.920310974121094, + "rewards/margins": 20.668750762939453, + "rewards/rejected": -83.5894546508789, + "rewards/weighted_accuracies": 0.612500011920929, + 
"rewards/weighted_chosen": -0.459890753030777, + "rewards/weighted_margins": 0.39284056425094604, + "rewards/weighted_rejected": -0.8525451421737671, + "step": 430 + }, + { + "epoch": 0.23030620256477363, + "grad_norm": 21.955875396728516, + "learning_rate": 9.499165043652391e-07, + "logits/chosen": -0.8598114252090454, + "logits/rejected": -0.868182361125946, + "logps/chosen": -358.21563720703125, + "logps/rejected": -356.26251220703125, + "logps/weighted_chosen": -3.4171142578125, + "logps/weighted_rejected": -3.6997313499450684, + "loss": 0.624, + "rewards/accuracies": 0.625, + "rewards/chosen": -66.96504211425781, + "rewards/margins": 22.563282012939453, + "rewards/rejected": -89.5503921508789, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.5841079950332642, + "rewards/weighted_margins": 0.434326171875, + "rewards/weighted_rejected": -1.018707275390625, + "step": 440 + }, + { + "epoch": 0.23554043444124576, + "grad_norm": 75.56902313232422, + "learning_rate": 9.458553466501665e-07, + "logits/chosen": -0.9330536127090454, + "logits/rejected": -0.9642333984375, + "logps/chosen": -352.6187438964844, + "logps/rejected": -336.0218811035156, + "logps/weighted_chosen": -3.4129395484924316, + "logps/weighted_rejected": -3.74462890625, + "loss": 0.6566, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -66.412109375, + "rewards/margins": 25.757617950439453, + "rewards/rejected": -92.181640625, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.689007580280304, + "rewards/weighted_margins": 0.4539245665073395, + "rewards/weighted_rejected": -1.143212914466858, + "step": 450 + }, + { + "epoch": 0.24077466631771788, + "grad_norm": 19.516427993774414, + "learning_rate": 9.416452768711366e-07, + "logits/chosen": -0.945111095905304, + "logits/rejected": -0.9787231683731079, + "logps/chosen": -369.3671875, + "logps/rejected": -358.9624938964844, + "logps/weighted_chosen": -3.1959471702575684, + 
"logps/weighted_rejected": -3.948193311691284, + "loss": 0.6392, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -77.17265319824219, + "rewards/margins": 21.916015625, + "rewards/rejected": -99.0796890258789, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.67822265625, + "rewards/weighted_margins": 0.532788097858429, + "rewards/weighted_rejected": -1.2112305164337158, + "step": 460 + }, + { + "epoch": 0.24600889819419, + "grad_norm": 19.182979583740234, + "learning_rate": 9.372877011572557e-07, + "logits/chosen": -0.9224609136581421, + "logits/rejected": -0.9388214349746704, + "logps/chosen": -391.6937561035156, + "logps/rejected": -377.0625, + "logps/weighted_chosen": -3.224560499191284, + "logps/weighted_rejected": -3.783252000808716, + "loss": 0.6162, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -80.95976257324219, + "rewards/margins": 21.617578506469727, + "rewards/rejected": -102.59883117675781, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.684155285358429, + "rewards/weighted_margins": 0.5555480718612671, + "rewards/weighted_rejected": -1.23956298828125, + "step": 470 + }, + { + "epoch": 0.2512431300706621, + "grad_norm": 31.75469970703125, + "learning_rate": 9.327840749034141e-07, + "logits/chosen": -0.969561755657196, + "logits/rejected": -0.998791515827179, + "logps/chosen": -362.1859436035156, + "logps/rejected": -385.29998779296875, + "logps/weighted_chosen": -3.0771241188049316, + "logps/weighted_rejected": -4.388257026672363, + "loss": 0.6296, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -75.1123046875, + "rewards/margins": 33.66425704956055, + "rewards/rejected": -108.75, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.606555163860321, + "rewards/weighted_margins": 0.612231433391571, + "rewards/weighted_rejected": -1.2182190418243408, + "step": 480 + }, + { + "epoch": 0.2564773619471343, + 
"grad_norm": 37.024818420410156, + "learning_rate": 9.281359022841965e-07, + "logits/chosen": -0.846588134765625, + "logits/rejected": -0.859790027141571, + "logps/chosen": -352.46405029296875, + "logps/rejected": -355.24688720703125, + "logps/weighted_chosen": -3.219531297683716, + "logps/weighted_rejected": -4.648681640625, + "loss": 0.5897, + "rewards/accuracies": 0.640625, + "rewards/chosen": -77.3949203491211, + "rewards/margins": 32.93046951293945, + "rewards/rejected": -110.32890319824219, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.7288268804550171, + "rewards/weighted_margins": 0.6741577386856079, + "rewards/weighted_rejected": -1.403161644935608, + "step": 490 + }, + { + "epoch": 0.26171159382360637, + "grad_norm": 17.740766525268555, + "learning_rate": 9.233447357514989e-07, + "logits/chosen": -0.8205505609512329, + "logits/rejected": -0.863543689250946, + "logps/chosen": -375.52032470703125, + "logps/rejected": -378.3500061035156, + "logps/weighted_chosen": -3.53125, + "logps/weighted_rejected": -4.106689453125, + "loss": 0.6305, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -77.591796875, + "rewards/margins": 30.978906631469727, + "rewards/rejected": -108.54609680175781, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.6407378911972046, + "rewards/weighted_margins": 0.6662231683731079, + "rewards/weighted_rejected": -1.30645751953125, + "step": 500 + }, + { + "epoch": 0.26171159382360637, + "eval_logits/chosen": -0.9400458931922913, + "eval_logits/rejected": -0.955981433391571, + "eval_logps/chosen": -371.72900390625, + "eval_logps/rejected": -379.6419982910156, + "eval_logps/weighted_chosen": -3.214712381362915, + "eval_logps/weighted_rejected": -4.0158867835998535, + "eval_loss": 0.6316163539886475, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -82.98784637451172, + "eval_rewards/margins": 28.939437866210938, + "eval_rewards/rejected": 
-111.93875122070312, + "eval_rewards/weighted_accuracies": 0.6725000143051147, + "eval_rewards/weighted_chosen": -0.6669993996620178, + "eval_rewards/weighted_margins": 0.5506796836853027, + "eval_rewards/weighted_rejected": -1.2176789045333862, + "eval_runtime": 1162.5522, + "eval_samples_per_second": 1.72, + "eval_steps_per_second": 0.43, + "step": 500 + }, + { + "epoch": 0.2669458257000785, + "grad_norm": 59.14344787597656, + "learning_rate": 9.184121755160232e-07, + "logits/chosen": -0.9093383550643921, + "logits/rejected": -0.9390915036201477, + "logps/chosen": -378.3890686035156, + "logps/rejected": -401.62811279296875, + "logps/weighted_chosen": -3.4715576171875, + "logps/weighted_rejected": -4.080712795257568, + "loss": 0.6505, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -77.12968444824219, + "rewards/margins": 35.939842224121094, + "rewards/rejected": -113.0718765258789, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.6525024175643921, + "rewards/weighted_margins": 0.52886962890625, + "rewards/weighted_rejected": -1.1813843250274658, + "step": 510 + }, + { + "epoch": 0.2721800575765506, + "grad_norm": 33.64823913574219, + "learning_rate": 9.133398690128193e-07, + "logits/chosen": -0.942626953125, + "logits/rejected": -0.965716540813446, + "logps/chosen": -400.1890563964844, + "logps/rejected": -403.63592529296875, + "logps/weighted_chosen": -3.2196044921875, + "logps/weighted_rejected": -4.1656494140625, + "loss": 0.6062, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -78.64530944824219, + "rewards/margins": 44.30859375, + "rewards/rejected": -122.9917984008789, + "rewards/weighted_accuracies": 0.7406250238418579, + "rewards/weighted_chosen": -0.48695677518844604, + "rewards/weighted_margins": 0.6421569585800171, + "rewards/weighted_rejected": -1.129034399986267, + "step": 520 + }, + { + "epoch": 0.27741428945302277, + "grad_norm": 36.97419738769531, + "learning_rate": 
9.081295103510554e-07, + "logits/chosen": -0.9060531854629517, + "logits/rejected": -0.954547107219696, + "logps/chosen": -377.2640686035156, + "logps/rejected": -402.17498779296875, + "logps/weighted_chosen": -2.8717284202575684, + "logps/weighted_rejected": -4.353662014007568, + "loss": 0.5138, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -93.2210922241211, + "rewards/margins": 39.227149963378906, + "rewards/rejected": -132.44140625, + "rewards/weighted_accuracies": 0.7250000238418579, + "rewards/weighted_chosen": -0.5675109624862671, + "rewards/weighted_margins": 0.93292236328125, + "rewards/weighted_rejected": -1.501245141029358, + "step": 530 + }, + { + "epoch": 0.2826485213294949, + "grad_norm": 23.371498107910156, + "learning_rate": 9.027828397481989e-07, + "logits/chosen": -0.8855453729629517, + "logits/rejected": -0.911120593547821, + "logps/chosen": -357.30078125, + "logps/rejected": -385.8531188964844, + "logps/weighted_chosen": -3.6192626953125, + "logps/weighted_rejected": -4.450634956359863, + "loss": 0.6284, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -96.7691421508789, + "rewards/margins": 33.722267150878906, + "rewards/rejected": -130.5457000732422, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.788897693157196, + "rewards/weighted_margins": 0.5829833745956421, + "rewards/weighted_rejected": -1.3713562488555908, + "step": 540 + }, + { + "epoch": 0.287882753205967, + "grad_norm": 21.5579833984375, + "learning_rate": 8.973016429487988e-07, + "logits/chosen": -0.9223998785018921, + "logits/rejected": -0.9320526123046875, + "logps/chosen": -376.43438720703125, + "logps/rejected": -393.18438720703125, + "logps/weighted_chosen": -3.503002882003784, + "logps/weighted_rejected": -3.9817872047424316, + "loss": 0.6093, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -94.5347671508789, + "rewards/margins": 44.30546951293945, + "rewards/rejected": -138.84414672851562, + 
"rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.7487853765487671, + "rewards/weighted_margins": 0.62567138671875, + "rewards/weighted_rejected": -1.3746826648712158, + "step": 550 + }, + { + "epoch": 0.29311698508243916, + "grad_norm": 55.23554992675781, + "learning_rate": 8.916877506280601e-07, + "logits/chosen": -0.969989001750946, + "logits/rejected": -0.9665802121162415, + "logps/chosen": -383.078125, + "logps/rejected": -397.75, + "logps/weighted_chosen": -3.5821290016174316, + "logps/weighted_rejected": -4.008593559265137, + "loss": 0.6282, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -102.19023132324219, + "rewards/margins": 41.46562576293945, + "rewards/rejected": -143.642578125, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.7835754156112671, + "rewards/weighted_margins": 0.644238293170929, + "rewards/weighted_rejected": -1.427978515625, + "step": 560 + }, + { + "epoch": 0.29835121695891126, + "grad_norm": 18.939470291137695, + "learning_rate": 8.85943037780415e-07, + "logits/chosen": -1.0394058227539062, + "logits/rejected": -1.042639136314392, + "logps/chosen": -384.078125, + "logps/rejected": -366.9984436035156, + "logps/weighted_chosen": -3.3567872047424316, + "logps/weighted_rejected": -3.9019775390625, + "loss": 0.5908, + "rewards/accuracies": 0.625, + "rewards/chosen": -97.7894515991211, + "rewards/margins": 27.4404296875, + "rewards/rejected": -125.2328109741211, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.7109375, + "rewards/weighted_margins": 0.566577136516571, + "rewards/weighted_rejected": -1.2770659923553467, + "step": 570 + }, + { + "epoch": 0.3035854488353834, + "grad_norm": 54.97072982788086, + "learning_rate": 8.800694230932884e-07, + "logits/chosen": -0.958050549030304, + "logits/rejected": -0.9729766845703125, + "logps/chosen": -381.4140625, + "logps/rejected": -385.4296875, + "logps/weighted_chosen": 
-2.9930176734924316, + "logps/weighted_rejected": -3.6538329124450684, + "loss": 0.6283, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -91.8843765258789, + "rewards/margins": 28.43359375, + "rewards/rejected": -120.30078125, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.5365799069404602, + "rewards/weighted_margins": 0.4689392149448395, + "rewards/weighted_rejected": -1.005883812904358, + "step": 580 + }, + { + "epoch": 0.30881968071185556, + "grad_norm": 187.63499450683594, + "learning_rate": 8.740688683062723e-07, + "logits/chosen": -1.0116729736328125, + "logits/rejected": -1.0273834466934204, + "logps/chosen": -427.6859436035156, + "logps/rejected": -400.0640563964844, + "logps/weighted_chosen": -2.997143507003784, + "logps/weighted_rejected": -3.7496094703674316, + "loss": 0.6363, + "rewards/accuracies": 0.578125, + "rewards/chosen": -98.6539077758789, + "rewards/margins": 25.619726181030273, + "rewards/rejected": -124.2671890258789, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.539746105670929, + "rewards/weighted_margins": 0.48472291231155396, + "rewards/weighted_rejected": -1.023950219154358, + "step": 590 + }, + { + "epoch": 0.31405391258832765, + "grad_norm": 17.906530380249023, + "learning_rate": 8.679433775559215e-07, + "logits/chosen": -0.991503894329071, + "logits/rejected": -1.0299193859100342, + "logps/chosen": -428.88592529296875, + "logps/rejected": -425.1625061035156, + "logps/weighted_chosen": -2.877368211746216, + "logps/weighted_rejected": -3.985302686691284, + "loss": 0.5932, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -104.1353530883789, + "rewards/margins": 35.59746170043945, + "rewards/rejected": -139.7765655517578, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.6103607416152954, + "rewards/weighted_margins": 0.5564330816268921, + "rewards/weighted_rejected": -1.166479468345642, + "step": 600 
+ }, + { + "epoch": 0.3192881444647998, + "grad_norm": 33.20716094970703, + "learning_rate": 8.616949967063871e-07, + "logits/chosen": -0.9755920171737671, + "logits/rejected": -1.0198791027069092, + "logps/chosen": -370.45001220703125, + "logps/rejected": -400.4906311035156, + "logps/weighted_chosen": -3.3270506858825684, + "logps/weighted_rejected": -3.9315428733825684, + "loss": 0.6822, + "rewards/accuracies": 0.65625, + "rewards/chosen": -102.12422180175781, + "rewards/margins": 37.60078048706055, + "rewards/rejected": -139.7734375, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -0.7267090082168579, + "rewards/weighted_margins": 0.43719482421875, + "rewards/weighted_rejected": -1.163793921470642, + "step": 610 + }, + { + "epoch": 0.3245223763412719, + "grad_norm": 13.562949180603027, + "learning_rate": 8.553258126661154e-07, + "logits/chosen": -1.00177001953125, + "logits/rejected": -1.010014295578003, + "logps/chosen": -390.65234375, + "logps/rejected": -402.65155029296875, + "logps/weighted_chosen": -3.4473876953125, + "logps/weighted_rejected": -4.26806640625, + "loss": 0.6605, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -110.80390930175781, + "rewards/margins": 35.68242263793945, + "rewards/rejected": -146.5031280517578, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.7691070437431335, + "rewards/weighted_margins": 0.534271240234375, + "rewards/weighted_rejected": -1.3035888671875, + "step": 620 + }, + { + "epoch": 0.32975660821774405, + "grad_norm": 29.253734588623047, + "learning_rate": 8.488379526908368e-07, + "logits/chosen": -0.978869616985321, + "logits/rejected": -0.9867492914199829, + "logps/chosen": -406.9437561035156, + "logps/rejected": -431.9906311035156, + "logps/weighted_chosen": -3.346386671066284, + "logps/weighted_rejected": -4.080664157867432, + "loss": 0.5991, + "rewards/accuracies": 0.65625, + "rewards/chosen": -116.3648452758789, + "rewards/margins": 
46.68730545043945, + "rewards/rejected": -163.06405639648438, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -0.757556140422821, + "rewards/weighted_margins": 0.597582995891571, + "rewards/weighted_rejected": -1.3551514148712158, + "step": 630 + }, + { + "epoch": 0.33499084009421615, + "grad_norm": 28.59862518310547, + "learning_rate": 8.422335836730802e-07, + "logits/chosen": -0.983142077922821, + "logits/rejected": -0.9791107177734375, + "logps/chosen": -378.4984436035156, + "logps/rejected": -432.71563720703125, + "logps/weighted_chosen": -3.094311475753784, + "logps/weighted_rejected": -3.8768067359924316, + "loss": 0.6061, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -104.8597640991211, + "rewards/margins": 50.2001953125, + "rewards/rejected": -155.0695343017578, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -0.6736419796943665, + "rewards/weighted_margins": 0.5768188238143921, + "rewards/weighted_rejected": -1.250451683998108, + "step": 640 + }, + { + "epoch": 0.3402250719706883, + "grad_norm": 20.690876007080078, + "learning_rate": 8.355149114184485e-07, + "logits/chosen": -1.030615210533142, + "logits/rejected": -1.0146636962890625, + "logps/chosen": -416.39373779296875, + "logps/rejected": -445.0625, + "logps/weighted_chosen": -3.255859375, + "logps/weighted_rejected": -3.76806640625, + "loss": 0.6048, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -108.90547180175781, + "rewards/margins": 53.869140625, + "rewards/rejected": -162.74258422851562, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.700115978717804, + "rewards/weighted_margins": 0.5948852300643921, + "rewards/weighted_rejected": -1.2950623035430908, + "step": 650 + }, + { + "epoch": 0.34545930384716045, + "grad_norm": 33.73557662963867, + "learning_rate": 8.286841799088963e-07, + "logits/chosen": -1.0694351196289062, + "logits/rejected": -1.0623047351837158, 
+ "logps/chosen": -395.19219970703125, + "logps/rejected": -412.45001220703125, + "logps/weighted_chosen": -2.8808836936950684, + "logps/weighted_rejected": -3.636523485183716, + "loss": 0.6207, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -111.27070617675781, + "rewards/margins": 38.04765701293945, + "rewards/rejected": -149.3312530517578, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.649340808391571, + "rewards/weighted_margins": 0.5112365484237671, + "rewards/weighted_rejected": -1.1605103015899658, + "step": 660 + }, + { + "epoch": 0.35069353572363254, + "grad_norm": 18.913761138916016, + "learning_rate": 8.217436705532599e-07, + "logits/chosen": -1.0736572742462158, + "logits/rejected": -1.090576171875, + "logps/chosen": -431.78125, + "logps/rejected": -421.4312438964844, + "logps/weighted_chosen": -3.0259766578674316, + "logps/weighted_rejected": -3.81298828125, + "loss": 0.6004, + "rewards/accuracies": 0.640625, + "rewards/chosen": -129.1144561767578, + "rewards/margins": 28.649608612060547, + "rewards/rejected": -157.74844360351562, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -0.6614929437637329, + "rewards/weighted_margins": 0.636364758014679, + "rewards/weighted_rejected": -1.2976195812225342, + "step": 670 + }, + { + "epoch": 0.3559277676001047, + "grad_norm": 22.967056274414062, + "learning_rate": 8.14695701425284e-07, + "logits/chosen": -1.051629662513733, + "logits/rejected": -1.0860717296600342, + "logps/chosen": -432.98126220703125, + "logps/rejected": -426.5953063964844, + "logps/weighted_chosen": -3.1195311546325684, + "logps/weighted_rejected": -3.96337890625, + "loss": 0.5827, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -127.5308609008789, + "rewards/margins": 36.908592224121094, + "rewards/rejected": -164.46484375, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.686236560344696, + "rewards/weighted_margins": 
0.6052306890487671, + "rewards/weighted_rejected": -1.2908813953399658, + "step": 680 + }, + { + "epoch": 0.3611619994765768, + "grad_norm": 15.409049034118652, + "learning_rate": 8.075426264894046e-07, + "logits/chosen": -1.006170630455017, + "logits/rejected": -1.0271179676055908, + "logps/chosen": -434.3828125, + "logps/rejected": -452.90625, + "logps/weighted_chosen": -3.187304735183716, + "logps/weighted_rejected": -4.466699123382568, + "loss": 0.5501, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -128.9619140625, + "rewards/margins": 46.51679611206055, + "rewards/rejected": -175.45468139648438, + "rewards/weighted_accuracies": 0.737500011920929, + "rewards/weighted_chosen": -0.6887573003768921, + "rewards/weighted_margins": 0.76141357421875, + "rewards/weighted_rejected": -1.4498474597930908, + "step": 690 + }, + { + "epoch": 0.36639623135304894, + "grad_norm": 23.13039207458496, + "learning_rate": 8.002868348145435e-07, + "logits/chosen": -0.9920509457588196, + "logits/rejected": -0.996777355670929, + "logps/chosen": -439.54998779296875, + "logps/rejected": -444.30780029296875, + "logps/weighted_chosen": -3.31005859375, + "logps/weighted_rejected": -3.691967725753784, + "loss": 0.6102, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -139.38632202148438, + "rewards/margins": 36.769142150878906, + "rewards/rejected": -176.1867218017578, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.8051727414131165, + "rewards/weighted_margins": 0.5639587640762329, + "rewards/weighted_rejected": -1.3695800304412842, + "step": 700 + }, + { + "epoch": 0.3716304632295211, + "grad_norm": 38.38508224487305, + "learning_rate": 7.92930749776179e-07, + "logits/chosen": -1.0172607898712158, + "logits/rejected": -1.042083740234375, + "logps/chosen": -422.10546875, + "logps/rejected": -444.6812438964844, + "logps/weighted_chosen": -3.483569383621216, + "logps/weighted_rejected": -4.216650485992432, + "loss": 0.6316, + 
"rewards/accuracies": 0.621874988079071, + "rewards/chosen": -150.42578125, + "rewards/margins": 33.904685974121094, + "rewards/rejected": -184.33438110351562, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.72515869140625, + "rewards/weighted_margins": 0.590161144733429, + "rewards/weighted_rejected": -1.3153502941131592, + "step": 710 + }, + { + "epoch": 0.3768646951059932, + "grad_norm": 20.4711971282959, + "learning_rate": 7.854768282469582e-07, + "logits/chosen": -1.0770995616912842, + "logits/rejected": -1.114935278892517, + "logps/chosen": -401.88592529296875, + "logps/rejected": -458.3374938964844, + "logps/weighted_chosen": -3.099194288253784, + "logps/weighted_rejected": -3.990966796875, + "loss": 0.5978, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -124.43046569824219, + "rewards/margins": 56.217384338378906, + "rewards/rejected": -180.6015625, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.6303802728652954, + "rewards/weighted_margins": 0.679125964641571, + "rewards/weighted_rejected": -1.3104156255722046, + "step": 720 + }, + { + "epoch": 0.38209892698246534, + "grad_norm": 15.2982177734375, + "learning_rate": 7.779275597761215e-07, + "logits/chosen": -1.0406615734100342, + "logits/rejected": -1.097131371498108, + "logps/chosen": -414.5625, + "logps/rejected": -462.5453186035156, + "logps/weighted_chosen": -3.3689942359924316, + "logps/weighted_rejected": -4.090185642242432, + "loss": 0.5689, + "rewards/accuracies": 0.6781250238418579, + "rewards/chosen": -138.33358764648438, + "rewards/margins": 55.842185974121094, + "rewards/rejected": -194.16171264648438, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -0.6730102300643921, + "rewards/weighted_margins": 0.699902355670929, + "rewards/weighted_rejected": -1.373620629310608, + "step": 730 + }, + { + "epoch": 0.38733315885893743, + "grad_norm": 37.01581954956055, + "learning_rate": 
7.702854657580126e-07, + "logits/chosen": -1.1022522449493408, + "logits/rejected": -1.1134154796600342, + "logps/chosen": -459.6156311035156, + "logps/rejected": -462.4125061035156, + "logps/weighted_chosen": -3.382946729660034, + "logps/weighted_rejected": -4.401709079742432, + "loss": 0.6148, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -164.8722686767578, + "rewards/margins": 43.28515625, + "rewards/rejected": -208.21133422851562, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.8088958859443665, + "rewards/weighted_margins": 0.698986828327179, + "rewards/weighted_rejected": -1.5074951648712158, + "step": 740 + }, + { + "epoch": 0.3925673907354096, + "grad_norm": 26.924480438232422, + "learning_rate": 7.625530985899547e-07, + "logits/chosen": -1.0611861944198608, + "logits/rejected": -1.075714111328125, + "logps/chosen": -430.6312561035156, + "logps/rejected": -461.9828186035156, + "logps/weighted_chosen": -3.210217237472534, + "logps/weighted_rejected": -4.483691215515137, + "loss": 0.5815, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -163.78555297851562, + "rewards/margins": 53.288673400878906, + "rewards/rejected": -216.96054077148438, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.876666247844696, + "rewards/weighted_margins": 0.7388671636581421, + "rewards/weighted_rejected": -1.6160767078399658, + "step": 750 + }, + { + "epoch": 0.39780162261188173, + "grad_norm": 18.056201934814453, + "learning_rate": 7.547330408197694e-07, + "logits/chosen": -1.0437713861465454, + "logits/rejected": -1.075250267982483, + "logps/chosen": -460.1875, + "logps/rejected": -450.8109436035156, + "logps/weighted_chosen": -3.229296922683716, + "logps/weighted_rejected": -4.191064357757568, + "loss": 0.6146, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -164.56640625, + "rewards/margins": 29.973241806030273, + "rewards/rejected": -194.6171875, + 
"rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.7951415777206421, + "rewards/weighted_margins": 0.599578857421875, + "rewards/weighted_rejected": -1.395105004310608, + "step": 760 + }, + { + "epoch": 0.40303585448835383, + "grad_norm": 16.393836975097656, + "learning_rate": 7.468279042832271e-07, + "logits/chosen": -1.0454833507537842, + "logits/rejected": -1.0705687999725342, + "logps/chosen": -416.57421875, + "logps/rejected": -483.1390686035156, + "logps/weighted_chosen": -3.134448289871216, + "logps/weighted_rejected": -3.8697752952575684, + "loss": 0.6132, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -136.8953094482422, + "rewards/margins": 56.886329650878906, + "rewards/rejected": -193.72305297851562, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.7508605718612671, + "rewards/weighted_margins": 0.5269104242324829, + "rewards/weighted_rejected": -1.277490258216858, + "step": 770 + }, + { + "epoch": 0.408270086364826, + "grad_norm": 16.254776000976562, + "learning_rate": 7.388403292317154e-07, + "logits/chosen": -1.0213134288787842, + "logits/rejected": -1.072851538658142, + "logps/chosen": -445.00469970703125, + "logps/rejected": -451.1468811035156, + "logps/weighted_chosen": -3.063525438308716, + "logps/weighted_rejected": -3.897021532058716, + "loss": 0.6134, + "rewards/accuracies": 0.609375, + "rewards/chosen": -144.0207061767578, + "rewards/margins": 41.165626525878906, + "rewards/rejected": -185.15585327148438, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.752685546875, + "rewards/weighted_margins": 0.5895751714706421, + "rewards/weighted_rejected": -1.341894507408142, + "step": 780 + }, + { + "epoch": 0.4135043182412981, + "grad_norm": 19.472450256347656, + "learning_rate": 7.307729834504154e-07, + "logits/chosen": -1.052435278892517, + "logits/rejected": -1.10076904296875, + "logps/chosen": -457.2640686035156, + "logps/rejected": 
-488.16876220703125, + "logps/weighted_chosen": -3.143115282058716, + "logps/weighted_rejected": -4.254638671875, + "loss": 0.6137, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -175.60116577148438, + "rewards/margins": 46.61640548706055, + "rewards/rejected": -222.21328735351562, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -0.878021240234375, + "rewards/weighted_margins": 0.650500476360321, + "rewards/weighted_rejected": -1.5286986827850342, + "step": 790 + }, + { + "epoch": 0.4187385501177702, + "grad_norm": 24.911523818969727, + "learning_rate": 7.226285613672847e-07, + "logits/chosen": -1.0021483898162842, + "logits/rejected": -1.031951904296875, + "logps/chosen": -466.4765625, + "logps/rejected": -547.578125, + "logps/weighted_chosen": -3.3023438453674316, + "logps/weighted_rejected": -4.483691215515137, + "loss": 0.6142, + "rewards/accuracies": 0.640625, + "rewards/chosen": -189.1164093017578, + "rewards/margins": 79.8050765991211, + "rewards/rejected": -268.85467529296875, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -0.978710949420929, + "rewards/weighted_margins": 0.6761840581893921, + "rewards/weighted_rejected": -1.6549804210662842, + "step": 800 + }, + { + "epoch": 0.4239727819942423, + "grad_norm": 16.060869216918945, + "learning_rate": 7.144097831531398e-07, + "logits/chosen": -0.978619396686554, + "logits/rejected": -1.0038635730743408, + "logps/chosen": -456.95623779296875, + "logps/rejected": -505.2093811035156, + "logps/weighted_chosen": -3.171826124191284, + "logps/weighted_rejected": -4.205761909484863, + "loss": 0.5645, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -186.97421264648438, + "rewards/margins": 54.0078125, + "rewards/rejected": -240.85311889648438, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.788342297077179, + "rewards/weighted_margins": 0.7865844964981079, + "rewards/weighted_rejected": 
-1.574896216392517, + "step": 810 + }, + { + "epoch": 0.42920701387071447, + "grad_norm": 23.292619705200195, + "learning_rate": 7.061193938131396e-07, + "logits/chosen": -0.9266418218612671, + "logits/rejected": -0.9759277105331421, + "logps/chosen": -492.421875, + "logps/rejected": -497.2515563964844, + "logps/weighted_chosen": -3.4576172828674316, + "logps/weighted_rejected": -4.116650581359863, + "loss": 0.5666, + "rewards/accuracies": 0.578125, + "rewards/chosen": -192.64022827148438, + "rewards/margins": 39.222267150878906, + "rewards/rejected": -231.8562469482422, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.797924816608429, + "rewards/weighted_margins": 0.775280773639679, + "rewards/weighted_rejected": -1.573089599609375, + "step": 820 + }, + { + "epoch": 0.4344412457471866, + "grad_norm": 23.763275146484375, + "learning_rate": 6.977601622699789e-07, + "logits/chosen": -0.9908691644668579, + "logits/rejected": -1.057653784751892, + "logps/chosen": -459.80157470703125, + "logps/rejected": -544.8687744140625, + "logps/weighted_chosen": -3.350512742996216, + "logps/weighted_rejected": -4.385839939117432, + "loss": 0.5077, + "rewards/accuracies": 0.6781250238418579, + "rewards/chosen": -175.025390625, + "rewards/margins": 90.8042984008789, + "rewards/rejected": -265.86798095703125, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -0.699688732624054, + "rewards/weighted_margins": 0.9197998046875, + "rewards/weighted_rejected": -1.61993408203125, + "step": 830 + }, + { + "epoch": 0.4396754776236587, + "grad_norm": 29.07372283935547, + "learning_rate": 6.893348804390882e-07, + "logits/chosen": -1.094964623451233, + "logits/rejected": -1.1045074462890625, + "logps/chosen": -521.2327880859375, + "logps/rejected": -545.9468994140625, + "logps/weighted_chosen": -3.6615967750549316, + "logps/weighted_rejected": -4.320361137390137, + "loss": 0.5747, + "rewards/accuracies": 0.5843750238418579, + 
"rewards/chosen": -225.86563110351562, + "rewards/margins": 60.388671875, + "rewards/rejected": -286.2398376464844, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -0.912017822265625, + "rewards/weighted_margins": 0.748242199420929, + "rewards/weighted_rejected": -1.660058617591858, + "step": 840 + }, + { + "epoch": 0.44490970950013087, + "grad_norm": 23.878381729125977, + "learning_rate": 6.808463622961578e-07, + "logits/chosen": -1.0891234874725342, + "logits/rejected": -1.1309936046600342, + "logps/chosen": -545.3046875, + "logps/rejected": -615.7484130859375, + "logps/weighted_chosen": -3.533984422683716, + "logps/weighted_rejected": -4.504004001617432, + "loss": 0.5472, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -252.8015594482422, + "rewards/margins": 81.0531234741211, + "rewards/rejected": -333.59295654296875, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -0.967456042766571, + "rewards/weighted_margins": 0.86865234375, + "rewards/weighted_rejected": -1.835351586341858, + "step": 850 + }, + { + "epoch": 0.45014394137660296, + "grad_norm": 101.535888671875, + "learning_rate": 6.722974429372925e-07, + "logits/chosen": -1.0688354969024658, + "logits/rejected": -1.1046874523162842, + "logps/chosen": -578.9781494140625, + "logps/rejected": -604.3499755859375, + "logps/weighted_chosen": -3.37939453125, + "logps/weighted_rejected": -5.040380954742432, + "loss": 0.5051, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -275.2124938964844, + "rewards/margins": 66.9710922241211, + "rewards/rejected": -342.20001220703125, + "rewards/weighted_accuracies": 0.7749999761581421, + "rewards/weighted_chosen": -1.086279273033142, + "rewards/weighted_margins": 1.05950927734375, + "rewards/weighted_rejected": -2.1461181640625, + "step": 860 + }, + { + "epoch": 0.4553781732530751, + "grad_norm": 66.56680297851562, + "learning_rate": 6.636909776321128e-07, + "logits/chosen": 
-1.1214478015899658, + "logits/rejected": -1.11016845703125, + "logps/chosen": -493.3843688964844, + "logps/rejected": -590.8125, + "logps/weighted_chosen": -3.670654296875, + "logps/weighted_rejected": -4.723730564117432, + "loss": 0.5107, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -231.6789093017578, + "rewards/margins": 94.42109680175781, + "rewards/rejected": -326.2250061035156, + "rewards/weighted_accuracies": 0.746874988079071, + "rewards/weighted_chosen": -1.0464904308319092, + "rewards/weighted_margins": 0.9440551996231079, + "rewards/weighted_rejected": -1.99102783203125, + "step": 870 + }, + { + "epoch": 0.46061240512954726, + "grad_norm": 23.494997024536133, + "learning_rate": 6.550298408701174e-07, + "logits/chosen": -1.094885230064392, + "logits/rejected": -1.1415894031524658, + "logps/chosen": -534.1663818359375, + "logps/rejected": -621.9547119140625, + "logps/weighted_chosen": -3.794140577316284, + "logps/weighted_rejected": -5.148291110992432, + "loss": 0.5174, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -247.2595672607422, + "rewards/margins": 92.109375, + "rewards/rejected": -339.3968811035156, + "rewards/weighted_accuracies": 0.753125011920929, + "rewards/weighted_chosen": -0.9449707269668579, + "rewards/weighted_margins": 0.938586413860321, + "rewards/weighted_rejected": -1.8829224109649658, + "step": 880 + }, + { + "epoch": 0.46584663700601936, + "grad_norm": 27.359371185302734, + "learning_rate": 6.463169254006276e-07, + "logits/chosen": -1.1160705089569092, + "logits/rejected": -1.157629370689392, + "logps/chosen": -538.1109619140625, + "logps/rejected": -562.7062377929688, + "logps/weighted_chosen": -3.73779296875, + "logps/weighted_rejected": -4.817724704742432, + "loss": 0.5154, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -256.6499938964844, + "rewards/margins": 59.66523361206055, + "rewards/rejected": -316.37225341796875, + "rewards/weighted_accuracies": 0.71875, + 
"rewards/weighted_chosen": -1.019891381263733, + "rewards/weighted_margins": 1.008874535560608, + "rewards/weighted_rejected": -2.028430223464966, + "step": 890 + }, + { + "epoch": 0.4710808688824915, + "grad_norm": 20.988025665283203, + "learning_rate": 6.375551412666326e-07, + "logits/chosen": -1.0879943370819092, + "logits/rejected": -1.1199951171875, + "logps/chosen": -514.9187622070312, + "logps/rejected": -545.7906494140625, + "logps/weighted_chosen": -3.4129395484924316, + "logps/weighted_rejected": -4.592138767242432, + "loss": 0.6113, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -240.97891235351562, + "rewards/margins": 48.454689025878906, + "rewards/rejected": -289.4117126464844, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -1.1000854969024658, + "rewards/weighted_margins": 0.7867187261581421, + "rewards/weighted_rejected": -1.887121558189392, + "step": 900 + }, + { + "epoch": 0.4763151007589636, + "grad_norm": 29.907148361206055, + "learning_rate": 6.287474148328583e-07, + "logits/chosen": -1.0193603038787842, + "logits/rejected": -0.9993133544921875, + "logps/chosen": -474.3374938964844, + "logps/rejected": -501.23748779296875, + "logps/weighted_chosen": -3.7416014671325684, + "logps/weighted_rejected": -5.238329887390137, + "loss": 0.5727, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -203.36563110351562, + "rewards/margins": 50.95781326293945, + "rewards/rejected": -254.2734375, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.043182373046875, + "rewards/weighted_margins": 0.8107665777206421, + "rewards/weighted_rejected": -1.8539307117462158, + "step": 910 + }, + { + "epoch": 0.48154933263543576, + "grad_norm": 36.32797622680664, + "learning_rate": 6.198966878083857e-07, + "logits/chosen": -1.0350799560546875, + "logits/rejected": -1.0523681640625, + "logps/chosen": -488.9765625, + "logps/rejected": -553.484375, + "logps/weighted_chosen": 
-3.7232666015625, + "logps/weighted_rejected": -4.598974704742432, + "loss": 0.5581, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -217.74844360351562, + "rewards/margins": 68.5667953491211, + "rewards/rejected": -286.3515625, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -1.01214599609375, + "rewards/weighted_margins": 0.775787353515625, + "rewards/weighted_rejected": -1.787988305091858, + "step": 920 + }, + { + "epoch": 0.48678356451190785, + "grad_norm": 40.16273880004883, + "learning_rate": 6.110059162641439e-07, + "logits/chosen": -1.0597412586212158, + "logits/rejected": -1.0781066417694092, + "logps/chosen": -513.3226318359375, + "logps/rejected": -559.4593505859375, + "logps/weighted_chosen": -3.147705078125, + "logps/weighted_rejected": -4.214404106140137, + "loss": 0.5481, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -231.1085968017578, + "rewards/margins": 61.108985900878906, + "rewards/rejected": -292.21954345703125, + "rewards/weighted_accuracies": 0.721875011920929, + "rewards/weighted_chosen": -0.8689819574356079, + "rewards/weighted_margins": 0.7522827386856079, + "rewards/weighted_rejected": -1.6212646961212158, + "step": 930 + }, + { + "epoch": 0.49201779638838, + "grad_norm": 29.65454864501953, + "learning_rate": 6.020780696456059e-07, + "logits/chosen": -1.072198510169983, + "logits/rejected": -1.104650855064392, + "logps/chosen": -511.18280029296875, + "logps/rejected": -601.0250244140625, + "logps/weighted_chosen": -3.1954102516174316, + "logps/weighted_rejected": -4.517724514007568, + "loss": 0.5407, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -240.52108764648438, + "rewards/margins": 93.419921875, + "rewards/rejected": -333.953125, + "rewards/weighted_accuracies": 0.731249988079071, + "rewards/weighted_chosen": -1.0927855968475342, + "rewards/weighted_margins": 0.8496459722518921, + "rewards/weighted_rejected": -1.942968726158142, + "step": 940 + }, + { + 
"epoch": 0.49725202826485215, + "grad_norm": 210.50332641601562, + "learning_rate": 5.931161297810185e-07, + "logits/chosen": -1.132635474205017, + "logits/rejected": -1.1451904773712158, + "logps/chosen": -574.6031494140625, + "logps/rejected": -629.4656372070312, + "logps/weighted_chosen": -4.126172065734863, + "logps/weighted_rejected": -5.016747951507568, + "loss": 0.5998, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -301.52264404296875, + "rewards/margins": 66.5218734741211, + "rewards/rejected": -368.080078125, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -1.307519555091858, + "rewards/weighted_margins": 0.74249267578125, + "rewards/weighted_rejected": -2.0491180419921875, + "step": 950 + }, + { + "epoch": 0.5024862601413242, + "grad_norm": 113.20726013183594, + "learning_rate": 5.841230898854959e-07, + "logits/chosen": -1.070550560951233, + "logits/rejected": -1.0872802734375, + "logps/chosen": -652.0281372070312, + "logps/rejected": -711.1765747070312, + "logps/weighted_chosen": -3.9981932640075684, + "logps/weighted_rejected": -5.147070407867432, + "loss": 0.6329, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -351.828125, + "rewards/margins": 97.4195327758789, + "rewards/rejected": -449.2261657714844, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -1.564361572265625, + "rewards/weighted_margins": 0.872241199016571, + "rewards/weighted_rejected": -2.436718702316284, + "step": 960 + }, + { + "epoch": 0.5077204920177963, + "grad_norm": 22.200820922851562, + "learning_rate": 5.751019535613102e-07, + "logits/chosen": -0.987274169921875, + "logits/rejected": -1.0052611827850342, + "logps/chosen": -531.3624877929688, + "logps/rejected": -610.1218872070312, + "logps/weighted_chosen": -3.8460450172424316, + "logps/weighted_rejected": -5.256982326507568, + "loss": 0.5675, + "rewards/accuracies": 0.65625, + "rewards/chosen": -271.6910095214844, + 
"rewards/margins": 91.53633117675781, + "rewards/rejected": -363.22344970703125, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -1.357843041419983, + "rewards/weighted_margins": 1.01031494140625, + "rewards/weighted_rejected": -2.367443799972534, + "step": 970 + }, + { + "epoch": 0.5129547238942685, + "grad_norm": 17.362323760986328, + "learning_rate": 5.660557337947117e-07, + "logits/chosen": -0.9707549810409546, + "logits/rejected": -0.983325183391571, + "logps/chosen": -549.1953125, + "logps/rejected": -586.7874755859375, + "logps/weighted_chosen": -3.279223680496216, + "logps/weighted_rejected": -4.507519721984863, + "loss": 0.5466, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -254.6687469482422, + "rewards/margins": 74.56758117675781, + "rewards/rejected": -329.2632751464844, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -1.0866820812225342, + "rewards/weighted_margins": 0.8655151128768921, + "rewards/weighted_rejected": -1.9528076648712158, + "step": 980 + }, + { + "epoch": 0.5181889557707406, + "grad_norm": 39.01738739013672, + "learning_rate": 5.569874519496174e-07, + "logits/chosen": -0.963134765625, + "logits/rejected": -1.01763916015625, + "logps/chosen": -488.7406311035156, + "logps/rejected": -554.3687744140625, + "logps/weighted_chosen": -3.665576219558716, + "logps/weighted_rejected": -4.876318454742432, + "loss": 0.5929, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -213.4460906982422, + "rewards/margins": 73.8890609741211, + "rewards/rejected": -287.3500061035156, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -0.959338366985321, + "rewards/weighted_margins": 0.7813965082168579, + "rewards/weighted_rejected": -1.740045189857483, + "step": 990 + }, + { + "epoch": 0.5234231876472127, + "grad_norm": 33.2608642578125, + "learning_rate": 5.47900136758499e-07, + "logits/chosen": -0.9298340082168579, + 
"logits/rejected": -0.989898681640625, + "logps/chosen": -527.16015625, + "logps/rejected": -566.2453002929688, + "logps/weighted_chosen": -3.71044921875, + "logps/weighted_rejected": -4.887304782867432, + "loss": 0.5395, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -259.56170654296875, + "rewards/margins": 64.66015625, + "rewards/rejected": -324.302734375, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -1.119836449623108, + "rewards/weighted_margins": 0.944445788860321, + "rewards/weighted_rejected": -2.063854932785034, + "step": 1000 + }, + { + "epoch": 0.5234231876472127, + "eval_logits/chosen": -1.0472733974456787, + "eval_logits/rejected": -1.0595996379852295, + "eval_logps/chosen": -573.6119995117188, + "eval_logps/rejected": -629.1840209960938, + "eval_logps/weighted_chosen": -3.815713405609131, + "eval_logps/weighted_rejected": -4.930161476135254, + "eval_loss": 0.5728335976600647, + "eval_rewards/accuracies": 0.5989999771118164, + "eval_rewards/chosen": -284.8971252441406, + "eval_rewards/margins": 76.59700012207031, + "eval_rewards/rejected": -361.5224914550781, + "eval_rewards/weighted_accuracies": 0.7070000171661377, + "eval_rewards/weighted_chosen": -1.2679998874664307, + "eval_rewards/weighted_margins": 0.8639541268348694, + "eval_rewards/weighted_rejected": -2.1319541931152344, + "eval_runtime": 1366.4223, + "eval_samples_per_second": 1.464, + "eval_steps_per_second": 0.366, + "step": 1000 + }, + { + "epoch": 0.528657419523685, + "grad_norm": 30.763290405273438, + "learning_rate": 5.387968233108113e-07, + "logits/chosen": -0.9412124752998352, + "logits/rejected": -0.9331512451171875, + "logps/chosen": -583.2468872070312, + "logps/rejected": -633.9749755859375, + "logps/weighted_chosen": -4.168408393859863, + "logps/weighted_rejected": -5.263281345367432, + "loss": 0.5622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -298.86407470703125, + "rewards/margins": 80.30000305175781, + 
"rewards/rejected": -379.27032470703125, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -1.356286644935608, + "rewards/weighted_margins": 0.906384289264679, + "rewards/weighted_rejected": -2.262927293777466, + "step": 1010 + }, + { + "epoch": 0.533891651400157, + "grad_norm": 18.650068283081055, + "learning_rate": 5.296805520392962e-07, + "logits/chosen": -1.010156273841858, + "logits/rejected": -1.0419880151748657, + "logps/chosen": -606.2078247070312, + "logps/rejected": -638.6656494140625, + "logps/weighted_chosen": -3.4315428733825684, + "logps/weighted_rejected": -4.852490425109863, + "loss": 0.6285, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -293.66796875, + "rewards/margins": 62.296485900878906, + "rewards/rejected": -355.8941345214844, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -1.4029419422149658, + "rewards/weighted_margins": 0.724365234375, + "rewards/weighted_rejected": -2.1269164085388184, + "step": 1020 + }, + { + "epoch": 0.5391258832766291, + "grad_norm": 28.17354393005371, + "learning_rate": 5.205543677045049e-07, + "logits/chosen": -0.9372314214706421, + "logits/rejected": -0.974993884563446, + "logps/chosen": -493.52655029296875, + "logps/rejected": -522.546875, + "logps/weighted_chosen": -3.700146436691284, + "logps/weighted_rejected": -4.827466011047363, + "loss": 0.5313, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -219.2234344482422, + "rewards/margins": 58.09453201293945, + "rewards/rejected": -277.33203125, + "rewards/weighted_accuracies": 0.7593749761581421, + "rewards/weighted_chosen": -1.0529053211212158, + "rewards/weighted_margins": 0.86126708984375, + "rewards/weighted_rejected": -1.9149185419082642, + "step": 1030 + }, + { + "epoch": 0.5443601151531012, + "grad_norm": 40.029666900634766, + "learning_rate": 5.114213183778697e-07, + "logits/chosen": -1.017327904701233, + "logits/rejected": -1.0485351085662842, + "logps/chosen": 
-514.8624877929688, + "logps/rejected": -569.7859497070312, + "logps/weighted_chosen": -4.155713081359863, + "logps/weighted_rejected": -4.990136623382568, + "loss": 0.5441, + "rewards/accuracies": 0.609375, + "rewards/chosen": -234.3312530517578, + "rewards/margins": 78.56758117675781, + "rewards/rejected": -312.8812561035156, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -1.097131371498108, + "rewards/weighted_margins": 0.8939269781112671, + "rewards/weighted_rejected": -1.9906127452850342, + "step": 1040 + }, + { + "epoch": 0.5495943470295734, + "grad_norm": 37.25775146484375, + "learning_rate": 5.022844544236754e-07, + "logits/chosen": -0.9515380859375, + "logits/rejected": -0.961352527141571, + "logps/chosen": -573.2562255859375, + "logps/rejected": -641.7312622070312, + "logps/weighted_chosen": -4.061865329742432, + "logps/weighted_rejected": -5.167675971984863, + "loss": 0.5774, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -293.25311279296875, + "rewards/margins": 93.615234375, + "rewards/rejected": -386.7679748535156, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -1.2718932628631592, + "rewards/weighted_margins": 0.8445068597793579, + "rewards/weighted_rejected": -2.1155028343200684, + "step": 1050 + }, + { + "epoch": 0.5548285789060455, + "grad_norm": 26.58415985107422, + "learning_rate": 4.931468274802608e-07, + "logits/chosen": -0.9689911007881165, + "logits/rejected": -0.9828445315361023, + "logps/chosen": -585.3031005859375, + "logps/rejected": -649.8265380859375, + "logps/weighted_chosen": -3.440234422683716, + "logps/weighted_rejected": -4.7862548828125, + "loss": 0.5493, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -303.04412841796875, + "rewards/margins": 85.91679382324219, + "rewards/rejected": -388.83087158203125, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -1.2489440441131592, + 
"rewards/weighted_margins": 0.8478637933731079, + "rewards/weighted_rejected": -2.095629930496216, + "step": 1060 + }, + { + "epoch": 0.5600628107825176, + "grad_norm": 25.14666175842285, + "learning_rate": 4.840114894407974e-07, + "logits/chosen": -0.9988906979560852, + "logits/rejected": -1.0262877941131592, + "logps/chosen": -564.2750244140625, + "logps/rejected": -604.7640380859375, + "logps/weighted_chosen": -3.8533082008361816, + "logps/weighted_rejected": -4.584790229797363, + "loss": 0.5612, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -287.8531188964844, + "rewards/margins": 65.25312805175781, + "rewards/rejected": -353.18670654296875, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.2451751232147217, + "rewards/weighted_margins": 0.882397472858429, + "rewards/weighted_rejected": -2.1273193359375, + "step": 1070 + }, + { + "epoch": 0.5652970426589898, + "grad_norm": 34.41138458251953, + "learning_rate": 4.748814914339811e-07, + "logits/chosen": -0.9615001678466797, + "logits/rejected": -0.990710437297821, + "logps/chosen": -606.1984252929688, + "logps/rejected": -647.2062377929688, + "logps/weighted_chosen": -3.8893065452575684, + "logps/weighted_rejected": -4.611474514007568, + "loss": 0.5687, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -321.95098876953125, + "rewards/margins": 69.9859390258789, + "rewards/rejected": -391.935546875, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -1.2852051258087158, + "rewards/weighted_margins": 0.868273913860321, + "rewards/weighted_rejected": -2.153552293777466, + "step": 1080 + }, + { + "epoch": 0.5705312745354619, + "grad_norm": 20.902027130126953, + "learning_rate": 4.657598828049801e-07, + "logits/chosen": -1.0034713745117188, + "logits/rejected": -1.0612213611602783, + "logps/chosen": -613.8250122070312, + "logps/rejected": -699.54296875, + "logps/weighted_chosen": -3.7084593772888184, + 
"logps/weighted_rejected": -4.5335693359375, + "loss": 0.537, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -323.48028564453125, + "rewards/margins": 95.4664077758789, + "rewards/rejected": -418.94842529296875, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -1.2582886219024658, + "rewards/weighted_margins": 0.9008544683456421, + "rewards/weighted_rejected": -2.15960693359375, + "step": 1090 + }, + { + "epoch": 0.575765506411934, + "grad_norm": 40.39773178100586, + "learning_rate": 4.566497100969792e-07, + "logits/chosen": -0.9749755859375, + "logits/rejected": -0.9959548711776733, + "logps/chosen": -720.859375, + "logps/rejected": -785.3062744140625, + "logps/weighted_chosen": -4.180810451507568, + "logps/weighted_rejected": -5.250244140625, + "loss": 0.5644, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -398.22674560546875, + "rewards/margins": 88.7249984741211, + "rewards/rejected": -486.95782470703125, + "rewards/weighted_accuracies": 0.778124988079071, + "rewards/weighted_chosen": -1.482934594154358, + "rewards/weighted_margins": 0.999176025390625, + "rewards/weighted_rejected": -2.483081102371216, + "step": 1100 + }, + { + "epoch": 0.5809997382884062, + "grad_norm": 31.201040267944336, + "learning_rate": 4.475540160336576e-07, + "logits/chosen": -0.992321789264679, + "logits/rejected": -1.0310242176055908, + "logps/chosen": -624.4796752929688, + "logps/rejected": -671.109375, + "logps/weighted_chosen": -4.22021484375, + "logps/weighted_rejected": -5.428515434265137, + "loss": 0.5427, + "rewards/accuracies": 0.609375, + "rewards/chosen": -326.1617126464844, + "rewards/margins": 86.7378921508789, + "rewards/rejected": -412.96405029296875, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -1.3425171375274658, + "rewards/weighted_margins": 1.0147826671600342, + "rewards/weighted_rejected": -2.3572998046875, + "step": 1110 + }, + { + "epoch": 0.5862339701648783, + 
"grad_norm": 47.25414276123047, + "learning_rate": 4.3847583850294565e-07, + "logits/chosen": -0.9623962640762329, + "logits/rejected": -0.9765838384628296, + "logps/chosen": -671.4148559570312, + "logps/rejected": -715.609375, + "logps/weighted_chosen": -4.4444580078125, + "logps/weighted_rejected": -5.09375, + "loss": 0.5772, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -380.1949157714844, + "rewards/margins": 75.4898452758789, + "rewards/rejected": -455.4906311035156, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.4877197742462158, + "rewards/weighted_margins": 0.929028332233429, + "rewards/weighted_rejected": -2.4179930686950684, + "step": 1120 + }, + { + "epoch": 0.5914682020413504, + "grad_norm": 30.387371063232422, + "learning_rate": 4.294182095423934e-07, + "logits/chosen": -0.939868152141571, + "logits/rejected": -0.9976135492324829, + "logps/chosen": -623.6375122070312, + "logps/rejected": -687.7578125, + "logps/weighted_chosen": -3.7587890625, + "logps/weighted_rejected": -4.962597846984863, + "loss": 0.5553, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -338.00079345703125, + "rewards/margins": 88.7320327758789, + "rewards/rejected": -426.6976623535156, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -1.33770751953125, + "rewards/weighted_margins": 0.9156738519668579, + "rewards/weighted_rejected": -2.252673387527466, + "step": 1130 + }, + { + "epoch": 0.5967024339178225, + "grad_norm": 15.42784309387207, + "learning_rate": 4.20384154326496e-07, + "logits/chosen": -0.9435394406318665, + "logits/rejected": -0.9906860589981079, + "logps/chosen": -516.5921630859375, + "logps/rejected": -537.7835693359375, + "logps/weighted_chosen": -3.5862059593200684, + "logps/weighted_rejected": -4.702197074890137, + "loss": 0.5867, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -255.84805297851562, + "rewards/margins": 46.25468826293945, + "rewards/rejected": 
-302.1802673339844, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.0679458379745483, + "rewards/weighted_margins": 0.756103515625, + "rewards/weighted_rejected": -1.824121117591858, + "step": 1140 + }, + { + "epoch": 0.6019366657942947, + "grad_norm": 27.973642349243164, + "learning_rate": 4.1137669015630863e-07, + "logits/chosen": -0.9399688839912415, + "logits/rejected": -0.9874938726425171, + "logps/chosen": -538.5843505859375, + "logps/rejected": -606.5062255859375, + "logps/weighted_chosen": -3.3528809547424316, + "logps/weighted_rejected": -4.451220512390137, + "loss": 0.5538, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -242.4329071044922, + "rewards/margins": 80.2808609008789, + "rewards/rejected": -322.6656188964844, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.035125732421875, + "rewards/weighted_margins": 0.787548840045929, + "rewards/weighted_rejected": -1.822839379310608, + "step": 1150 + }, + { + "epoch": 0.6071708976707668, + "grad_norm": 16.80686378479004, + "learning_rate": 4.023988254516943e-07, + "logits/chosen": -0.9526001214981079, + "logits/rejected": -1.002233862876892, + "logps/chosen": -565.6570434570312, + "logps/rejected": -599.0374755859375, + "logps/weighted_chosen": -3.925537109375, + "logps/weighted_rejected": -4.567919731140137, + "loss": 0.4945, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -264.87188720703125, + "rewards/margins": 67.05058288574219, + "rewards/rejected": -331.87774658203125, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -0.928997814655304, + "rewards/weighted_margins": 0.8846801519393921, + "rewards/weighted_rejected": -1.813256859779358, + "step": 1160 + }, + { + "epoch": 0.6124051295472389, + "grad_norm": 26.579771041870117, + "learning_rate": 3.9345355874653366e-07, + "logits/chosen": -0.964202880859375, + "logits/rejected": -0.984423816204071, + "logps/chosen": -594.9468994140625, + 
"logps/rejected": -598.2398681640625, + "logps/weighted_chosen": -3.7232666015625, + "logps/weighted_rejected": -4.643334865570068, + "loss": 0.6137, + "rewards/accuracies": 0.578125, + "rewards/chosen": -297.5835876464844, + "rewards/margins": 50.535545349121094, + "rewards/rejected": -348.0869140625, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -1.2086670398712158, + "rewards/weighted_margins": 0.742016613483429, + "rewards/weighted_rejected": -1.9512207508087158, + "step": 1170 + }, + { + "epoch": 0.6176393614237111, + "grad_norm": 30.812177658081055, + "learning_rate": 3.8454387768724157e-07, + "logits/chosen": -1.005767822265625, + "logits/rejected": -1.008856177330017, + "logps/chosen": -506.8109436035156, + "logps/rejected": -517.0929565429688, + "logps/weighted_chosen": -3.8594727516174316, + "logps/weighted_rejected": -4.857763767242432, + "loss": 0.5536, + "rewards/accuracies": 0.559374988079071, + "rewards/chosen": -233.98828125, + "rewards/margins": 52.480857849121094, + "rewards/rejected": -286.2953186035156, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -1.0302002429962158, + "rewards/weighted_margins": 0.875408947467804, + "rewards/weighted_rejected": -1.905310034751892, + "step": 1180 + }, + { + "epoch": 0.6228735933001832, + "grad_norm": 21.219039916992188, + "learning_rate": 3.7567275803491525e-07, + "logits/chosen": -1.0049774646759033, + "logits/rejected": -1.033941626548767, + "logps/chosen": -568.109375, + "logps/rejected": -577.7015380859375, + "logps/weighted_chosen": -3.424023389816284, + "logps/weighted_rejected": -4.737890720367432, + "loss": 0.5183, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -265.75665283203125, + "rewards/margins": 58.340232849121094, + "rewards/rejected": -324.1148376464844, + "rewards/weighted_accuracies": 0.737500011920929, + "rewards/weighted_chosen": -0.9457031488418579, + "rewards/weighted_margins": 0.9256957769393921, + 
"rewards/weighted_rejected": -1.871618628501892, + "step": 1190 + }, + { + "epoch": 0.6281078251766553, + "grad_norm": 24.416122436523438, + "learning_rate": 3.66843162671456e-07, + "logits/chosen": -0.992877185344696, + "logits/rejected": -1.0040404796600342, + "logps/chosen": -542.8703002929688, + "logps/rejected": -650.1124877929688, + "logps/weighted_chosen": -4.203027248382568, + "logps/weighted_rejected": -4.642626762390137, + "loss": 0.6415, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -268.76171875, + "rewards/margins": 109.0687484741211, + "rewards/rejected": -377.92889404296875, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -1.240747094154358, + "rewards/weighted_margins": 0.7083190679550171, + "rewards/weighted_rejected": -1.9493834972381592, + "step": 1200 + }, + { + "epoch": 0.6333420570531274, + "grad_norm": 30.961528778076172, + "learning_rate": 3.5805804060998924e-07, + "logits/chosen": -0.988171398639679, + "logits/rejected": -1.0112731456756592, + "logps/chosen": -576.0609130859375, + "logps/rejected": -664.0671997070312, + "logps/weighted_chosen": -3.3628907203674316, + "logps/weighted_rejected": -4.699365139007568, + "loss": 0.5317, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -275.33905029296875, + "rewards/margins": 118.02656555175781, + "rewards/rejected": -393.25311279296875, + "rewards/weighted_accuracies": 0.75, + "rewards/weighted_chosen": -1.125707983970642, + "rewards/weighted_margins": 0.971728503704071, + "rewards/weighted_rejected": -2.0982666015625, + "step": 1210 + }, + { + "epoch": 0.6385762889295996, + "grad_norm": 41.25253677368164, + "learning_rate": 3.493203260099197e-07, + "logits/chosen": -0.99005126953125, + "logits/rejected": -1.052459716796875, + "logps/chosen": -616.2062377929688, + "logps/rejected": -658.71875, + "logps/weighted_chosen": -3.477587938308716, + "logps/weighted_rejected": -4.777050971984863, + "loss": 0.565, + "rewards/accuracies": 0.578125, + 
"rewards/chosen": -328.61328125, + "rewards/margins": 57.30156326293945, + "rewards/rejected": -385.90899658203125, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -1.273229956626892, + "rewards/weighted_margins": 0.817840576171875, + "rewards/weighted_rejected": -2.0914306640625, + "step": 1220 + }, + { + "epoch": 0.6438105208060717, + "grad_norm": 44.952823638916016, + "learning_rate": 3.4063293719694407e-07, + "logits/chosen": -0.9899932742118835, + "logits/rejected": -1.038726806640625, + "logps/chosen": -558.2008056640625, + "logps/rejected": -622.3187255859375, + "logps/weighted_chosen": -3.8162598609924316, + "logps/weighted_rejected": -4.878759860992432, + "loss": 0.5962, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -281.5263671875, + "rewards/margins": 83.4378890991211, + "rewards/rejected": -365.0884704589844, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.238305687904358, + "rewards/weighted_margins": 0.806610107421875, + "rewards/weighted_rejected": -2.045300245285034, + "step": 1230 + }, + { + "epoch": 0.6490447526825438, + "grad_norm": 17.675373077392578, + "learning_rate": 3.319987756883559e-07, + "logits/chosen": -1.0293700695037842, + "logits/rejected": -1.05963134765625, + "logps/chosen": -563.71875, + "logps/rejected": -646.96875, + "logps/weighted_chosen": -3.506591796875, + "logps/weighted_rejected": -4.776757717132568, + "loss": 0.5093, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -286.01483154296875, + "rewards/margins": 91.8824234008789, + "rewards/rejected": -377.9115295410156, + "rewards/weighted_accuracies": 0.75, + "rewards/weighted_chosen": -1.059393286705017, + "rewards/weighted_margins": 1.0341796875, + "rewards/weighted_rejected": -2.09356689453125, + "step": 1240 + }, + { + "epoch": 0.654278984559016, + "grad_norm": 30.464399337768555, + "learning_rate": 3.234207252239607e-07, + "logits/chosen": -1.0212494134902954, + 
"logits/rejected": -1.053070068359375, + "logps/chosen": -624.4281005859375, + "logps/rejected": -653.6124877929688, + "logps/weighted_chosen": -4.078759670257568, + "logps/weighted_rejected": -4.910693168640137, + "loss": 0.5845, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -331.91796875, + "rewards/margins": 68.6617202758789, + "rewards/rejected": -400.6312561035156, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -1.3435547351837158, + "rewards/weighted_margins": 0.7761596441268921, + "rewards/weighted_rejected": -2.1199707984924316, + "step": 1250 + }, + { + "epoch": 0.6595132164354881, + "grad_norm": 22.67099380493164, + "learning_rate": 3.1490165080293175e-07, + "logits/chosen": -1.032771348953247, + "logits/rejected": -1.0862915515899658, + "logps/chosen": -546.2867431640625, + "logps/rejected": -636.9609375, + "logps/weighted_chosen": -3.594482421875, + "logps/weighted_rejected": -4.584668159484863, + "loss": 0.5515, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -278.97149658203125, + "rewards/margins": 94.80000305175781, + "rewards/rejected": -373.8335876464844, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.1841598749160767, + "rewards/weighted_margins": 0.892077624797821, + "rewards/weighted_rejected": -2.0762572288513184, + "step": 1260 + }, + { + "epoch": 0.6647474483119602, + "grad_norm": 18.550798416137695, + "learning_rate": 3.06444397726922e-07, + "logits/chosen": -1.000738501548767, + "logits/rejected": -1.0697616338729858, + "logps/chosen": -617.4578247070312, + "logps/rejected": -667.5968627929688, + "logps/weighted_chosen": -3.587646484375, + "logps/weighted_rejected": -5.254980564117432, + "loss": 0.5184, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -321.51385498046875, + "rewards/margins": 83.83515930175781, + "rewards/rejected": -405.3265686035156, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": 
-1.1221191883087158, + "rewards/weighted_margins": 1.162841796875, + "rewards/weighted_rejected": -2.2856812477111816, + "step": 1270 + }, + { + "epoch": 0.6699816801884323, + "grad_norm": 22.748411178588867, + "learning_rate": 2.980517906497586e-07, + "logits/chosen": -1.0525604486465454, + "logits/rejected": -1.106359839439392, + "logps/chosen": -608.9031372070312, + "logps/rejected": -702.9249877929688, + "logps/weighted_chosen": -3.8238282203674316, + "logps/weighted_rejected": -5.245898246765137, + "loss": 0.5269, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -316.0335998535156, + "rewards/margins": 114.6685562133789, + "rewards/rejected": -430.79608154296875, + "rewards/weighted_accuracies": 0.7406250238418579, + "rewards/weighted_chosen": -1.237799048423767, + "rewards/weighted_margins": 0.95733642578125, + "rewards/weighted_rejected": -2.19537353515625, + "step": 1280 + }, + { + "epoch": 0.6752159120649045, + "grad_norm": 25.510196685791016, + "learning_rate": 2.89726632634029e-07, + "logits/chosen": -1.055084228515625, + "logits/rejected": -1.07818603515625, + "logps/chosen": -630.3406372070312, + "logps/rejected": -696.7406005859375, + "logps/weighted_chosen": -3.6622557640075684, + "logps/weighted_rejected": -4.793408393859863, + "loss": 0.5721, + "rewards/accuracies": 0.59375, + "rewards/chosen": -356.0171813964844, + "rewards/margins": 85.5914077758789, + "rewards/rejected": -441.40625, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -1.368402123451233, + "rewards/weighted_margins": 0.8181518316268921, + "rewards/weighted_rejected": -2.1871337890625, + "step": 1290 + }, + { + "epoch": 0.6804501439413766, + "grad_norm": 3403.25146484375, + "learning_rate": 2.814717042148827e-07, + "logits/chosen": -1.0733153820037842, + "logits/rejected": -1.1039886474609375, + "logps/chosen": -579.2171630859375, + "logps/rejected": -664.2179565429688, + "logps/weighted_chosen": -4.351758003234863, + 
"logps/weighted_rejected": -5.040478706359863, + "loss": 0.5988, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": -305.47967529296875, + "rewards/margins": 97.9154281616211, + "rewards/rejected": -403.4019470214844, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -1.3027832508087158, + "rewards/weighted_margins": 0.817980945110321, + "rewards/weighted_rejected": -2.1208739280700684, + "step": 1300 + }, + { + "epoch": 0.6856843758178487, + "grad_norm": 17.00541877746582, + "learning_rate": 2.7328976247135416e-07, + "logits/chosen": -1.098138451576233, + "logits/rejected": -1.1229279041290283, + "logps/chosen": -568.8577880859375, + "logps/rejected": -613.6781005859375, + "logps/weighted_chosen": -3.7420411109924316, + "logps/weighted_rejected": -4.709765434265137, + "loss": 0.6077, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -279.4331970214844, + "rewards/margins": 82.03047180175781, + "rewards/rejected": -361.5140686035156, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -1.1976807117462158, + "rewards/weighted_margins": 0.7601562738418579, + "rewards/weighted_rejected": -1.9588134288787842, + "step": 1310 + }, + { + "epoch": 0.6909186076943209, + "grad_norm": 19.76185417175293, + "learning_rate": 2.651835401055217e-07, + "logits/chosen": -1.06744384765625, + "logits/rejected": -1.0995299816131592, + "logps/chosen": -551.1812744140625, + "logps/rejected": -621.4281005859375, + "logps/weighted_chosen": -3.578369140625, + "logps/weighted_rejected": -4.4482421875, + "loss": 0.6022, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -273.7007751464844, + "rewards/margins": 82.255859375, + "rewards/rejected": -355.9320373535156, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -1.109167456626892, + "rewards/weighted_margins": 0.736828625202179, + "rewards/weighted_rejected": -1.8462402820587158, + "step": 1320 + }, + { + 
"epoch": 0.696152839570793, + "grad_norm": 38.951107025146484, + "learning_rate": 2.571557445298055e-07, + "logits/chosen": -1.060308814048767, + "logits/rejected": -1.1077148914337158, + "logps/chosen": -517.7781372070312, + "logps/rejected": -573.0328369140625, + "logps/weighted_chosen": -3.490673780441284, + "logps/weighted_rejected": -4.511181831359863, + "loss": 0.5684, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -239.5070343017578, + "rewards/margins": 69.423828125, + "rewards/rejected": -308.96209716796875, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -0.9827636480331421, + "rewards/weighted_margins": 0.767047107219696, + "rewards/weighted_rejected": -1.749639868736267, + "step": 1330 + }, + { + "epoch": 0.7013870714472651, + "grad_norm": 24.909101486206055, + "learning_rate": 2.49209056962716e-07, + "logits/chosen": -1.0824463367462158, + "logits/rejected": -1.101318359375, + "logps/chosen": -593.8343505859375, + "logps/rejected": -619.0203247070312, + "logps/weighted_chosen": -3.7747559547424316, + "logps/weighted_rejected": -4.873144626617432, + "loss": 0.5711, + "rewards/accuracies": 0.59375, + "rewards/chosen": -279.1812438964844, + "rewards/margins": 72.595703125, + "rewards/rejected": -351.7855529785156, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -1.0869140625, + "rewards/weighted_margins": 0.7144775390625, + "rewards/weighted_rejected": -1.8008911609649658, + "step": 1340 + }, + { + "epoch": 0.7066213033237373, + "grad_norm": 24.351770401000977, + "learning_rate": 2.41346131533347e-07, + "logits/chosen": -1.13226318359375, + "logits/rejected": -1.141271948814392, + "logps/chosen": -624.3922119140625, + "logps/rejected": -672.2625122070312, + "logps/weighted_chosen": -3.3324952125549316, + "logps/weighted_rejected": -4.617163181304932, + "loss": 0.5572, + "rewards/accuracies": 0.609375, + "rewards/chosen": -317.93731689453125, + "rewards/margins": 
79.771484375, + "rewards/rejected": -397.5132751464844, + "rewards/weighted_accuracies": 0.721875011920929, + "rewards/weighted_chosen": -1.1239502429962158, + "rewards/weighted_margins": 0.762219250202179, + "rewards/weighted_rejected": -1.88616943359375, + "step": 1350 + }, + { + "epoch": 0.7118555352002094, + "grad_norm": 51.18987274169922, + "learning_rate": 2.3356959439491898e-07, + "logits/chosen": -1.053808569908142, + "logits/rejected": -1.1220916509628296, + "logps/chosen": -584.7609252929688, + "logps/rejected": -641.2109375, + "logps/weighted_chosen": -4.107861518859863, + "logps/weighted_rejected": -4.883447170257568, + "loss": 0.5525, + "rewards/accuracies": 0.609375, + "rewards/chosen": -314.8609313964844, + "rewards/margins": 84.24922180175781, + "rewards/rejected": -399.08319091796875, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -1.143286108970642, + "rewards/weighted_margins": 0.9397217035293579, + "rewards/weighted_rejected": -2.082202196121216, + "step": 1360 + }, + { + "epoch": 0.7170897670766815, + "grad_norm": 25.061872482299805, + "learning_rate": 2.258820428476645e-07, + "logits/chosen": -1.083398461341858, + "logits/rejected": -1.124755859375, + "logps/chosen": -615.3109130859375, + "logps/rejected": -711.2015380859375, + "logps/weighted_chosen": -3.571972608566284, + "logps/weighted_rejected": -4.434179782867432, + "loss": 0.5486, + "rewards/accuracies": 0.625, + "rewards/chosen": -336.1929626464844, + "rewards/margins": 105.92304992675781, + "rewards/rejected": -442.1929626464844, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -1.143713355064392, + "rewards/weighted_margins": 0.7840820550918579, + "rewards/weighted_rejected": -1.9280884265899658, + "step": 1370 + }, + { + "epoch": 0.7223239989531536, + "grad_norm": 50.347843170166016, + "learning_rate": 2.1828604447135245e-07, + "logits/chosen": -1.015539526939392, + "logits/rejected": -1.0683166980743408, + 
"logps/chosen": -660.08203125, + "logps/rejected": -700.8531494140625, + "logps/weighted_chosen": -4.080639839172363, + "logps/weighted_rejected": -5.272363185882568, + "loss": 0.5525, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -390.23712158203125, + "rewards/margins": 54.01288986206055, + "rewards/rejected": -444.32110595703125, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.2563965320587158, + "rewards/weighted_margins": 0.91107177734375, + "rewards/weighted_rejected": -2.1673583984375, + "step": 1380 + }, + { + "epoch": 0.7275582308296258, + "grad_norm": 29.76629638671875, + "learning_rate": 2.1078413626773545e-07, + "logits/chosen": -1.0745728015899658, + "logits/rejected": -1.095086693763733, + "logps/chosen": -615.3843994140625, + "logps/rejected": -720.0593872070312, + "logps/weighted_chosen": -3.6241729259490967, + "logps/weighted_rejected": -5.315381050109863, + "loss": 0.5552, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -335.61053466796875, + "rewards/margins": 115.576171875, + "rewards/rejected": -451.1439514160156, + "rewards/weighted_accuracies": 0.7250000238418579, + "rewards/weighted_chosen": -1.17816162109375, + "rewards/weighted_margins": 0.8593689203262329, + "rewards/weighted_rejected": -2.037463426589966, + "step": 1390 + }, + { + "epoch": 0.7327924627060979, + "grad_norm": 78.16152954101562, + "learning_rate": 2.0337882381321347e-07, + "logits/chosen": -1.062066674232483, + "logits/rejected": -1.0702636241912842, + "logps/chosen": -643.6328125, + "logps/rejected": -690.3914184570312, + "logps/weighted_chosen": -3.64990234375, + "logps/weighted_rejected": -4.711035251617432, + "loss": 0.5461, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -352.107421875, + "rewards/margins": 84.32890319824219, + "rewards/rejected": -436.3828125, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -1.304632544517517, + 
"rewards/weighted_margins": 0.9240967035293579, + "rewards/weighted_rejected": -2.2285399436950684, + "step": 1400 + }, + { + "epoch": 0.73802669458257, + "grad_norm": 30.649791717529297, + "learning_rate": 1.960725804219905e-07, + "logits/chosen": -1.016119360923767, + "logits/rejected": -1.067724585533142, + "logps/chosen": -629.0554809570312, + "logps/rejected": -716.56640625, + "logps/weighted_chosen": -4.213110446929932, + "logps/weighted_rejected": -4.397546291351318, + "loss": 0.5731, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -329.6851501464844, + "rewards/margins": 103.38749694824219, + "rewards/rejected": -433.10235595703125, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -1.220544457435608, + "rewards/weighted_margins": 0.807751476764679, + "rewards/weighted_rejected": -2.0281982421875, + "step": 1410 + }, + { + "epoch": 0.7432609264590422, + "grad_norm": 22.40865707397461, + "learning_rate": 1.8886784632000824e-07, + "logits/chosen": -1.037255883216858, + "logits/rejected": -1.0631592273712158, + "logps/chosen": -600.8796997070312, + "logps/rejected": -739.5546875, + "logps/weighted_chosen": -3.5133299827575684, + "logps/weighted_rejected": -5.098974704742432, + "loss": 0.5074, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -314.95819091796875, + "rewards/margins": 156.93203735351562, + "rewards/rejected": -471.8910217285156, + "rewards/weighted_accuracies": 0.746874988079071, + "rewards/weighted_chosen": -1.09588623046875, + "rewards/weighted_margins": 1.1151854991912842, + "rewards/weighted_rejected": -2.209277391433716, + "step": 1420 + }, + { + "epoch": 0.7484951583355143, + "grad_norm": 33.5097541809082, + "learning_rate": 1.8176702782993025e-07, + "logits/chosen": -1.0573241710662842, + "logits/rejected": -1.0565185546875, + "logps/chosen": -581.8117065429688, + "logps/rejected": -670.4046630859375, + "logps/weighted_chosen": -3.524365186691284, + "logps/weighted_rejected": 
-4.905322074890137, + "loss": 0.5604, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -320.2757873535156, + "rewards/margins": 95.93476867675781, + "rewards/rejected": -416.09686279296875, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -1.2260253429412842, + "rewards/weighted_margins": 0.9058593511581421, + "rewards/weighted_rejected": -2.131915330886841, + "step": 1430 + }, + { + "epoch": 0.7537293902119864, + "grad_norm": 25.396400451660156, + "learning_rate": 1.7477249656745034e-07, + "logits/chosen": -0.9870361089706421, + "logits/rejected": -1.025244116783142, + "logps/chosen": -535.6265869140625, + "logps/rejected": -582.4281005859375, + "logps/weighted_chosen": -3.7501220703125, + "logps/weighted_rejected": -5.042870998382568, + "loss": 0.5337, + "rewards/accuracies": 0.609375, + "rewards/chosen": -284.6830139160156, + "rewards/margins": 71.47187805175781, + "rewards/rejected": -356.1968688964844, + "rewards/weighted_accuracies": 0.7562500238418579, + "rewards/weighted_chosen": -1.081658959388733, + "rewards/weighted_margins": 1.011804223060608, + "rewards/weighted_rejected": -2.094250440597534, + "step": 1440 + }, + { + "epoch": 0.7589636220884585, + "grad_norm": 46.104244232177734, + "learning_rate": 1.6788658864919118e-07, + "logits/chosen": -0.998852550983429, + "logits/rejected": -1.0865967273712158, + "logps/chosen": -691.5070190429688, + "logps/rejected": -765.0437622070312, + "logps/weighted_chosen": -3.697497606277466, + "logps/weighted_rejected": -4.519140720367432, + "loss": 0.5339, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -376.79376220703125, + "rewards/margins": 104.693359375, + "rewards/rejected": -481.35467529296875, + "rewards/weighted_accuracies": 0.75, + "rewards/weighted_chosen": -1.15362548828125, + "rewards/weighted_margins": 0.9962402582168579, + "rewards/weighted_rejected": -2.150378465652466, + "step": 1450 + }, + { + "epoch": 0.7641978539649307, + "grad_norm": 
29.475303649902344, + "learning_rate": 1.611116039124613e-07, + "logits/chosen": -0.993756115436554, + "logits/rejected": -1.0471680164337158, + "logps/chosen": -612.1336059570312, + "logps/rejected": -652.9812622070312, + "logps/weighted_chosen": -4.120263576507568, + "logps/weighted_rejected": -5.172119140625, + "loss": 0.5626, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -349.39178466796875, + "rewards/margins": 74.2535171508789, + "rewards/rejected": -423.4437561035156, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -1.174108862876892, + "rewards/weighted_margins": 0.957659900188446, + "rewards/weighted_rejected": -2.132617235183716, + "step": 1460 + }, + { + "epoch": 0.7694320858414028, + "grad_norm": 40.777061462402344, + "learning_rate": 1.5444980514712723e-07, + "logits/chosen": -1.0843079090118408, + "logits/rejected": -1.1043212413787842, + "logps/chosen": -677.7750244140625, + "logps/rejected": -797.6781005859375, + "logps/weighted_chosen": -3.82275390625, + "logps/weighted_rejected": -4.651171684265137, + "loss": 0.6086, + "rewards/accuracies": 0.625, + "rewards/chosen": -366.34765625, + "rewards/margins": 125.67655944824219, + "rewards/rejected": -492.0234375, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -1.312963843345642, + "rewards/weighted_margins": 0.756854236125946, + "rewards/weighted_rejected": -2.069854736328125, + "step": 1470 + }, + { + "epoch": 0.7746663177178749, + "grad_norm": 29.14368438720703, + "learning_rate": 1.4790341733986083e-07, + "logits/chosen": -1.0463683605194092, + "logits/rejected": -1.0748412609100342, + "logps/chosen": -621.4453125, + "logps/rejected": -694.0281372070312, + "logps/weighted_chosen": -4.119336128234863, + "logps/weighted_rejected": -4.574511528015137, + "loss": 0.5669, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -334.2621154785156, + "rewards/margins": 97.23515319824219, + "rewards/rejected": 
-431.4808654785156, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -1.203649878501892, + "rewards/weighted_margins": 0.847003161907196, + "rewards/weighted_rejected": -2.0507445335388184, + "step": 1480 + }, + { + "epoch": 0.7799005495943471, + "grad_norm": 95.21968841552734, + "learning_rate": 1.4147462693101108e-07, + "logits/chosen": -1.0290710926055908, + "logits/rejected": -1.058990478515625, + "logps/chosen": -640.1702880859375, + "logps/rejected": -748.3312377929688, + "logps/weighted_chosen": -3.6262450218200684, + "logps/weighted_rejected": -4.870263576507568, + "loss": 0.5476, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -353.05682373046875, + "rewards/margins": 124.0137710571289, + "rewards/rejected": -476.9703063964844, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -1.14178466796875, + "rewards/weighted_margins": 1.012396216392517, + "rewards/weighted_rejected": -2.1542115211486816, + "step": 1490 + }, + { + "epoch": 0.7851347814708192, + "grad_norm": 29.23267936706543, + "learning_rate": 1.3516558108435177e-07, + "logits/chosen": -1.0289306640625, + "logits/rejected": -1.0320098400115967, + "logps/chosen": -576.7921752929688, + "logps/rejected": -693.203125, + "logps/weighted_chosen": -3.387939453125, + "logps/weighted_rejected": -5.066210746765137, + "loss": 0.538, + "rewards/accuracies": 0.609375, + "rewards/chosen": -297.62811279296875, + "rewards/margins": 130.5636749267578, + "rewards/rejected": -428.2289123535156, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -0.9833618402481079, + "rewards/weighted_margins": 0.922503650188446, + "rewards/weighted_rejected": -1.9057738780975342, + "step": 1500 + }, + { + "epoch": 0.7851347814708192, + "eval_logits/chosen": -1.1175518035888672, + "eval_logits/rejected": -1.1384687423706055, + "eval_logps/chosen": -604.7919921875, + "eval_logps/rejected": -676.4500122070312, + 
"eval_logps/weighted_chosen": -3.6202943325042725, + "eval_logps/weighted_rejected": -4.712391376495361, + "eval_loss": 0.5499775409698486, + "eval_rewards/accuracies": 0.593500018119812, + "eval_rewards/chosen": -316.02337646484375, + "eval_rewards/margins": 92.78912353515625, + "eval_rewards/rejected": -408.82000732421875, + "eval_rewards/weighted_accuracies": 0.7145000100135803, + "eval_rewards/weighted_chosen": -1.0725815296173096, + "eval_rewards/weighted_margins": 0.8416025638580322, + "eval_rewards/weighted_rejected": -1.9141839742660522, + "eval_runtime": 1154.371, + "eval_samples_per_second": 1.733, + "eval_steps_per_second": 0.433, + "step": 1500 + }, + { + "epoch": 0.7903690133472913, + "grad_norm": 33.04483413696289, + "learning_rate": 1.2897838696994505e-07, + "logits/chosen": -1.023168921470642, + "logits/rejected": -1.0470459461212158, + "logps/chosen": -568.0797119140625, + "logps/rejected": -649.578125, + "logps/weighted_chosen": -4.047997951507568, + "logps/weighted_rejected": -4.915575981140137, + "loss": 0.595, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -295.654296875, + "rewards/margins": 91.7796859741211, + "rewards/rejected": -387.3218688964844, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.196380615234375, + "rewards/weighted_margins": 0.7684265375137329, + "rewards/weighted_rejected": -1.9652099609375, + "step": 1510 + }, + { + "epoch": 0.7956032452237635, + "grad_norm": 20.796615600585938, + "learning_rate": 1.229151110603649e-07, + "logits/chosen": -1.0638824701309204, + "logits/rejected": -1.1071808338165283, + "logps/chosen": -613.9664306640625, + "logps/rejected": -678.8499755859375, + "logps/weighted_chosen": -3.6717286109924316, + "logps/weighted_rejected": -4.715087890625, + "loss": 0.6142, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -316.6216735839844, + "rewards/margins": 89.4507827758789, + "rewards/rejected": -406.25701904296875, + "rewards/weighted_accuracies": 
0.699999988079071, + "rewards/weighted_chosen": -1.19403076171875, + "rewards/weighted_margins": 0.705639660358429, + "rewards/weighted_rejected": -1.89990234375, + "step": 1520 + }, + { + "epoch": 0.8008374771002356, + "grad_norm": 17.729528427124023, + "learning_rate": 1.1697777844051104e-07, + "logits/chosen": -1.0761749744415283, + "logits/rejected": -1.089080810546875, + "logps/chosen": -648.828125, + "logps/rejected": -752.1140747070312, + "logps/weighted_chosen": -3.615478515625, + "logps/weighted_rejected": -4.789502143859863, + "loss": 0.5181, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -345.47882080078125, + "rewards/margins": 124.6578140258789, + "rewards/rejected": -469.9800720214844, + "rewards/weighted_accuracies": 0.737500011920929, + "rewards/weighted_chosen": -1.0861327648162842, + "rewards/weighted_margins": 0.8956298828125, + "rewards/weighted_rejected": -1.982019066810608, + "step": 1530 + }, + { + "epoch": 0.8060717089767077, + "grad_norm": 24.399063110351562, + "learning_rate": 1.111683721312477e-07, + "logits/chosen": -1.0483185052871704, + "logits/rejected": -1.0707489252090454, + "logps/chosen": -612.3883056640625, + "logps/rejected": -698.0546875, + "logps/weighted_chosen": -3.4658203125, + "logps/weighted_rejected": -4.220166206359863, + "loss": 0.558, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -336.87677001953125, + "rewards/margins": 101.2386703491211, + "rewards/rejected": -438.15234375, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -1.160925269126892, + "rewards/weighted_margins": 0.888659656047821, + "rewards/weighted_rejected": -2.049755811691284, + "step": 1540 + }, + { + "epoch": 0.8113059408531798, + "grad_norm": 42.796451568603516, + "learning_rate": 1.0548883242709033e-07, + "logits/chosen": -0.978894054889679, + "logits/rejected": -1.035614013671875, + "logps/chosen": -627.6140747070312, + "logps/rejected": -770.4288940429688, + 
"logps/weighted_chosen": -4.208154201507568, + "logps/weighted_rejected": -5.158984184265137, + "loss": 0.4957, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -343.4136657714844, + "rewards/margins": 157.42578125, + "rewards/rejected": -500.8896484375, + "rewards/weighted_accuracies": 0.762499988079071, + "rewards/weighted_chosen": -1.1079833507537842, + "rewards/weighted_margins": 1.043493628501892, + "rewards/weighted_rejected": -2.151293992996216, + "step": 1550 + }, + { + "epoch": 0.816540172729652, + "grad_norm": 24.997255325317383, + "learning_rate": 9.994105624816379e-08, + "logits/chosen": -1.037078857421875, + "logits/rejected": -1.088891625404358, + "logps/chosen": -700.9812622070312, + "logps/rejected": -796.5750122070312, + "logps/weighted_chosen": -4.061865329742432, + "logps/weighted_rejected": -5.445410251617432, + "loss": 0.5357, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -399.009765625, + "rewards/margins": 114.9976577758789, + "rewards/rejected": -514.0843505859375, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -1.3728148937225342, + "rewards/weighted_margins": 0.9924713373184204, + "rewards/weighted_rejected": -2.3653807640075684, + "step": 1560 + }, + { + "epoch": 0.821774404606124, + "grad_norm": 36.98191833496094, + "learning_rate": 9.452689650664514e-08, + "logits/chosen": -1.0295531749725342, + "logits/rejected": -1.0981414318084717, + "logps/chosen": -703.7531127929688, + "logps/rejected": -729.4656372070312, + "logps/weighted_chosen": -3.6918702125549316, + "logps/weighted_rejected": -4.560595512390137, + "loss": 0.5276, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -407.9234313964844, + "rewards/margins": 66.05390930175781, + "rewards/rejected": -474.13751220703125, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.312036156654358, + "rewards/weighted_margins": 0.935559093952179, + "rewards/weighted_rejected": 
-2.2478270530700684, + "step": 1570 + }, + { + "epoch": 0.8270086364825961, + "grad_norm": 25.41311264038086, + "learning_rate": 8.924816148790748e-08, + "logits/chosen": -1.0425536632537842, + "logits/rejected": -1.086340308189392, + "logps/chosen": -719.7468872070312, + "logps/rejected": -814.3968505859375, + "logps/weighted_chosen": -4.014013767242432, + "logps/weighted_rejected": -5.042284965515137, + "loss": 0.5361, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -425.23712158203125, + "rewards/margins": 110.99531555175781, + "rewards/rejected": -536.3671875, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.307092308998108, + "rewards/weighted_margins": 1.052404761314392, + "rewards/weighted_rejected": -2.360107421875, + "step": 1580 + }, + { + "epoch": 0.8322428683590684, + "grad_norm": 20.04231071472168, + "learning_rate": 8.410661424656607e-08, + "logits/chosen": -1.0793273448944092, + "logits/rejected": -1.0862334966659546, + "logps/chosen": -724.1187744140625, + "logps/rejected": -809.2515869140625, + "logps/weighted_chosen": -3.901904344558716, + "logps/weighted_rejected": -5.187939643859863, + "loss": 0.5287, + "rewards/accuracies": 0.578125, + "rewards/chosen": -430.724609375, + "rewards/margins": 118.25859069824219, + "rewards/rejected": -549.1734619140625, + "rewards/weighted_accuracies": 0.7250000238418579, + "rewards/weighted_chosen": -1.4421265125274658, + "rewards/weighted_margins": 1.137597680091858, + "rewards/weighted_rejected": -2.579272508621216, + "step": 1590 + }, + { + "epoch": 0.8374771002355405, + "grad_norm": 18.9372615814209, + "learning_rate": 7.910397201763308e-08, + "logits/chosen": -1.035308837890625, + "logits/rejected": -1.0541870594024658, + "logps/chosen": -706.3390502929688, + "logps/rejected": -799.5609130859375, + "logps/weighted_chosen": -3.9484620094299316, + "logps/weighted_rejected": -4.694140434265137, + "loss": 0.5996, + "rewards/accuracies": 0.543749988079071, + 
"rewards/chosen": -439.6175842285156, + "rewards/margins": 100.1382827758789, + "rewards/rejected": -539.5773315429688, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -1.442968726158142, + "rewards/weighted_margins": 0.917126476764679, + "rewards/weighted_rejected": -2.359942674636841, + "step": 1600 + }, + { + "epoch": 0.8427113321120125, + "grad_norm": 42.782772064208984, + "learning_rate": 7.424190564297489e-08, + "logits/chosen": -1.065649390220642, + "logits/rejected": -1.1031494140625, + "logps/chosen": -725.6617431640625, + "logps/rejected": -829.0968627929688, + "logps/weighted_chosen": -3.7089600563049316, + "logps/weighted_rejected": -4.779443264007568, + "loss": 0.5412, + "rewards/accuracies": 0.609375, + "rewards/chosen": -444.4730529785156, + "rewards/margins": 108.79219055175781, + "rewards/rejected": -553.4078369140625, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -1.403631567955017, + "rewards/weighted_margins": 1.0037841796875, + "rewards/weighted_rejected": -2.4067625999450684, + "step": 1610 + }, + { + "epoch": 0.8479455639884846, + "grad_norm": 22.982988357543945, + "learning_rate": 6.952203901326464e-08, + "logits/chosen": -1.019067406654358, + "logits/rejected": -1.0470367670059204, + "logps/chosen": -708.2687377929688, + "logps/rejected": -808.2546997070312, + "logps/weighted_chosen": -3.988232374191284, + "logps/weighted_rejected": -5.289990425109863, + "loss": 0.5378, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -419.03594970703125, + "rewards/margins": 118.74609375, + "rewards/rejected": -537.8722534179688, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.298193335533142, + "rewards/weighted_margins": 0.996386706829071, + "rewards/weighted_rejected": -2.294604539871216, + "step": 1620 + }, + { + "epoch": 0.8531797958649568, + "grad_norm": 34.882381439208984, + "learning_rate": 6.494594852561558e-08, + "logits/chosen": 
-1.0084228515625, + "logits/rejected": -1.042303442955017, + "logps/chosen": -723.1671752929688, + "logps/rejected": -805.4453125, + "logps/weighted_chosen": -3.838757276535034, + "logps/weighted_rejected": -5.274987697601318, + "loss": 0.5154, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -432.11346435546875, + "rewards/margins": 102.5433578491211, + "rewards/rejected": -534.5486450195312, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.2553589344024658, + "rewards/weighted_margins": 1.0247802734375, + "rewards/weighted_rejected": -2.280255079269409, + "step": 1630 + }, + { + "epoch": 0.8584140277414289, + "grad_norm": 39.33643341064453, + "learning_rate": 6.051516255707773e-08, + "logits/chosen": -1.0444824695587158, + "logits/rejected": -1.079583764076233, + "logps/chosen": -666.0718994140625, + "logps/rejected": -780.9375, + "logps/weighted_chosen": -3.915283203125, + "logps/weighted_rejected": -5.016845703125, + "loss": 0.5429, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -393.1158142089844, + "rewards/margins": 135.9542999267578, + "rewards/rejected": -528.6539306640625, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -1.3769409656524658, + "rewards/weighted_margins": 1.0125916004180908, + "rewards/weighted_rejected": -2.3889527320861816, + "step": 1640 + }, + { + "epoch": 0.863648259617901, + "grad_norm": 21.693418502807617, + "learning_rate": 5.6231160954171796e-08, + "logits/chosen": -0.9886413812637329, + "logits/rejected": -1.064294457435608, + "logps/chosen": -679.0531005859375, + "logps/rejected": -852.2890625, + "logps/weighted_chosen": -4.170117378234863, + "logps/weighted_rejected": -4.453076362609863, + "loss": 0.5474, + "rewards/accuracies": 0.640625, + "rewards/chosen": -408.7554626464844, + "rewards/margins": 169.57284545898438, + "rewards/rejected": -578.2144775390625, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": 
-1.352716088294983, + "rewards/weighted_margins": 0.973614513874054, + "rewards/weighted_rejected": -2.326489210128784, + "step": 1650 + }, + { + "epoch": 0.8688824914943732, + "grad_norm": 29.99761199951172, + "learning_rate": 5.209537453863289e-08, + "logits/chosen": -1.0624526739120483, + "logits/rejected": -1.073492407798767, + "logps/chosen": -588.7734375, + "logps/rejected": -707.7062377929688, + "logps/weighted_chosen": -3.745361328125, + "logps/weighted_rejected": -4.813672065734863, + "loss": 0.5391, + "rewards/accuracies": 0.609375, + "rewards/chosen": -318.62890625, + "rewards/margins": 139.2451171875, + "rewards/rejected": -457.93359375, + "rewards/weighted_accuracies": 0.7406250238418579, + "rewards/weighted_chosen": -1.222619652748108, + "rewards/weighted_margins": 1.0465819835662842, + "rewards/weighted_rejected": -2.2699646949768066, + "step": 1660 + }, + { + "epoch": 0.8741167233708453, + "grad_norm": 48.07187271118164, + "learning_rate": 4.8109184629527344e-08, + "logits/chosen": -1.037384033203125, + "logits/rejected": -1.0457366704940796, + "logps/chosen": -638.6046752929688, + "logps/rejected": -779.0484619140625, + "logps/weighted_chosen": -3.819580078125, + "logps/weighted_rejected": -5.112841606140137, + "loss": 0.5366, + "rewards/accuracies": 0.625, + "rewards/chosen": -367.76287841796875, + "rewards/margins": 153.4968719482422, + "rewards/rejected": -521.04296875, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -1.20782470703125, + "rewards/weighted_margins": 1.06304931640625, + "rewards/weighted_rejected": -2.2711548805236816, + "step": 1670 + }, + { + "epoch": 0.8793509552473174, + "grad_norm": 26.007558822631836, + "learning_rate": 4.427392258190399e-08, + "logits/chosen": -1.080664038658142, + "logits/rejected": -1.092126488685608, + "logps/chosen": -695.2984619140625, + "logps/rejected": -768.0953369140625, + "logps/weighted_chosen": -3.857311964035034, + "logps/weighted_rejected": -5.328369140625, + 
"loss": 0.5647, + "rewards/accuracies": 0.565625011920929, + "rewards/chosen": -405.20623779296875, + "rewards/margins": 102.0152359008789, + "rewards/rejected": -507.4429626464844, + "rewards/weighted_accuracies": 0.7437499761581421, + "rewards/weighted_chosen": -1.279486060142517, + "rewards/weighted_margins": 0.929272472858429, + "rewards/weighted_rejected": -2.20843505859375, + "step": 1680 + }, + { + "epoch": 0.8845851871237895, + "grad_norm": 27.376941680908203, + "learning_rate": 4.059086934213141e-08, + "logits/chosen": -1.0285918712615967, + "logits/rejected": -1.064416527748108, + "logps/chosen": -683.9781494140625, + "logps/rejected": -730.2593994140625, + "logps/weighted_chosen": -3.7125487327575684, + "logps/weighted_rejected": -4.866064548492432, + "loss": 0.4997, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -392.7562561035156, + "rewards/margins": 87.4644546508789, + "rewards/rejected": -480.1617126464844, + "rewards/weighted_accuracies": 0.753125011920929, + "rewards/weighted_chosen": -1.2972290515899658, + "rewards/weighted_margins": 1.099523901939392, + "rewards/weighted_rejected": -2.3959717750549316, + "step": 1690 + }, + { + "epoch": 0.8898194190002617, + "grad_norm": 23.167646408081055, + "learning_rate": 3.7061255020073346e-08, + "logits/chosen": -1.0998427867889404, + "logits/rejected": -1.1160888671875, + "logps/chosen": -686.8656005859375, + "logps/rejected": -774.8515625, + "logps/weighted_chosen": -3.696582078933716, + "logps/weighted_rejected": -4.601147651672363, + "loss": 0.519, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -400.40350341796875, + "rewards/margins": 112.77461242675781, + "rewards/rejected": -513.212890625, + "rewards/weighted_accuracies": 0.721875011920929, + "rewards/weighted_chosen": -1.2873656749725342, + "rewards/weighted_margins": 1.022790551185608, + "rewards/weighted_rejected": -2.310229539871216, + "step": 1700 + }, + { + "epoch": 0.8950536508767338, + "grad_norm": 
26.41366195678711, + "learning_rate": 3.3686258478241027e-08, + "logits/chosen": -1.10308837890625, + "logits/rejected": -1.1109344959259033, + "logps/chosen": -737.3968505859375, + "logps/rejected": -790.0671997070312, + "logps/weighted_chosen": -3.860668897628784, + "logps/weighted_rejected": -4.782372951507568, + "loss": 0.583, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -425.9263610839844, + "rewards/margins": 85.51679992675781, + "rewards/rejected": -511.4320373535156, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -1.4254882335662842, + "rewards/weighted_margins": 0.78106689453125, + "rewards/weighted_rejected": -2.206738233566284, + "step": 1710 + }, + { + "epoch": 0.9002878827532059, + "grad_norm": 25.273611068725586, + "learning_rate": 3.0467006938063366e-08, + "logits/chosen": -1.0660889148712158, + "logits/rejected": -1.098059058189392, + "logps/chosen": -671.5203247070312, + "logps/rejected": -752.9375, + "logps/weighted_chosen": -3.584338426589966, + "logps/weighted_rejected": -4.9091796875, + "loss": 0.5652, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -398.796875, + "rewards/margins": 94.3648452758789, + "rewards/rejected": -493.16796875, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -1.3784363269805908, + "rewards/weighted_margins": 1.0235106945037842, + "rewards/weighted_rejected": -2.401867628097534, + "step": 1720 + }, + { + "epoch": 0.9055221146296781, + "grad_norm": 41.00505447387695, + "learning_rate": 2.7404575603403646e-08, + "logits/chosen": -1.0376098155975342, + "logits/rejected": -1.067657470703125, + "logps/chosen": -645.05859375, + "logps/rejected": -789.4781494140625, + "logps/weighted_chosen": -4.015649318695068, + "logps/weighted_rejected": -5.325341701507568, + "loss": 0.4583, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -370.9443359375, + "rewards/margins": 149.03262329101562, + "rewards/rejected": 
-520.0062255859375, + "rewards/weighted_accuracies": 0.7562500238418579, + "rewards/weighted_chosen": -1.2229797840118408, + "rewards/weighted_margins": 1.238745093345642, + "rewards/weighted_rejected": -2.4620728492736816, + "step": 1730 + }, + { + "epoch": 0.9107563465061502, + "grad_norm": 28.638446807861328, + "learning_rate": 2.4499987301450698e-08, + "logits/chosen": -1.068115234375, + "logits/rejected": -1.125952124595642, + "logps/chosen": -704.9406127929688, + "logps/rejected": -853.0062255859375, + "logps/weighted_chosen": -3.899169921875, + "logps/weighted_rejected": -5.107470512390137, + "loss": 0.4743, + "rewards/accuracies": 0.640625, + "rewards/chosen": -385.1449279785156, + "rewards/margins": 183.0597686767578, + "rewards/rejected": -568.173828125, + "rewards/weighted_accuracies": 0.768750011920929, + "rewards/weighted_chosen": -1.2654540538787842, + "rewards/weighted_margins": 1.137670874595642, + "rewards/weighted_rejected": -2.402661085128784, + "step": 1740 + }, + { + "epoch": 0.9159905783826223, + "grad_norm": 32.34984588623047, + "learning_rate": 2.1754212141102347e-08, + "logits/chosen": -1.071801781654358, + "logits/rejected": -1.0734984874725342, + "logps/chosen": -663.3663940429688, + "logps/rejected": -792.84375, + "logps/weighted_chosen": -4.340795993804932, + "logps/weighted_rejected": -5.438916206359863, + "loss": 0.4879, + "rewards/accuracies": 0.640625, + "rewards/chosen": -384.6597595214844, + "rewards/margins": 138.44686889648438, + "rewards/rejected": -523.0758056640625, + "rewards/weighted_accuracies": 0.762499988079071, + "rewards/weighted_chosen": -1.283929467201233, + "rewards/weighted_margins": 1.1354491710662842, + "rewards/weighted_rejected": -2.4184937477111816, + "step": 1750 + }, + { + "epoch": 0.9212248102590945, + "grad_norm": 28.857627868652344, + "learning_rate": 1.9168167188957586e-08, + "logits/chosen": -1.012640357017517, + "logits/rejected": -1.081701636314392, + "logps/chosen": -658.5015869140625, + 
"logps/rejected": -722.2390747070312, + "logps/weighted_chosen": -3.9429688453674316, + "logps/weighted_rejected": -4.90234375, + "loss": 0.5441, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -377.6923828125, + "rewards/margins": 97.7406234741211, + "rewards/rejected": -475.71484375, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -1.31634521484375, + "rewards/weighted_margins": 1.0785338878631592, + "rewards/weighted_rejected": -2.396069288253784, + "step": 1760 + }, + { + "epoch": 0.9264590421355666, + "grad_norm": 39.25354766845703, + "learning_rate": 1.6742716163022865e-08, + "logits/chosen": -1.085870385169983, + "logits/rejected": -1.09857177734375, + "logps/chosen": -688.9390869140625, + "logps/rejected": -837.5187377929688, + "logps/weighted_chosen": -3.732714891433716, + "logps/weighted_rejected": -5.170654296875, + "loss": 0.4973, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -392.45037841796875, + "rewards/margins": 168.8874969482422, + "rewards/rejected": -561.3019409179688, + "rewards/weighted_accuracies": 0.784375011920929, + "rewards/weighted_chosen": -1.3793151378631592, + "rewards/weighted_margins": 1.1719238758087158, + "rewards/weighted_rejected": -2.551684617996216, + "step": 1770 + }, + { + "epoch": 0.9316932740120387, + "grad_norm": 28.262041091918945, + "learning_rate": 1.4478669144238343e-08, + "logits/chosen": -1.0385589599609375, + "logits/rejected": -1.031134009361267, + "logps/chosen": -629.2132568359375, + "logps/rejected": -778.0023193359375, + "logps/weighted_chosen": -3.922070264816284, + "logps/weighted_rejected": -5.3369140625, + "loss": 0.5496, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -366.00799560546875, + "rewards/margins": 156.92031860351562, + "rewards/rejected": -522.5992431640625, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -1.3271605968475342, + "rewards/weighted_margins": 1.061981201171875, + 
"rewards/weighted_rejected": -2.388622999191284, + "step": 1780 + }, + { + "epoch": 0.9369275058885108, + "grad_norm": 49.04172897338867, + "learning_rate": 1.23767823059166e-08, + "logits/chosen": -1.039398193359375, + "logits/rejected": -1.055413842201233, + "logps/chosen": -701.0413818359375, + "logps/rejected": -830.3093872070312, + "logps/weighted_chosen": -3.6467041969299316, + "logps/weighted_rejected": -4.895654201507568, + "loss": 0.5462, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -409.703125, + "rewards/margins": 139.953125, + "rewards/rejected": -549.7257690429688, + "rewards/weighted_accuracies": 0.721875011920929, + "rewards/weighted_chosen": -1.362951636314392, + "rewards/weighted_margins": 1.0340454578399658, + "rewards/weighted_rejected": -2.3976073265075684, + "step": 1790 + }, + { + "epoch": 0.942161737764983, + "grad_norm": 22.44223976135254, + "learning_rate": 1.0437757661187486e-08, + "logits/chosen": -1.063806176185608, + "logits/rejected": -1.0848114490509033, + "logps/chosen": -749.4093627929688, + "logps/rejected": -854.6468505859375, + "logps/weighted_chosen": -4.386376857757568, + "logps/weighted_rejected": -5.128662109375, + "loss": 0.5093, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -433.10076904296875, + "rewards/margins": 140.12149047851562, + "rewards/rejected": -573.2366943359375, + "rewards/weighted_accuracies": 0.731249988079071, + "rewards/weighted_chosen": -1.32525634765625, + "rewards/weighted_margins": 1.0437133312225342, + "rewards/weighted_rejected": -2.3693480491638184, + "step": 1800 + }, + { + "epoch": 0.9473959696414551, + "grad_norm": 50.09382247924805, + "learning_rate": 8.662242828530953e-09, + "logits/chosen": -1.0404754877090454, + "logits/rejected": -1.0818588733673096, + "logps/chosen": -646.4109497070312, + "logps/rejected": -787.7718505859375, + "logps/weighted_chosen": -4.325634956359863, + "logps/weighted_rejected": -5.2548828125, + "loss": 0.5664, + 
"rewards/accuracies": 0.668749988079071, + "rewards/chosen": -384.98907470703125, + "rewards/margins": 149.36874389648438, + "rewards/rejected": -534.6765747070312, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -1.4359314441680908, + "rewards/weighted_margins": 0.99676513671875, + "rewards/weighted_rejected": -2.4330811500549316, + "step": 1810 + }, + { + "epoch": 0.9526302015179272, + "grad_norm": 31.461109161376953, + "learning_rate": 7.050830815478082e-09, + "logits/chosen": -1.0684020519256592, + "logits/rejected": -1.0823822021484375, + "logps/chosen": -642.1265869140625, + "logps/rejected": -759.2999877929688, + "logps/weighted_chosen": -4.116991996765137, + "logps/weighted_rejected": -5.237597465515137, + "loss": 0.5132, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -381.7503967285156, + "rewards/margins": 132.55233764648438, + "rewards/rejected": -514.3597412109375, + "rewards/weighted_accuracies": 0.731249988079071, + "rewards/weighted_chosen": -1.338903784751892, + "rewards/weighted_margins": 1.0992553234100342, + "rewards/weighted_rejected": -2.4381957054138184, + "step": 1820 + }, + { + "epoch": 0.9578644333943994, + "grad_norm": 26.419347763061523, + "learning_rate": 5.604059820551177e-09, + "logits/chosen": -1.067419409751892, + "logits/rejected": -1.0749084949493408, + "logps/chosen": -693.1859130859375, + "logps/rejected": -796.1453247070312, + "logps/weighted_chosen": -4.111572265625, + "logps/weighted_rejected": -5.332861423492432, + "loss": 0.5009, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -394.80682373046875, + "rewards/margins": 140.7951202392578, + "rewards/rejected": -535.8195190429688, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -1.3236510753631592, + "rewards/weighted_margins": 1.1140258312225342, + "rewards/weighted_rejected": -2.43792724609375, + "step": 1830 + }, + { + "epoch": 0.9630986652708715, + "grad_norm": 53.944435119628906, + 
"learning_rate": 4.322413053509943e-09, + "logits/chosen": -1.050134301185608, + "logits/rejected": -1.075842261314392, + "logps/chosen": -727.1593627929688, + "logps/rejected": -827.8078002929688, + "logps/weighted_chosen": -3.7392578125, + "logps/weighted_rejected": -5.269579887390137, + "loss": 0.5188, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -425.8460998535156, + "rewards/margins": 129.8203125, + "rewards/rejected": -555.5859375, + "rewards/weighted_accuracies": 0.746874988079071, + "rewards/weighted_chosen": -1.332269310951233, + "rewards/weighted_margins": 1.074914574623108, + "rewards/weighted_rejected": -2.407489061355591, + "step": 1840 + }, + { + "epoch": 0.9683328971473436, + "grad_norm": 25.737455368041992, + "learning_rate": 3.206318573963418e-09, + "logits/chosen": -1.0623962879180908, + "logits/rejected": -1.0946044921875, + "logps/chosen": -675.3171997070312, + "logps/rejected": -770.2156372070312, + "logps/weighted_chosen": -4.039794921875, + "logps/weighted_rejected": -5.077490329742432, + "loss": 0.5267, + "rewards/accuracies": 0.59375, + "rewards/chosen": -389.2749938964844, + "rewards/margins": 121.53242492675781, + "rewards/rejected": -510.8343811035156, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -1.363183617591858, + "rewards/weighted_margins": 1.0187866687774658, + "rewards/weighted_rejected": -2.381420850753784, + "step": 1850 + }, + { + "epoch": 0.9735671290238157, + "grad_norm": 21.196102142333984, + "learning_rate": 2.256149148401387e-09, + "logits/chosen": -1.0624481439590454, + "logits/rejected": -1.064080834388733, + "logps/chosen": -666.1961059570312, + "logps/rejected": -834.4312744140625, + "logps/weighted_chosen": -4.077294826507568, + "logps/weighted_rejected": -4.933495998382568, + "loss": 0.5029, + "rewards/accuracies": 0.640625, + "rewards/chosen": -393.24139404296875, + "rewards/margins": 176.5304718017578, + "rewards/rejected": -569.5242309570312, + 
"rewards/weighted_accuracies": 0.7593749761581421, + "rewards/weighted_chosen": -1.389379858970642, + "rewards/weighted_margins": 1.0392944812774658, + "rewards/weighted_rejected": -2.4274535179138184, + "step": 1860 + }, + { + "epoch": 0.9788013609002879, + "grad_norm": 28.988407135009766, + "learning_rate": 1.4722221256933676e-09, + "logits/chosen": -1.091333031654358, + "logits/rejected": -1.099829077720642, + "logps/chosen": -689.6468505859375, + "logps/rejected": -761.02734375, + "logps/weighted_chosen": -3.801464796066284, + "logps/weighted_rejected": -5.50390625, + "loss": 0.5502, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -418.20428466796875, + "rewards/margins": 90.15116882324219, + "rewards/rejected": -508.333984375, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -1.4282042980194092, + "rewards/weighted_margins": 0.9919677972793579, + "rewards/weighted_rejected": -2.419872999191284, + "step": 1870 + }, + { + "epoch": 0.98403559277676, + "grad_norm": 31.547182083129883, + "learning_rate": 8.547993310970003e-10, + "logits/chosen": -1.0585174560546875, + "logits/rejected": -1.088720679283142, + "logps/chosen": -652.5765380859375, + "logps/rejected": -762.1343994140625, + "logps/weighted_chosen": -4.279052734375, + "logps/weighted_rejected": -5.315331935882568, + "loss": 0.5933, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -371.46563720703125, + "rewards/margins": 122.81367492675781, + "rewards/rejected": -494.56329345703125, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -1.31964111328125, + "rewards/weighted_margins": 0.9644409418106079, + "rewards/weighted_rejected": -2.283703565597534, + "step": 1880 + }, + { + "epoch": 0.9892698246532321, + "grad_norm": 33.57475662231445, + "learning_rate": 4.040869788100032e-10, + "logits/chosen": -1.0377686023712158, + "logits/rejected": -1.065820336341858, + "logps/chosen": -664.56640625, + "logps/rejected": -729.34375, + 
"logps/weighted_chosen": -3.905956983566284, + "logps/weighted_rejected": -4.855029106140137, + "loss": 0.5181, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -385.83087158203125, + "rewards/margins": 97.8460922241211, + "rewards/rejected": -483.4437561035156, + "rewards/weighted_accuracies": 0.75, + "rewards/weighted_chosen": -1.3222167491912842, + "rewards/weighted_margins": 1.018835425376892, + "rewards/weighted_rejected": -2.341479539871216, + "step": 1890 + }, + { + "epoch": 0.9945040565297043, + "grad_norm": 23.152149200439453, + "learning_rate": 1.202356030968743e-10, + "logits/chosen": -1.079199194908142, + "logits/rejected": -1.0971558094024658, + "logps/chosen": -726.5609130859375, + "logps/rejected": -814.0968627929688, + "logps/weighted_chosen": -3.6655516624450684, + "logps/weighted_rejected": -4.904101371765137, + "loss": 0.5332, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -419.51837158203125, + "rewards/margins": 128.578125, + "rewards/rejected": -548.2413940429688, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -1.3955078125, + "rewards/weighted_margins": 1.1153442859649658, + "rewards/weighted_rejected": -2.50994873046875, + "step": 1900 + }, + { + "epoch": 0.9997382884061764, + "grad_norm": 23.48067283630371, + "learning_rate": 3.3400080112211405e-12, + "logits/chosen": -1.0469787120819092, + "logits/rejected": -1.074462890625, + "logps/chosen": -742.5452880859375, + "logps/rejected": -856.6265869140625, + "logps/weighted_chosen": -3.984057664871216, + "logps/weighted_rejected": -5.161230564117432, + "loss": 0.4937, + "rewards/accuracies": 0.640625, + "rewards/chosen": -442.21466064453125, + "rewards/margins": 135.20547485351562, + "rewards/rejected": -577.2562255859375, + "rewards/weighted_accuracies": 0.765625, + "rewards/weighted_chosen": -1.3606140613555908, + "rewards/weighted_margins": 1.040490746498108, + "rewards/weighted_rejected": -2.401928663253784, + "step": 1910 + 
} + ], + "logging_steps": 10, + "max_steps": 1911, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1911/training_args.bin b/checkpoint-1911/training_args.bin new file mode 100644 index 0000000..f626c5c --- /dev/null +++ b/checkpoint-1911/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe50c447c403d41daaff89efed0e09938eb1d87a3da0072f9c7935887bef87f +size 8721 diff --git a/checkpoint-1911/zero_to_fp32.py b/checkpoint-1911/zero_to_fp32.py new file mode 100644 index 0000000..0e75914 --- /dev/null +++ b/checkpoint-1911/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-500/chat_template.jinja b/checkpoint-500/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/checkpoint-500/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000..3f8f5c0 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 128256 +} 
diff --git a/checkpoint-500/generation_config.json b/checkpoint-500/generation_config.json new file mode 100644 index 0000000..fc3c54a --- /dev/null +++ b/checkpoint-500/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "eos_token_id": 128001, + "transformers_version": "4.54.1" +} diff --git a/checkpoint-500/latest b/checkpoint-500/latest new file mode 100644 index 0000000..f0b47ce --- /dev/null +++ b/checkpoint-500/latest @@ -0,0 +1 @@ +global_step500 \ No newline at end of file diff --git a/checkpoint-500/model-00001-of-00004.safetensors b/checkpoint-500/model-00001-of-00004.safetensors new file mode 100644 index 0000000..cffbdca --- /dev/null +++ b/checkpoint-500/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91a01525b444d03ce57fdca64d3d28a6f2c8e030544d1bcb066abc59b132764 +size 4976698672 diff --git a/checkpoint-500/model-00002-of-00004.safetensors b/checkpoint-500/model-00002-of-00004.safetensors new file mode 100644 index 0000000..1c5a7ff --- /dev/null +++ b/checkpoint-500/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c8b4b646c621408e699300051c9cf3ce7ae7a3b007fe629c6a9433ee16018aa +size 4999802720 diff --git a/checkpoint-500/model-00003-of-00004.safetensors b/checkpoint-500/model-00003-of-00004.safetensors new file mode 100644 index 0000000..0529ef4 --- /dev/null +++ b/checkpoint-500/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b60d583226972242d56b9c03e0b6e2214a00ac28056a08800a6bead96b462c +size 4915916176 diff --git a/checkpoint-500/model-00004-of-00004.safetensors b/checkpoint-500/model-00004-of-00004.safetensors new file mode 100644 index 0000000..59796a0 --- /dev/null +++ b/checkpoint-500/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6cb7851e7abb189938ac4a45b7c93ab05718f3bcc6d42f94473952febc39dc20 +size 1168138808 diff --git a/checkpoint-500/model.safetensors.index.json b/checkpoint-500/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/checkpoint-500/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": 
"model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": 
"model-00004-of-00004.safetensors" + } +} diff --git a/checkpoint-500/rng_state_0.pth b/checkpoint-500/rng_state_0.pth new file mode 100644 index 0000000..6ae1c3a --- /dev/null +++ b/checkpoint-500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fe1a45c0111b18df213058c73c3a4e717295b975e92faf7b2e048e6504b3f3 +size 14917 diff --git a/checkpoint-500/rng_state_1.pth b/checkpoint-500/rng_state_1.pth new file mode 100644 index 0000000..58bf4e2 --- /dev/null +++ b/checkpoint-500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bf26da988f2f17ca0d366aece1dfdb5c3bcab91066168b7062b361b8c3ac2d6 +size 14917 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000..a2af8e9 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f8728b8c285bed1ca96ea99a5e658a8a9c58f9dd1ce1805f1213195612503b +size 1465 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json new file mode 100644 index 0000000..03aa64f --- /dev/null +++ b/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0968dcc0ee8e56c7dccd34a7f51f8065ea0cb9e2cc529e3243d1e5c0a4bdaa0c +size 17208754 diff --git a/checkpoint-500/tokenizer_config.json 
b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000..877a9a9 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": 
"<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { 
+ "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": 
"<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": 
"<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": 
"<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 32768, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000..eeffc45 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,1127 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": 
null, + "epoch": 0.26171159382360637, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005234231876472127, + "grad_norm": 132.6717987060547, + "learning_rate": 0.0, + "logits/chosen": -0.40118408203125, + "logits/rejected": -0.41802978515625, + "logps/chosen": -297.609375, + "logps/rejected": -247.84375, + "logps/weighted_chosen": -4.7568359375, + "logps/weighted_rejected": -3.47998046875, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/weighted_accuracies": 0.0, + "rewards/weighted_chosen": 0.0, + "rewards/weighted_margins": 0.0, + "rewards/weighted_rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.005234231876472127, + "grad_norm": 226.00839233398438, + "learning_rate": 4.6875e-08, + "logits/chosen": -0.3175845742225647, + "logits/rejected": -0.3532341718673706, + "logps/chosen": -275.5841979980469, + "logps/rejected": -255.84548950195312, + "logps/weighted_chosen": -2.651665687561035, + "logps/weighted_rejected": -2.88427734375, + "loss": 0.6921, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0731336772441864, + "rewards/margins": -0.0670572891831398, + "rewards/rejected": -0.006076388992369175, + "rewards/weighted_accuracies": 0.2951388955116272, + "rewards/weighted_chosen": -0.0015835232334211469, + "rewards/weighted_margins": -0.0009324815473519266, + "rewards/weighted_rejected": -0.0006510416860692203, + "step": 10 + }, + { + "epoch": 0.010468463752944255, + "grad_norm": 14.726158142089844, + "learning_rate": 9.895833333333332e-08, + "logits/chosen": -0.29781341552734375, + "logits/rejected": -0.3136836886405945, + "logps/chosen": -294.4296875, + "logps/rejected": -272.58984375, + "logps/weighted_chosen": -2.458728075027466, + "logps/weighted_rejected": -2.455883741378784, + "loss": 0.6924, + "rewards/accuracies": 0.30000001192092896, + 
"rewards/chosen": -0.14013671875, + "rewards/margins": -0.13369140028953552, + "rewards/rejected": -0.0064453124068677425, + "rewards/weighted_accuracies": 0.34062498807907104, + "rewards/weighted_chosen": -0.0013603210682049394, + "rewards/weighted_margins": -0.0010925292735919356, + "rewards/weighted_rejected": -0.0002677917364053428, + "step": 20 + }, + { + "epoch": 0.015702695629416383, + "grad_norm": 77.6125259399414, + "learning_rate": 1.5104166666666664e-07, + "logits/chosen": -0.2918853759765625, + "logits/rejected": -0.3377639651298523, + "logps/chosen": -298.05859375, + "logps/rejected": -268.0132751464844, + "logps/weighted_chosen": -2.4350829124450684, + "logps/weighted_rejected": -2.7343993186950684, + "loss": 0.6926, + "rewards/accuracies": 0.24062499403953552, + "rewards/chosen": -0.06621094048023224, + "rewards/margins": -0.1640625, + "rewards/rejected": 0.09785155951976776, + "rewards/weighted_accuracies": 0.3187499940395355, + "rewards/weighted_chosen": 0.0014068603049963713, + "rewards/weighted_margins": -0.0015777588123455644, + "rewards/weighted_rejected": 0.0029846192337572575, + "step": 30 + }, + { + "epoch": 0.02093692750588851, + "grad_norm": 30.666196823120117, + "learning_rate": 2.03125e-07, + "logits/chosen": -0.30072021484375, + "logits/rejected": -0.3433845639228821, + "logps/chosen": -278.68829345703125, + "logps/rejected": -253.90780639648438, + "logps/weighted_chosen": -2.506396532058716, + "logps/weighted_rejected": -2.8416504859924316, + "loss": 0.6908, + "rewards/accuracies": 0.3062500059604645, + "rewards/chosen": 0.063232421875, + "rewards/margins": 0.04838867112994194, + "rewards/rejected": 0.014843749813735485, + "rewards/weighted_accuracies": 0.40312498807907104, + "rewards/weighted_chosen": 0.0042968750931322575, + "rewards/weighted_margins": 0.0019538879860192537, + "rewards/weighted_rejected": 0.0023429871071130037, + "step": 40 + }, + { + "epoch": 0.02617115938236064, + "grad_norm": 18.60569953918457, + "learning_rate": 
2.552083333333333e-07, + "logits/chosen": -0.2819870114326477, + "logits/rejected": -0.32059136033058167, + "logps/chosen": -280.31951904296875, + "logps/rejected": -267.4359436035156, + "logps/weighted_chosen": -2.4267334938049316, + "logps/weighted_rejected": -2.529711961746216, + "loss": 0.6891, + "rewards/accuracies": 0.3187499940395355, + "rewards/chosen": -0.03535156324505806, + "rewards/margins": -0.13984374701976776, + "rewards/rejected": 0.1044921875, + "rewards/weighted_accuracies": 0.3968749940395355, + "rewards/weighted_chosen": 0.0039031982887536287, + "rewards/weighted_margins": 0.005755615420639515, + "rewards/weighted_rejected": -0.0018524170154705644, + "step": 50 + }, + { + "epoch": 0.031405391258832765, + "grad_norm": 38.21036911010742, + "learning_rate": 3.0729166666666665e-07, + "logits/chosen": -0.31453245878219604, + "logits/rejected": -0.30809077620506287, + "logps/chosen": -277.66015625, + "logps/rejected": -261.7445373535156, + "logps/weighted_chosen": -2.8622069358825684, + "logps/weighted_rejected": -2.7553467750549316, + "loss": 0.6894, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": 0.04150390625, + "rewards/margins": 0.08027343451976776, + "rewards/rejected": -0.03876953199505806, + "rewards/weighted_accuracies": 0.4312500059604645, + "rewards/weighted_chosen": 0.0006561279296875, + "rewards/weighted_margins": 0.006243896670639515, + "rewards/weighted_rejected": -0.005587768740952015, + "step": 60 + }, + { + "epoch": 0.036639623135304895, + "grad_norm": 69.19047546386719, + "learning_rate": 3.59375e-07, + "logits/chosen": -0.3177490234375, + "logits/rejected": -0.3246749937534332, + "logps/chosen": -289.76251220703125, + "logps/rejected": -244.92578125, + "logps/weighted_chosen": -2.3438963890075684, + "logps/weighted_rejected": -2.7010498046875, + "loss": 0.6841, + "rewards/accuracies": 0.49687498807907104, + "rewards/chosen": 0.29765623807907104, + "rewards/margins": 0.4546875059604645, + "rewards/rejected": 
-0.15703125298023224, + "rewards/weighted_accuracies": 0.5406249761581421, + "rewards/weighted_chosen": 0.01530532818287611, + "rewards/weighted_margins": 0.01918792724609375, + "rewards/weighted_rejected": -0.0038825988303869963, + "step": 70 + }, + { + "epoch": 0.04187385501177702, + "grad_norm": 51.98476791381836, + "learning_rate": 4.114583333333333e-07, + "logits/chosen": -0.2850998044013977, + "logits/rejected": -0.30662041902542114, + "logps/chosen": -289.234375, + "logps/rejected": -270.375, + "logps/weighted_chosen": -2.5325684547424316, + "logps/weighted_rejected": -2.796435594558716, + "loss": 0.6747, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.512499988079071, + "rewards/margins": 0.6001952886581421, + "rewards/rejected": -0.08769531548023224, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": 0.036380767822265625, + "rewards/weighted_margins": 0.04396667331457138, + "rewards/weighted_rejected": -0.007586670108139515, + "step": 80 + }, + { + "epoch": 0.04710808688824915, + "grad_norm": 30.52783203125, + "learning_rate": 4.6354166666666664e-07, + "logits/chosen": -0.3142959475517273, + "logits/rejected": -0.3075408935546875, + "logps/chosen": -280.11407470703125, + "logps/rejected": -257.95233154296875, + "logps/weighted_chosen": -2.719482421875, + "logps/weighted_rejected": -2.88037109375, + "loss": 0.6687, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.5205078125, + "rewards/margins": 0.737109363079071, + "rewards/rejected": -0.21660156548023224, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": 0.06780395656824112, + "rewards/weighted_margins": 0.07340697944164276, + "rewards/weighted_rejected": -0.0056396485306322575, + "step": 90 + }, + { + "epoch": 0.05234231876472128, + "grad_norm": 69.397705078125, + "learning_rate": 5.156249999999999e-07, + "logits/chosen": -0.28213196992874146, + "logits/rejected": -0.3543289303779602, + "logps/chosen": 
-290.71875, + "logps/rejected": -286.73126220703125, + "logps/weighted_chosen": -2.2228636741638184, + "logps/weighted_rejected": -2.8367552757263184, + "loss": 0.6848, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": 0.24521484971046448, + "rewards/margins": 1.0690429210662842, + "rewards/rejected": -0.8238281011581421, + "rewards/weighted_accuracies": 0.5843750238418579, + "rewards/weighted_chosen": 0.05242309719324112, + "rewards/weighted_margins": 0.05032653734087944, + "rewards/weighted_rejected": 0.0021240233909338713, + "step": 100 + }, + { + "epoch": 0.05757655064119341, + "grad_norm": 36.600040435791016, + "learning_rate": 5.677083333333333e-07, + "logits/chosen": -0.33063429594039917, + "logits/rejected": -0.319937139749527, + "logps/chosen": -296.82501220703125, + "logps/rejected": -262.2984313964844, + "logps/weighted_chosen": -2.8468017578125, + "logps/weighted_rejected": -2.9306397438049316, + "loss": 0.6773, + "rewards/accuracies": 0.578125, + "rewards/chosen": -0.474609375, + "rewards/margins": 1.053613305091858, + "rewards/rejected": -1.528222680091858, + "rewards/weighted_accuracies": 0.534375011920929, + "rewards/weighted_chosen": 0.013439941219985485, + "rewards/weighted_margins": 0.05541381984949112, + "rewards/weighted_rejected": -0.04198913648724556, + "step": 110 + }, + { + "epoch": 0.06281078251766553, + "grad_norm": 57.109580993652344, + "learning_rate": 6.197916666666666e-07, + "logits/chosen": -0.33633461594581604, + "logits/rejected": -0.36155110597610474, + "logps/chosen": -295.3687438964844, + "logps/rejected": -256.1953125, + "logps/weighted_chosen": -2.161865234375, + "logps/weighted_rejected": -2.4251465797424316, + "loss": 0.6791, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": -0.72900390625, + "rewards/margins": 1.641210913658142, + "rewards/rejected": -2.3702149391174316, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": 0.007176590152084827, + 
"rewards/weighted_margins": 0.05286560207605362, + "rewards/weighted_rejected": -0.04570160061120987, + "step": 120 + }, + { + "epoch": 0.06804501439413765, + "grad_norm": 39.176841735839844, + "learning_rate": 6.718749999999999e-07, + "logits/chosen": -0.29625242948532104, + "logits/rejected": -0.2914108335971832, + "logps/chosen": -306.6781311035156, + "logps/rejected": -280.15936279296875, + "logps/weighted_chosen": -2.188079833984375, + "logps/weighted_rejected": -2.5787596702575684, + "loss": 0.6659, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.644238293170929, + "rewards/margins": 1.972265601158142, + "rewards/rejected": -2.616406202316284, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": 0.01349639892578125, + "rewards/weighted_margins": 0.0841522216796875, + "rewards/weighted_rejected": -0.07064209133386612, + "step": 130 + }, + { + "epoch": 0.07327924627060979, + "grad_norm": 52.14993667602539, + "learning_rate": 7.239583333333333e-07, + "logits/chosen": -0.3304199278354645, + "logits/rejected": -0.3464847505092621, + "logps/chosen": -301.4390563964844, + "logps/rejected": -277.9515686035156, + "logps/weighted_chosen": -2.554003953933716, + "logps/weighted_rejected": -2.881591796875, + "loss": 0.6581, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -2.746875047683716, + "rewards/margins": 2.744921922683716, + "rewards/rejected": -5.491991996765137, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.02762756310403347, + "rewards/weighted_margins": 0.11510010063648224, + "rewards/weighted_rejected": -0.14276733994483948, + "step": 140 + }, + { + "epoch": 0.07851347814708191, + "grad_norm": 22.611814498901367, + "learning_rate": 7.760416666666666e-07, + "logits/chosen": -0.2870376706123352, + "logits/rejected": -0.2975311279296875, + "logps/chosen": -287.859375, + "logps/rejected": -257.54296875, + "logps/weighted_chosen": -3.089892625808716, + 
"logps/weighted_rejected": -3.1946043968200684, + "loss": 0.6544, + "rewards/accuracies": 0.625, + "rewards/chosen": -3.7095704078674316, + "rewards/margins": 2.942578077316284, + "rewards/rejected": -6.652148246765137, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.005145263858139515, + "rewards/weighted_margins": 0.16416625678539276, + "rewards/weighted_rejected": -0.16951599717140198, + "step": 150 + }, + { + "epoch": 0.08374771002355404, + "grad_norm": 15.511767387390137, + "learning_rate": 8.28125e-07, + "logits/chosen": -0.3232177793979645, + "logits/rejected": -0.3726806640625, + "logps/chosen": -308.91796875, + "logps/rejected": -282.15704345703125, + "logps/weighted_chosen": -2.5903563499450684, + "logps/weighted_rejected": -2.742602586746216, + "loss": 0.6211, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -4.956835746765137, + "rewards/margins": 3.9961915016174316, + "rewards/rejected": -8.953222274780273, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -0.002410888671875, + "rewards/weighted_margins": 0.23797607421875, + "rewards/weighted_rejected": -0.24028320610523224, + "step": 160 + }, + { + "epoch": 0.08898194190002617, + "grad_norm": 167.33956909179688, + "learning_rate": 8.802083333333333e-07, + "logits/chosen": -0.36021536588668823, + "logits/rejected": -0.3597045838832855, + "logps/chosen": -311.03045654296875, + "logps/rejected": -270.46875, + "logps/weighted_chosen": -2.8318848609924316, + "logps/weighted_rejected": -3.139453172683716, + "loss": 0.6949, + "rewards/accuracies": 0.609375, + "rewards/chosen": -8.0087890625, + "rewards/margins": 4.345898628234863, + "rewards/rejected": -12.354199409484863, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -0.01859130896627903, + "rewards/weighted_margins": 0.20853272080421448, + "rewards/weighted_rejected": -0.22731323540210724, + "step": 170 + }, + { + "epoch": 0.0942161737764983, + 
"grad_norm": 64.57138061523438, + "learning_rate": 9.322916666666666e-07, + "logits/chosen": -0.33618468046188354, + "logits/rejected": -0.3534431457519531, + "logps/chosen": -284.2171936035156, + "logps/rejected": -272.12969970703125, + "logps/weighted_chosen": -2.694580078125, + "logps/weighted_rejected": -3.225878953933716, + "loss": 0.6814, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -10.43701171875, + "rewards/margins": 5.353320121765137, + "rewards/rejected": -15.7919921875, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.08297424018383026, + "rewards/weighted_margins": 0.26459962129592896, + "rewards/weighted_rejected": -0.347381591796875, + "step": 180 + }, + { + "epoch": 0.09945040565297043, + "grad_norm": 49.0852165222168, + "learning_rate": 9.84375e-07, + "logits/chosen": -0.354086309671402, + "logits/rejected": -0.38891831040382385, + "logps/chosen": -319.17498779296875, + "logps/rejected": -283.31561279296875, + "logps/weighted_chosen": -2.5078492164611816, + "logps/weighted_rejected": -3.016357421875, + "loss": 0.6496, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -13.561426162719727, + "rewards/margins": 5.937890529632568, + "rewards/rejected": -19.498828887939453, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.16942748427391052, + "rewards/weighted_margins": 0.24410399794578552, + "rewards/weighted_rejected": -0.41356199979782104, + "step": 190 + }, + { + "epoch": 0.10468463752944256, + "grad_norm": 53.46296691894531, + "learning_rate": 9.99959085414323e-07, + "logits/chosen": -0.37868577241897583, + "logits/rejected": -0.4114578366279602, + "logps/chosen": -324.7124938964844, + "logps/rejected": -279.72967529296875, + "logps/weighted_chosen": -2.8757567405700684, + "logps/weighted_rejected": -3.3623046875, + "loss": 0.639, + "rewards/accuracies": 0.609375, + "rewards/chosen": -15.428125381469727, + "rewards/margins": 6.552148342132568, + 
"rewards/rejected": -21.975000381469727, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.18135985732078552, + "rewards/weighted_margins": 0.29008787870407104, + "rewards/weighted_rejected": -0.471527099609375, + "step": 200 + }, + { + "epoch": 0.10991886940591468, + "grad_norm": 24.815481185913086, + "learning_rate": 9.997587035630105e-07, + "logits/chosen": -0.3853309750556946, + "logits/rejected": -0.4257049560546875, + "logps/chosen": -302.82891845703125, + "logps/rejected": -308.671875, + "logps/weighted_chosen": -2.632519483566284, + "logps/weighted_rejected": -3.3669190406799316, + "loss": 0.6558, + "rewards/accuracies": 0.65625, + "rewards/chosen": -15.814453125, + "rewards/margins": 8.331445693969727, + "rewards/rejected": -24.146093368530273, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.225901797413826, + "rewards/weighted_margins": 0.23236694931983948, + "rewards/weighted_rejected": -0.45829468965530396, + "step": 210 + }, + { + "epoch": 0.11515310128238682, + "grad_norm": 24.175745010375977, + "learning_rate": 9.99391406364405e-07, + "logits/chosen": -0.37365952134132385, + "logits/rejected": -0.3758789002895355, + "logps/chosen": -309.34686279296875, + "logps/rejected": -293.98126220703125, + "logps/weighted_chosen": -3.002514600753784, + "logps/weighted_rejected": -3.453906297683716, + "loss": 0.6732, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -17.203418731689453, + "rewards/margins": 7.933203220367432, + "rewards/rejected": -25.137109756469727, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.259225457906723, + "rewards/weighted_margins": 0.29540252685546875, + "rewards/weighted_rejected": -0.5546798706054688, + "step": 220 + }, + { + "epoch": 0.12038733315885894, + "grad_norm": 85.15988159179688, + "learning_rate": 9.988573164927884e-07, + "logits/chosen": -0.3097473084926605, + "logits/rejected": -0.3477935791015625, + 
"logps/chosen": -286.5078125, + "logps/rejected": -281.8453063964844, + "logps/weighted_chosen": -2.66943359375, + "logps/weighted_rejected": -3.1229491233825684, + "loss": 0.6646, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -17.690723419189453, + "rewards/margins": 12.424609184265137, + "rewards/rejected": -30.110157012939453, + "rewards/weighted_accuracies": 0.65625, + "rewards/weighted_chosen": -0.2716217041015625, + "rewards/weighted_margins": 0.32661741971969604, + "rewards/weighted_rejected": -0.5983597040176392, + "step": 230 + }, + { + "epoch": 0.12562156503533106, + "grad_norm": 26.17377471923828, + "learning_rate": 9.98156612329838e-07, + "logits/chosen": -0.39516907930374146, + "logits/rejected": -0.44511109590530396, + "logps/chosen": -286.74884033203125, + "logps/rejected": -318.22735595703125, + "logps/weighted_chosen": -2.6696534156799316, + "logps/weighted_rejected": -3.4151854515075684, + "loss": 0.643, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -21.990428924560547, + "rewards/margins": 14.028905868530273, + "rewards/rejected": -36.013282775878906, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.2329559326171875, + "rewards/weighted_margins": 0.3950134217739105, + "rewards/weighted_rejected": -0.6281493902206421, + "step": 240 + }, + { + "epoch": 0.13085579691180318, + "grad_norm": 56.73057174682617, + "learning_rate": 9.97289527905053e-07, + "logits/chosen": -0.40631332993507385, + "logits/rejected": -0.4203124940395355, + "logps/chosen": -290.1703186035156, + "logps/rejected": -291.6328125, + "logps/weighted_chosen": -3.051513671875, + "logps/weighted_rejected": -3.3163819313049316, + "loss": 0.677, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -25.742870330810547, + "rewards/margins": 9.973828315734863, + "rewards/rejected": -35.72148513793945, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.2856689393520355, + 
"rewards/weighted_margins": 0.253326416015625, + "rewards/weighted_rejected": -0.5388733148574829, + "step": 250 + }, + { + "epoch": 0.1360900287882753, + "grad_norm": 17.766258239746094, + "learning_rate": 9.962563528175875e-07, + "logits/chosen": -0.3611465394496918, + "logits/rejected": -0.39628905057907104, + "logps/chosen": -324.36639404296875, + "logps/rejected": -297.765625, + "logps/weighted_chosen": -2.652392625808716, + "logps/weighted_rejected": -3.535571336746216, + "loss": 0.6414, + "rewards/accuracies": 0.578125, + "rewards/chosen": -25.621288299560547, + "rewards/margins": 11.306055068969727, + "rewards/rejected": -36.93359375, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -0.2533508241176605, + "rewards/weighted_margins": 0.2956604063510895, + "rewards/weighted_rejected": -0.5490142703056335, + "step": 260 + }, + { + "epoch": 0.14132426066474746, + "grad_norm": 17.552453994750977, + "learning_rate": 9.950574321395277e-07, + "logits/chosen": -0.41735154390335083, + "logits/rejected": -0.441476434469223, + "logps/chosen": -314.5093688964844, + "logps/rejected": -295.7093811035156, + "logps/weighted_chosen": -2.864941358566284, + "logps/weighted_rejected": -3.25732421875, + "loss": 0.661, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -25.1123046875, + "rewards/margins": 7.519726753234863, + "rewards/rejected": -32.62890625, + "rewards/weighted_accuracies": 0.609375, + "rewards/weighted_chosen": -0.3035888671875, + "rewards/weighted_margins": 0.2833190858364105, + "rewards/weighted_rejected": -0.5868393182754517, + "step": 270 + }, + { + "epoch": 0.14655849254121958, + "grad_norm": 47.66518020629883, + "learning_rate": 9.936931663006413e-07, + "logits/chosen": -0.4760284423828125, + "logits/rejected": -0.46795654296875, + "logps/chosen": -323.48126220703125, + "logps/rejected": -313.2875061035156, + "logps/weighted_chosen": -2.794970750808716, + "logps/weighted_rejected": -3.3581910133361816, + "loss": 
0.6169, + "rewards/accuracies": 0.690625011920929, + "rewards/chosen": -20.707616806030273, + "rewards/margins": 13.166601181030273, + "rewards/rejected": -33.86640548706055, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.10174255073070526, + "rewards/weighted_margins": 0.34544676542282104, + "rewards/weighted_rejected": -0.447021484375, + "step": 280 + }, + { + "epoch": 0.1517927244176917, + "grad_norm": 32.503883361816406, + "learning_rate": 9.921640109546357e-07, + "logits/chosen": -0.44742050766944885, + "logits/rejected": -0.5166229009628296, + "logps/chosen": -292.1796875, + "logps/rejected": -289.6234436035156, + "logps/weighted_chosen": -2.7469239234924316, + "logps/weighted_rejected": -3.9541258811950684, + "loss": 0.6249, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -25.293359756469727, + "rewards/margins": 12.698633193969727, + "rewards/rejected": -37.994529724121094, + "rewards/weighted_accuracies": 0.628125011920929, + "rewards/weighted_chosen": -0.15215758979320526, + "rewards/weighted_margins": 0.4393859803676605, + "rewards/weighted_rejected": -0.5915588140487671, + "step": 290 + }, + { + "epoch": 0.15702695629416383, + "grad_norm": 17.32170867919922, + "learning_rate": 9.90470476826975e-07, + "logits/chosen": -0.5146636962890625, + "logits/rejected": -0.515917956829071, + "logps/chosen": -302.3570251464844, + "logps/rejected": -313.68438720703125, + "logps/weighted_chosen": -2.6830201148986816, + "logps/weighted_rejected": -3.202099561691284, + "loss": 0.6526, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -32.978126525878906, + "rewards/margins": 13.435937881469727, + "rewards/rejected": -46.408203125, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.23505249619483948, + "rewards/weighted_margins": 0.33623045682907104, + "rewards/weighted_rejected": -0.5710296630859375, + "step": 300 + }, + { + "epoch": 0.16226118817063595, + "grad_norm": 
25.855854034423828, + "learning_rate": 9.886131295443002e-07, + "logits/chosen": -0.6332122683525085, + "logits/rejected": -0.6879852414131165, + "logps/chosen": -315.02264404296875, + "logps/rejected": -296.54998779296875, + "logps/weighted_chosen": -2.8891844749450684, + "logps/weighted_rejected": -3.3497071266174316, + "loss": 0.6099, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -33.83808517456055, + "rewards/margins": 12.542577743530273, + "rewards/rejected": -46.39081954956055, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.20775146782398224, + "rewards/weighted_margins": 0.507794201374054, + "rewards/weighted_rejected": -0.715728759765625, + "step": 310 + }, + { + "epoch": 0.16749542004710807, + "grad_norm": 19.11484718322754, + "learning_rate": 9.865925894455166e-07, + "logits/chosen": -0.730267345905304, + "logits/rejected": -0.746167004108429, + "logps/chosen": -338.2242126464844, + "logps/rejected": -307.18280029296875, + "logps/weighted_chosen": -2.9883790016174316, + "logps/weighted_rejected": -3.5892090797424316, + "loss": 0.6942, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -38.233009338378906, + "rewards/margins": 11.564062118530273, + "rewards/rejected": -49.80937576293945, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.3507080078125, + "rewards/weighted_margins": 0.3366760313510895, + "rewards/weighted_rejected": -0.6871337890625, + "step": 320 + }, + { + "epoch": 0.17272965192358022, + "grad_norm": 57.19697570800781, + "learning_rate": 9.84409531374603e-07, + "logits/chosen": -0.6843910217285156, + "logits/rejected": -0.6659576296806335, + "logps/chosen": -345.46875, + "logps/rejected": -316.2515563964844, + "logps/weighted_chosen": -3.05517578125, + "logps/weighted_rejected": -3.5519776344299316, + "loss": 0.6569, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -40.732032775878906, + "rewards/margins": 12.651952743530273, 
+ "rewards/rejected": -53.38984298706055, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.3262878358364105, + "rewards/weighted_margins": 0.346893310546875, + "rewards/weighted_rejected": -0.6730865240097046, + "step": 330 + }, + { + "epoch": 0.17796388380005235, + "grad_norm": 52.49288558959961, + "learning_rate": 9.820646844552219e-07, + "logits/chosen": -0.6993133425712585, + "logits/rejected": -0.7529846429824829, + "logps/chosen": -313.59295654296875, + "logps/rejected": -322.1499938964844, + "logps/weighted_chosen": -3.0488524436950684, + "logps/weighted_rejected": -3.440136671066284, + "loss": 0.6287, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -37.06660079956055, + "rewards/margins": 19.494531631469727, + "rewards/rejected": -56.556640625, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.30719298124313354, + "rewards/weighted_margins": 0.448944091796875, + "rewards/weighted_rejected": -0.755999743938446, + "step": 340 + }, + { + "epoch": 0.18319811567652447, + "grad_norm": 15.657389640808105, + "learning_rate": 9.795588318471964e-07, + "logits/chosen": -0.7813507318496704, + "logits/rejected": -0.7874206304550171, + "logps/chosen": -299.80157470703125, + "logps/rejected": -331.4375, + "logps/weighted_chosen": -2.84619140625, + "logps/weighted_rejected": -3.315380811691284, + "loss": 0.6405, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -40.32304763793945, + "rewards/margins": 14.830663681030273, + "rewards/rejected": -55.15625, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.315826416015625, + "rewards/weighted_margins": 0.386627197265625, + "rewards/weighted_rejected": -0.702471911907196, + "step": 350 + }, + { + "epoch": 0.1884323475529966, + "grad_norm": 16.19976806640625, + "learning_rate": 9.768928104849415e-07, + "logits/chosen": -0.801177978515625, + "logits/rejected": -0.799664318561554, + 
"logps/chosen": -323.5171813964844, + "logps/rejected": -305.046875, + "logps/weighted_chosen": -3.1164307594299316, + "logps/weighted_rejected": -3.3475098609924316, + "loss": 0.6865, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -40.823829650878906, + "rewards/margins": 15.389843940734863, + "rewards/rejected": -56.216407775878906, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.31828004121780396, + "rewards/weighted_margins": 0.3831420838832855, + "rewards/weighted_rejected": -0.7014526128768921, + "step": 360 + }, + { + "epoch": 0.19366657942946872, + "grad_norm": 89.87427520751953, + "learning_rate": 9.740675107979355e-07, + "logits/chosen": -0.7640800476074219, + "logits/rejected": -0.7867538332939148, + "logps/chosen": -361.13751220703125, + "logps/rejected": -334.97967529296875, + "logps/weighted_chosen": -2.5084471702575684, + "logps/weighted_rejected": -3.4689698219299316, + "loss": 0.6531, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -46.098045349121094, + "rewards/margins": 14.188085556030273, + "rewards/rejected": -60.26640701293945, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -0.36387938261032104, + "rewards/weighted_margins": 0.3567260801792145, + "rewards/weighted_rejected": -0.720538318157196, + "step": 370 + }, + { + "epoch": 0.19890081130594087, + "grad_norm": 22.484216690063477, + "learning_rate": 9.71083876413323e-07, + "logits/chosen": -0.7209137082099915, + "logits/rejected": -0.7318176031112671, + "logps/chosen": -353.6031188964844, + "logps/rejected": -339.16485595703125, + "logps/weighted_chosen": -2.70361328125, + "logps/weighted_rejected": -3.5843749046325684, + "loss": 0.6589, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -51.237892150878906, + "rewards/margins": 18.424999237060547, + "rewards/rejected": -69.64530944824219, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": 
-0.4475158751010895, + "rewards/weighted_margins": 0.3267761170864105, + "rewards/weighted_rejected": -0.7747405767440796, + "step": 380 + }, + { + "epoch": 0.204135043182413, + "grad_norm": 21.885372161865234, + "learning_rate": 9.67942903840751e-07, + "logits/chosen": -0.7708206176757812, + "logits/rejected": -0.8207153081893921, + "logps/chosen": -355.18438720703125, + "logps/rejected": -350.47186279296875, + "logps/weighted_chosen": -2.8836669921875, + "logps/weighted_rejected": -3.5904297828674316, + "loss": 0.6028, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -50.973045349121094, + "rewards/margins": 25.190038681030273, + "rewards/rejected": -76.1617202758789, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.39097899198532104, + "rewards/weighted_margins": 0.4941650331020355, + "rewards/weighted_rejected": -0.884967029094696, + "step": 390 + }, + { + "epoch": 0.2093692750588851, + "grad_norm": 26.357742309570312, + "learning_rate": 9.646456421395447e-07, + "logits/chosen": -0.805267333984375, + "logits/rejected": -0.8178039789199829, + "logps/chosen": -377.52813720703125, + "logps/rejected": -392.0296936035156, + "logps/weighted_chosen": -2.7947998046875, + "logps/weighted_rejected": -3.697582960128784, + "loss": 0.6296, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -59.2109375, + "rewards/margins": 29.121875762939453, + "rewards/rejected": -88.32890319824219, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -0.41761475801467896, + "rewards/weighted_margins": 0.38171082735061646, + "rewards/weighted_rejected": -0.7994705438613892, + "step": 400 + }, + { + "epoch": 0.21460350693535724, + "grad_norm": 21.382999420166016, + "learning_rate": 9.611931925683266e-07, + "logits/chosen": -0.7703964114189148, + "logits/rejected": -0.808850109577179, + "logps/chosen": -367.3140563964844, + "logps/rejected": -348.0687561035156, + "logps/weighted_chosen": 
-2.711962938308716, + "logps/weighted_rejected": -3.4615721702575684, + "loss": 0.5758, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -61.02734375, + "rewards/margins": 23.316797256469727, + "rewards/rejected": -84.34687805175781, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.40519410371780396, + "rewards/weighted_margins": 0.521441638469696, + "rewards/weighted_rejected": -0.9261535406112671, + "step": 410 + }, + { + "epoch": 0.21983773881182936, + "grad_norm": 23.030996322631836, + "learning_rate": 9.575867082172085e-07, + "logits/chosen": -0.7789466977119446, + "logits/rejected": -0.8260132074356079, + "logps/chosen": -372.22344970703125, + "logps/rejected": -367.0171813964844, + "logps/weighted_chosen": -3.114550828933716, + "logps/weighted_rejected": -3.364208936691284, + "loss": 0.6211, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -67.16816711425781, + "rewards/margins": 29.731639862060547, + "rewards/rejected": -96.90156555175781, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.5122925043106079, + "rewards/weighted_margins": 0.5218566656112671, + "rewards/weighted_rejected": -1.0339782238006592, + "step": 420 + }, + { + "epoch": 0.22507197068830148, + "grad_norm": 16.442333221435547, + "learning_rate": 9.538273936226673e-07, + "logits/chosen": -0.830523669719696, + "logits/rejected": -0.8667358160018921, + "logps/chosen": -328.4546813964844, + "logps/rejected": -347.9593811035156, + "logps/weighted_chosen": -3.373584032058716, + "logps/weighted_rejected": -3.832958936691284, + "loss": 0.6425, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -62.920310974121094, + "rewards/margins": 20.668750762939453, + "rewards/rejected": -83.5894546508789, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.459890753030777, + "rewards/weighted_margins": 0.39284056425094604, + "rewards/weighted_rejected": 
-0.8525451421737671, + "step": 430 + }, + { + "epoch": 0.23030620256477363, + "grad_norm": 21.955875396728516, + "learning_rate": 9.499165043652391e-07, + "logits/chosen": -0.8598114252090454, + "logits/rejected": -0.868182361125946, + "logps/chosen": -358.21563720703125, + "logps/rejected": -356.26251220703125, + "logps/weighted_chosen": -3.4171142578125, + "logps/weighted_rejected": -3.6997313499450684, + "loss": 0.624, + "rewards/accuracies": 0.625, + "rewards/chosen": -66.96504211425781, + "rewards/margins": 22.563282012939453, + "rewards/rejected": -89.5503921508789, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.5841079950332642, + "rewards/weighted_margins": 0.434326171875, + "rewards/weighted_rejected": -1.018707275390625, + "step": 440 + }, + { + "epoch": 0.23554043444124576, + "grad_norm": 75.56902313232422, + "learning_rate": 9.458553466501665e-07, + "logits/chosen": -0.9330536127090454, + "logits/rejected": -0.9642333984375, + "logps/chosen": -352.6187438964844, + "logps/rejected": -336.0218811035156, + "logps/weighted_chosen": -3.4129395484924316, + "logps/weighted_rejected": -3.74462890625, + "loss": 0.6566, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -66.412109375, + "rewards/margins": 25.757617950439453, + "rewards/rejected": -92.181640625, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.689007580280304, + "rewards/weighted_margins": 0.4539245665073395, + "rewards/weighted_rejected": -1.143212914466858, + "step": 450 + }, + { + "epoch": 0.24077466631771788, + "grad_norm": 19.516427993774414, + "learning_rate": 9.416452768711366e-07, + "logits/chosen": -0.945111095905304, + "logits/rejected": -0.9787231683731079, + "logps/chosen": -369.3671875, + "logps/rejected": -358.9624938964844, + "logps/weighted_chosen": -3.1959471702575684, + "logps/weighted_rejected": -3.948193311691284, + "loss": 0.6392, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": 
-77.17265319824219, + "rewards/margins": 21.916015625, + "rewards/rejected": -99.0796890258789, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.67822265625, + "rewards/weighted_margins": 0.532788097858429, + "rewards/weighted_rejected": -1.2112305164337158, + "step": 460 + }, + { + "epoch": 0.24600889819419, + "grad_norm": 19.182979583740234, + "learning_rate": 9.372877011572557e-07, + "logits/chosen": -0.9224609136581421, + "logits/rejected": -0.9388214349746704, + "logps/chosen": -391.6937561035156, + "logps/rejected": -377.0625, + "logps/weighted_chosen": -3.224560499191284, + "logps/weighted_rejected": -3.783252000808716, + "loss": 0.6162, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -80.95976257324219, + "rewards/margins": 21.617578506469727, + "rewards/rejected": -102.59883117675781, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.684155285358429, + "rewards/weighted_margins": 0.5555480718612671, + "rewards/weighted_rejected": -1.23956298828125, + "step": 470 + }, + { + "epoch": 0.2512431300706621, + "grad_norm": 31.75469970703125, + "learning_rate": 9.327840749034141e-07, + "logits/chosen": -0.969561755657196, + "logits/rejected": -0.998791515827179, + "logps/chosen": -362.1859436035156, + "logps/rejected": -385.29998779296875, + "logps/weighted_chosen": -3.0771241188049316, + "logps/weighted_rejected": -4.388257026672363, + "loss": 0.6296, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -75.1123046875, + "rewards/margins": 33.66425704956055, + "rewards/rejected": -108.75, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.606555163860321, + "rewards/weighted_margins": 0.612231433391571, + "rewards/weighted_rejected": -1.2182190418243408, + "step": 480 + }, + { + "epoch": 0.2564773619471343, + "grad_norm": 37.024818420410156, + "learning_rate": 9.281359022841965e-07, + "logits/chosen": -0.846588134765625, + 
"logits/rejected": -0.859790027141571, + "logps/chosen": -352.46405029296875, + "logps/rejected": -355.24688720703125, + "logps/weighted_chosen": -3.219531297683716, + "logps/weighted_rejected": -4.648681640625, + "loss": 0.5897, + "rewards/accuracies": 0.640625, + "rewards/chosen": -77.3949203491211, + "rewards/margins": 32.93046951293945, + "rewards/rejected": -110.32890319824219, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.7288268804550171, + "rewards/weighted_margins": 0.6741577386856079, + "rewards/weighted_rejected": -1.403161644935608, + "step": 490 + }, + { + "epoch": 0.26171159382360637, + "grad_norm": 17.740766525268555, + "learning_rate": 9.233447357514989e-07, + "logits/chosen": -0.8205505609512329, + "logits/rejected": -0.863543689250946, + "logps/chosen": -375.52032470703125, + "logps/rejected": -378.3500061035156, + "logps/weighted_chosen": -3.53125, + "logps/weighted_rejected": -4.106689453125, + "loss": 0.6305, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -77.591796875, + "rewards/margins": 30.978906631469727, + "rewards/rejected": -108.54609680175781, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.6407378911972046, + "rewards/weighted_margins": 0.6662231683731079, + "rewards/weighted_rejected": -1.30645751953125, + "step": 500 + }, + { + "epoch": 0.26171159382360637, + "eval_logits/chosen": -0.9400458931922913, + "eval_logits/rejected": -0.955981433391571, + "eval_logps/chosen": -371.72900390625, + "eval_logps/rejected": -379.6419982910156, + "eval_logps/weighted_chosen": -3.214712381362915, + "eval_logps/weighted_rejected": -4.0158867835998535, + "eval_loss": 0.6316163539886475, + "eval_rewards/accuracies": 0.6349999904632568, + "eval_rewards/chosen": -82.98784637451172, + "eval_rewards/margins": 28.939437866210938, + "eval_rewards/rejected": -111.93875122070312, + "eval_rewards/weighted_accuracies": 0.6725000143051147, + "eval_rewards/weighted_chosen": 
-0.6669993996620178, + "eval_rewards/weighted_margins": 0.5506796836853027, + "eval_rewards/weighted_rejected": -1.2176789045333862, + "eval_runtime": 1162.5522, + "eval_samples_per_second": 1.72, + "eval_steps_per_second": 0.43, + "step": 500 + } + ], + "logging_steps": 10, + "max_steps": 1911, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000..f626c5c --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe50c447c403d41daaff89efed0e09938eb1d87a3da0072f9c7935887bef87f +size 8721 diff --git a/checkpoint-500/zero_to_fp32.py b/checkpoint-500/zero_to_fp32.py new file mode 100644 index 0000000..0e75914 --- /dev/null +++ b/checkpoint-500/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = 
sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, 
ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model``: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder.
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/config.json b/config.json new file mode 100644 index 0000000..3f8f5c0 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..1048745 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62cda7039fdb68297f19cd6e4ff6231bf4276e8d1490bee26d47a248a93bca06 +size 4976698672 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..256655f --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da90fd805e109a8d8e42902c0608f6f49b9c7d23d089b483e31e2815ab5a9561 +size 4999802720 diff 
--git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..d648b73 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8adc12e04e3b30a33fde52b873d7fc71c439b78f45dd662efc85ee25dc84bfa +size 4915916176 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..441141d --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9955deefb62c1bb8725dfa07a4c3c4649abc3b65dac0f0295c2e906f5b27c6e1 +size 1168138808 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": 
"model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + 
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..03aa64f --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0968dcc0ee8e56c7dccd34a7f51f8065ea0cb9e2cc529e3243d1e5c0a4bdaa0c +size 17208754 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..877a9a9 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": 
"<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, 
+ "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": 
"<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": 
"<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": 
"<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 32768, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..f626c5c --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe50c447c403d41daaff89efed0e09938eb1d87a3da0072f9c7935887bef87f +size 8721