From 388604ad985ba5b75c28d7011b152f9d1bd34214 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Mon, 1 Jun 2026 00:19:18 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: HCY123902/llama-3-8b-dpo-tw15-beta-1e-0 Source: Original Platform --- .gitattributes | 37 + README.md | 69 + chat_template.jinja | 5 + checkpoint-1000/chat_template.jinja | 5 + checkpoint-1000/config.json | 29 + checkpoint-1000/generation_config.json | 6 + checkpoint-1000/latest | 1 + .../model-00001-of-00004.safetensors | 3 + .../model-00002-of-00004.safetensors | 3 + .../model-00003-of-00004.safetensors | 3 + .../model-00004-of-00004.safetensors | 3 + checkpoint-1000/model.safetensors.index.json | 299 ++ checkpoint-1000/rng_state_0.pth | 3 + checkpoint-1000/rng_state_1.pth | 3 + checkpoint-1000/scheduler.pt | 3 + checkpoint-1000/special_tokens_map.json | 23 + checkpoint-1000/tokenizer.json | 3 + checkpoint-1000/tokenizer_config.json | 2063 +++++++++++ checkpoint-1000/trainer_state.json | 2199 +++++++++++ checkpoint-1000/training_args.bin | 3 + checkpoint-1000/zero_to_fp32.py | 760 ++++ checkpoint-1500/chat_template.jinja | 5 + checkpoint-1500/config.json | 29 + checkpoint-1500/generation_config.json | 6 + .../zero_pp_rank_0_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_1_mp_rank_00_model_states.pt | 3 + checkpoint-1500/latest | 1 + .../model-00001-of-00004.safetensors | 3 + .../model-00002-of-00004.safetensors | 3 + .../model-00003-of-00004.safetensors | 3 + .../model-00004-of-00004.safetensors | 3 + checkpoint-1500/model.safetensors.index.json | 299 ++ checkpoint-1500/rng_state_0.pth | 3 + checkpoint-1500/rng_state_1.pth | 3 + checkpoint-1500/scheduler.pt | 3 + checkpoint-1500/special_tokens_map.json | 23 + checkpoint-1500/tokenizer.json | 3 + 
checkpoint-1500/tokenizer_config.json | 2063 +++++++++++ checkpoint-1500/trainer_state.json | 3271 +++++++++++++++++ checkpoint-1500/training_args.bin | 3 + checkpoint-1500/zero_to_fp32.py | 760 ++++ checkpoint-500/chat_template.jinja | 5 + checkpoint-500/config.json | 29 + checkpoint-500/generation_config.json | 6 + checkpoint-500/latest | 1 + .../model-00001-of-00004.safetensors | 3 + .../model-00002-of-00004.safetensors | 3 + .../model-00003-of-00004.safetensors | 3 + .../model-00004-of-00004.safetensors | 3 + checkpoint-500/model.safetensors.index.json | 299 ++ checkpoint-500/rng_state_0.pth | 3 + checkpoint-500/rng_state_1.pth | 3 + checkpoint-500/scheduler.pt | 3 + checkpoint-500/special_tokens_map.json | 23 + checkpoint-500/tokenizer.json | 3 + checkpoint-500/tokenizer_config.json | 2063 +++++++++++ checkpoint-500/trainer_state.json | 1127 ++++++ checkpoint-500/training_args.bin | 3 + checkpoint-500/zero_to_fp32.py | 760 ++++ config.json | 29 + model.safetensors.index.json | 299 ++ special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2063 +++++++++++ training_args.bin | 3 + 65 files changed, 18773 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 chat_template.jinja create mode 100644 checkpoint-1000/chat_template.jinja create mode 100644 checkpoint-1000/config.json create mode 100644 checkpoint-1000/generation_config.json create mode 100644 checkpoint-1000/latest create mode 100644 checkpoint-1000/model-00001-of-00004.safetensors create mode 100644 checkpoint-1000/model-00002-of-00004.safetensors create mode 100644 checkpoint-1000/model-00003-of-00004.safetensors create mode 100644 checkpoint-1000/model-00004-of-00004.safetensors create mode 100644 checkpoint-1000/model.safetensors.index.json create mode 100644 checkpoint-1000/rng_state_0.pth create mode 100644 checkpoint-1000/rng_state_1.pth create mode 100644 checkpoint-1000/scheduler.pt create mode 100644 
checkpoint-1000/special_tokens_map.json create mode 100644 checkpoint-1000/tokenizer.json create mode 100644 checkpoint-1000/tokenizer_config.json create mode 100644 checkpoint-1000/trainer_state.json create mode 100644 checkpoint-1000/training_args.bin create mode 100644 checkpoint-1000/zero_to_fp32.py create mode 100644 checkpoint-1500/chat_template.jinja create mode 100644 checkpoint-1500/config.json create mode 100644 checkpoint-1500/generation_config.json create mode 100644 checkpoint-1500/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt create mode 100644 checkpoint-1500/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt create mode 100644 checkpoint-1500/latest create mode 100644 checkpoint-1500/model-00001-of-00004.safetensors create mode 100644 checkpoint-1500/model-00002-of-00004.safetensors create mode 100644 checkpoint-1500/model-00003-of-00004.safetensors create mode 100644 checkpoint-1500/model-00004-of-00004.safetensors create mode 100644 checkpoint-1500/model.safetensors.index.json create mode 100644 checkpoint-1500/rng_state_0.pth create mode 100644 checkpoint-1500/rng_state_1.pth create mode 100644 checkpoint-1500/scheduler.pt create mode 100644 checkpoint-1500/special_tokens_map.json create mode 100644 checkpoint-1500/tokenizer.json create mode 100644 checkpoint-1500/tokenizer_config.json create mode 100644 checkpoint-1500/trainer_state.json create mode 100644 checkpoint-1500/training_args.bin create mode 100644 checkpoint-1500/zero_to_fp32.py create mode 100644 checkpoint-500/chat_template.jinja create mode 100644 checkpoint-500/config.json create mode 100644 checkpoint-500/generation_config.json create mode 100644 checkpoint-500/latest create mode 100644 checkpoint-500/model-00001-of-00004.safetensors create mode 100644 checkpoint-500/model-00002-of-00004.safetensors create mode 100644 checkpoint-500/model-00003-of-00004.safetensors create mode 100644 checkpoint-500/model-00004-of-00004.safetensors create mode 100644 
checkpoint-500/model.safetensors.index.json create mode 100644 checkpoint-500/rng_state_0.pth create mode 100644 checkpoint-500/rng_state_1.pth create mode 100644 checkpoint-500/scheduler.pt create mode 100644 checkpoint-500/special_tokens_map.json create mode 100644 checkpoint-500/tokenizer.json create mode 100644 checkpoint-500/tokenizer_config.json create mode 100644 checkpoint-500/trainer_state.json create mode 100644 checkpoint-500/training_args.bin create mode 100644 checkpoint-500/zero_to_fp32.py create mode 100644 config.json create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 training_args.bin diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..f8fe2af --- /dev/null +++ b/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text 
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..66e67b6 --- /dev/null +++ b/README.md @@ -0,0 +1,69 @@ +--- +base_model: princeton-nlp/Llama-3-Base-8B-SFT +library_name: transformers +model_name: llama-3-8b-dpo-tw15-beta-1e-0 +tags: +- generated_from_trainer +- dpo +- trl +licence: license +--- + +# Model Card for llama-3-8b-dpo-tw15-beta-1e-0 + +This model is a fine-tuned version of [princeton-nlp/Llama-3-Base-8B-SFT](https://huggingface.co/princeton-nlp/Llama-3-Base-8B-SFT). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="HCY123902/llama-3-8b-dpo-tw15-beta-1e-0", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/2320032466hchy/attention_dpo/runs/1mzwq2le) + + +This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290). 
+ +### Framework versions + +- TRL: 0.20.0 +- Transformers: 4.54.1 +- Pytorch: 2.7.1+cu128 +- Datasets: 3.6.0 +- Tokenizers: 0.21.1 + +## Citations + +Cite DPO as: + +```bibtex +@inproceedings{rafailov2023direct, + title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}}, + author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn}, + year = 2023, + booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023}, + url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html}, + editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine}, +} +``` + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-1000/chat_template.jinja 
b/checkpoint-1000/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/checkpoint-1000/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000..3f8f5c0 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/checkpoint-1000/generation_config.json b/checkpoint-1000/generation_config.json new file mode 100644 index 0000000..fc3c54a --- /dev/null +++ b/checkpoint-1000/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "eos_token_id": 128001, + "transformers_version": "4.54.1" +} diff --git a/checkpoint-1000/latest b/checkpoint-1000/latest new file mode 100644 index 0000000..e2d3435 --- /dev/null +++ b/checkpoint-1000/latest @@ -0,0 +1 @@ +global_step1000 \ No newline at end of file diff --git 
a/checkpoint-1000/model-00001-of-00004.safetensors b/checkpoint-1000/model-00001-of-00004.safetensors new file mode 100644 index 0000000..945637d --- /dev/null +++ b/checkpoint-1000/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b3fb81b645bd7d4201bdc69b3e1d0ea77b3118be2b9dba8a7bf2a85fc1452a +size 4976698672 diff --git a/checkpoint-1000/model-00002-of-00004.safetensors b/checkpoint-1000/model-00002-of-00004.safetensors new file mode 100644 index 0000000..8749277 --- /dev/null +++ b/checkpoint-1000/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a417d99d6e32dde04fbf995a5a985d361a0cd93c27cda1cff410c9d232a6607d +size 4999802720 diff --git a/checkpoint-1000/model-00003-of-00004.safetensors b/checkpoint-1000/model-00003-of-00004.safetensors new file mode 100644 index 0000000..94d73f6 --- /dev/null +++ b/checkpoint-1000/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d136b84c58c9469b4c8faa2eba011489d605bfc9fc7ec0bc8e978ed02d5574f2 +size 4915916176 diff --git a/checkpoint-1000/model-00004-of-00004.safetensors b/checkpoint-1000/model-00004-of-00004.safetensors new file mode 100644 index 0000000..c75783a --- /dev/null +++ b/checkpoint-1000/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2309e201fdc41f3f86e37cb90541cc7d42ace9fecc0e48f006de8ded6199de02 +size 1168138808 diff --git a/checkpoint-1000/model.safetensors.index.json b/checkpoint-1000/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/checkpoint-1000/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + 
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": 
"model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + 
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/checkpoint-1000/rng_state_0.pth b/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000..37c843b --- /dev/null +++ b/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb2e15baa025bd61fe183f8e343e7ff470b9a01aecd8defcf155a1cf00393e3 +size 14917 diff --git a/checkpoint-1000/rng_state_1.pth b/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000..d8eccdd --- /dev/null +++ b/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:07ee73a4738a457f3198cccec25cf12377bb1eba6c29e95c9fecf83c1487d401 +size 14917 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000..7c34b16 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f1b2374548a5ec63cdcc0490db6ed738cd23c8550fb53eb0592574609549746 +size 1465 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1000/tokenizer.json b/checkpoint-1000/tokenizer.json new file mode 100644 index 0000000..03aa64f --- /dev/null +++ b/checkpoint-1000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0968dcc0ee8e56c7dccd34a7f51f8065ea0cb9e2cc529e3243d1e5c0a4bdaa0c +size 17208754 diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000..877a9a9 --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": 
"<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, 
+ "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": 
"<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": 
"<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": 
"<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 32768, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000..99137a3 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,2199 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5234231876472127, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005234231876472127, + "grad_norm": 126.29230499267578, + "learning_rate": 0.0, + "logits/chosen": -0.40118408203125, + "logits/rejected": -0.41802978515625, + "logps/chosen": -297.609375, + "logps/rejected": -247.84375, + "logps/weighted_chosen": -4.5152587890625, + "logps/weighted_rejected": -3.032470703125, + "loss": 0.6914, + "rewards/accuracies": 0.0, 
+ "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/weighted_accuracies": 0.0, + "rewards/weighted_chosen": 0.0, + "rewards/weighted_margins": 0.0, + "rewards/weighted_rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.005234231876472127, + "grad_norm": 296.4369812011719, + "learning_rate": 4.6875e-08, + "logits/chosen": -0.3177456259727478, + "logits/rejected": -0.3534359335899353, + "logps/chosen": -275.5711669921875, + "logps/rejected": -255.90451049804688, + "logps/weighted_chosen": -2.350965738296509, + "logps/weighted_rejected": -2.549940347671509, + "loss": 0.6917, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0401475690305233, + "rewards/margins": 0.04296875, + "rewards/rejected": -0.0831163227558136, + "rewards/weighted_accuracies": 0.3229166567325592, + "rewards/weighted_chosen": -0.00032212998485192657, + "rewards/weighted_margins": 0.00019327799964230508, + "rewards/weighted_rejected": -0.0005154079990461469, + "step": 10 + }, + { + "epoch": 0.010468463752944255, + "grad_norm": 21.67967414855957, + "learning_rate": 9.895833333333332e-08, + "logits/chosen": -0.29769057035446167, + "logits/rejected": -0.313650518655777, + "logps/chosen": -294.3374938964844, + "logps/rejected": -272.6703186035156, + "logps/weighted_chosen": -2.13031005859375, + "logps/weighted_rejected": -2.103222608566284, + "loss": 0.6908, + "rewards/accuracies": 0.3125, + "rewards/chosen": -0.06103515625, + "rewards/margins": -0.01318359375, + "rewards/rejected": -0.0478515625, + "rewards/weighted_accuracies": 0.4437499940395355, + "rewards/weighted_chosen": 0.0014366150135174394, + "rewards/weighted_margins": 0.0021545409690588713, + "rewards/weighted_rejected": -0.0007179260137490928, + "step": 20 + }, + { + "epoch": 0.015702695629416383, + "grad_norm": 76.9887466430664, + "learning_rate": 1.5104166666666664e-07, + "logits/chosen": -0.2917121946811676, + "logits/rejected": -0.337240606546402, + "logps/chosen": -298.02655029296875, + 
"logps/rejected": -268.12188720703125, + "logps/weighted_chosen": -2.0724120140075684, + "logps/weighted_rejected": -2.4466919898986816, + "loss": 0.6912, + "rewards/accuracies": 0.28125, + "rewards/chosen": -0.0062500000931322575, + "rewards/margins": -0.02509765699505806, + "rewards/rejected": 0.01884765550494194, + "rewards/weighted_accuracies": 0.4281249940395355, + "rewards/weighted_chosen": 0.0027938843704760075, + "rewards/weighted_margins": 0.0019706725142896175, + "rewards/weighted_rejected": 0.0008232116815634072, + "step": 30 + }, + { + "epoch": 0.02093692750588851, + "grad_norm": 32.98203659057617, + "learning_rate": 2.03125e-07, + "logits/chosen": -0.3011154234409332, + "logits/rejected": -0.3432762026786804, + "logps/chosen": -278.63751220703125, + "logps/rejected": -253.88125610351562, + "logps/weighted_chosen": -2.2070555686950684, + "logps/weighted_rejected": -2.605224609375, + "loss": 0.692, + "rewards/accuracies": 0.26249998807907104, + "rewards/chosen": 0.0034667968284338713, + "rewards/margins": -0.05991210788488388, + "rewards/rejected": 0.06337890774011612, + "rewards/weighted_accuracies": 0.35624998807907104, + "rewards/weighted_chosen": 0.0014549255138263106, + "rewards/weighted_margins": -0.00034332275390625, + "rewards/weighted_rejected": 0.0017982482677325606, + "step": 40 + }, + { + "epoch": 0.02617115938236064, + "grad_norm": 20.751684188842773, + "learning_rate": 2.552083333333333e-07, + "logits/chosen": -0.2822524905204773, + "logits/rejected": -0.32080918550491333, + "logps/chosen": -280.31329345703125, + "logps/rejected": -267.58709716796875, + "logps/weighted_chosen": -2.136962890625, + "logps/weighted_rejected": -2.1753907203674316, + "loss": 0.6883, + "rewards/accuracies": 0.3125, + "rewards/chosen": -0.07236327975988388, + "rewards/margins": -0.09189452975988388, + "rewards/rejected": 0.01953125, + "rewards/weighted_accuracies": 0.4375, + "rewards/weighted_chosen": 0.0054107666946947575, + "rewards/weighted_margins": 
0.0078063965775072575, + "rewards/weighted_rejected": -0.0023956298828125, + "step": 50 + }, + { + "epoch": 0.031405391258832765, + "grad_norm": 40.70024108886719, + "learning_rate": 3.0729166666666665e-07, + "logits/chosen": -0.3149581849575043, + "logits/rejected": -0.3086872100830078, + "logps/chosen": -277.6031188964844, + "logps/rejected": -261.8031311035156, + "logps/weighted_chosen": -2.5905518531799316, + "logps/weighted_rejected": -2.4834961891174316, + "loss": 0.6874, + "rewards/accuracies": 0.3812499940395355, + "rewards/chosen": 0.03662109375, + "rewards/margins": 0.12646484375, + "rewards/rejected": -0.08984375, + "rewards/weighted_accuracies": 0.5, + "rewards/weighted_chosen": 0.0004280090215615928, + "rewards/weighted_margins": 0.01105651818215847, + "rewards/weighted_rejected": -0.01062927208840847, + "step": 60 + }, + { + "epoch": 0.036639623135304895, + "grad_norm": 67.51947021484375, + "learning_rate": 3.59375e-07, + "logits/chosen": -0.318746954202652, + "logits/rejected": -0.32574766874313354, + "logps/chosen": -289.90313720703125, + "logps/rejected": -245.04452514648438, + "logps/weighted_chosen": -2.098431348800659, + "logps/weighted_rejected": -2.392407178878784, + "loss": 0.6841, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": 0.16708984971046448, + "rewards/margins": 0.4442382752895355, + "rewards/rejected": -0.27714842557907104, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": 0.015575408935546875, + "rewards/weighted_margins": 0.02174072340130806, + "rewards/weighted_rejected": -0.00616531353443861, + "step": 70 + }, + { + "epoch": 0.04187385501177702, + "grad_norm": 68.87100982666016, + "learning_rate": 4.114583333333333e-07, + "logits/chosen": -0.286581426858902, + "logits/rejected": -0.3082527220249176, + "logps/chosen": -289.5101623535156, + "logps/rejected": -270.4375, + "logps/weighted_chosen": -2.2385497093200684, + "logps/weighted_rejected": -2.4218382835388184, + "loss": 0.6727, + 
"rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.31572264432907104, + "rewards/margins": 0.5547851324081421, + "rewards/rejected": -0.23906250298023224, + "rewards/weighted_accuracies": 0.596875011920929, + "rewards/weighted_chosen": 0.03613891452550888, + "rewards/weighted_margins": 0.05283202975988388, + "rewards/weighted_rejected": -0.01669769361615181, + "step": 80 + }, + { + "epoch": 0.04710808688824915, + "grad_norm": 40.29203414916992, + "learning_rate": 4.6354166666666664e-07, + "logits/chosen": -0.3158706724643707, + "logits/rejected": -0.30914992094039917, + "logps/chosen": -280.5726623535156, + "logps/rejected": -258.17657470703125, + "logps/weighted_chosen": -2.45281982421875, + "logps/weighted_rejected": -2.5444703102111816, + "loss": 0.6683, + "rewards/accuracies": 0.4906249940395355, + "rewards/chosen": 0.15966796875, + "rewards/margins": 0.599609375, + "rewards/rejected": -0.43994140625, + "rewards/weighted_accuracies": 0.581250011920929, + "rewards/weighted_chosen": 0.05808715894818306, + "rewards/weighted_margins": 0.07471618801355362, + "rewards/weighted_rejected": -0.0166168212890625, + "step": 90 + }, + { + "epoch": 0.05234231876472128, + "grad_norm": 46.855377197265625, + "learning_rate": 5.156249999999999e-07, + "logits/chosen": -0.2856552004814148, + "logits/rejected": -0.3585342466831207, + "logps/chosen": -291.05548095703125, + "logps/rejected": -287.078125, + "logps/weighted_chosen": -1.9577789306640625, + "logps/weighted_rejected": -2.532482862472534, + "loss": 0.6785, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.13925781846046448, + "rewards/margins": 0.9869140386581421, + "rewards/rejected": -1.1261718273162842, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": 0.03715210035443306, + "rewards/weighted_margins": 0.0635833740234375, + "rewards/weighted_rejected": -0.02643737755715847, + "step": 100 + }, + { + "epoch": 0.05757655064119341, + "grad_norm": 55.04579162597656, + 
"learning_rate": 5.677083333333333e-07, + "logits/chosen": -0.33493995666503906, + "logits/rejected": -0.3254844546318054, + "logps/chosen": -297.2953186035156, + "logps/rejected": -262.6773376464844, + "logps/weighted_chosen": -2.606689453125, + "logps/weighted_rejected": -2.648364305496216, + "loss": 0.6821, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -0.9228515625, + "rewards/margins": 0.8955078125, + "rewards/rejected": -1.818359375, + "rewards/weighted_accuracies": 0.518750011920929, + "rewards/weighted_chosen": -0.005747986026108265, + "rewards/weighted_margins": 0.05161895602941513, + "rewards/weighted_rejected": -0.05732421949505806, + "step": 110 + }, + { + "epoch": 0.06281078251766553, + "grad_norm": 22.23135757446289, + "learning_rate": 6.197916666666666e-07, + "logits/chosen": -0.3393222689628601, + "logits/rejected": -0.36481350660324097, + "logps/chosen": -295.6703186035156, + "logps/rejected": -256.3296813964844, + "logps/weighted_chosen": -1.8351562023162842, + "logps/weighted_rejected": -2.124218702316284, + "loss": 0.6752, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.071679711341858, + "rewards/margins": 1.46142578125, + "rewards/rejected": -2.5331053733825684, + "rewards/weighted_accuracies": 0.546875, + "rewards/weighted_chosen": 0.0018810272449627519, + "rewards/weighted_margins": 0.06835174560546875, + "rewards/weighted_rejected": -0.0664466843008995, + "step": 120 + }, + { + "epoch": 0.06804501439413765, + "grad_norm": 57.93917465209961, + "learning_rate": 6.718749999999999e-07, + "logits/chosen": -0.30284881591796875, + "logits/rejected": -0.2989334166049957, + "logps/chosen": -306.5074157714844, + "logps/rejected": -279.8265686035156, + "logps/weighted_chosen": -1.910064697265625, + "logps/weighted_rejected": -2.2278685569763184, + "loss": 0.6738, + "rewards/accuracies": 0.578125, + "rewards/chosen": -0.45097655057907104, + "rewards/margins": 1.7268555164337158, + "rewards/rejected": -2.177734375, + 
"rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": 0.02166290208697319, + "rewards/weighted_margins": 0.07758025825023651, + "rewards/weighted_rejected": -0.05589141696691513, + "step": 130 + }, + { + "epoch": 0.07327924627060979, + "grad_norm": 66.64070892333984, + "learning_rate": 7.239583333333333e-07, + "logits/chosen": -0.34190064668655396, + "logits/rejected": -0.3586837649345398, + "logps/chosen": -300.01483154296875, + "logps/rejected": -276.1703186035156, + "logps/weighted_chosen": -2.202807664871216, + "logps/weighted_rejected": -2.474353075027466, + "loss": 0.6635, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -1.3230469226837158, + "rewards/margins": 2.2220702171325684, + "rewards/rejected": -3.545117139816284, + "rewards/weighted_accuracies": 0.609375, + "rewards/weighted_chosen": 0.0006683349492959678, + "rewards/weighted_margins": 0.10604552924633026, + "rewards/weighted_rejected": -0.1053924560546875, + "step": 140 + }, + { + "epoch": 0.07851347814708191, + "grad_norm": 18.789766311645508, + "learning_rate": 7.760416666666666e-07, + "logits/chosen": -0.2976974546909332, + "logits/rejected": -0.3081321716308594, + "logps/chosen": -286.27813720703125, + "logps/rejected": -255.4640655517578, + "logps/weighted_chosen": -2.7657103538513184, + "logps/weighted_rejected": -2.831347703933716, + "loss": 0.6605, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -2.104687452316284, + "rewards/margins": 2.5054688453674316, + "rewards/rejected": -4.610156059265137, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.0018630981212481856, + "rewards/weighted_margins": 0.158416748046875, + "rewards/weighted_rejected": -0.1603546142578125, + "step": 150 + }, + { + "epoch": 0.08374771002355404, + "grad_norm": 51.51210021972656, + "learning_rate": 8.28125e-07, + "logits/chosen": -0.3341739773750305, + "logits/rejected": -0.3859619200229645, + "logps/chosen": -306.4765625, + 
"logps/rejected": -279.1148376464844, + "logps/weighted_chosen": -2.3189454078674316, + "logps/weighted_rejected": -2.36669921875, + "loss": 0.636, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -2.575390577316284, + "rewards/margins": 3.349609375, + "rewards/rejected": -5.925000190734863, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": 0.02147369459271431, + "rewards/weighted_margins": 0.22438660264015198, + "rewards/weighted_rejected": -0.203105166554451, + "step": 160 + }, + { + "epoch": 0.08898194190002617, + "grad_norm": 398.3809509277344, + "learning_rate": 8.802083333333333e-07, + "logits/chosen": -0.36855775117874146, + "logits/rejected": -0.37070387601852417, + "logps/chosen": -307.1656188964844, + "logps/rejected": -265.78436279296875, + "logps/weighted_chosen": -2.459460496902466, + "logps/weighted_rejected": -2.757373094558716, + "loss": 0.6811, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -4.097460746765137, + "rewards/margins": 3.488476514816284, + "rewards/rejected": -7.585839748382568, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": 0.018505096435546875, + "rewards/weighted_margins": 0.19701537489891052, + "rewards/weighted_rejected": -0.17839965224266052, + "step": 170 + }, + { + "epoch": 0.0942161737764983, + "grad_norm": 55.77580261230469, + "learning_rate": 9.322916666666666e-07, + "logits/chosen": -0.3392753601074219, + "logits/rejected": -0.35816192626953125, + "logps/chosen": -278.99530029296875, + "logps/rejected": -265.18359375, + "logps/weighted_chosen": -2.362103223800659, + "logps/weighted_rejected": -2.754711866378784, + "loss": 0.6944, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -5.346972465515137, + "rewards/margins": 3.5015625953674316, + "rewards/rejected": -8.848730087280273, + "rewards/weighted_accuracies": 0.578125, + "rewards/weighted_chosen": -0.05782318115234375, + "rewards/weighted_margins": 
0.16480103135108948, + "rewards/weighted_rejected": -0.22255554795265198, + "step": 180 + }, + { + "epoch": 0.09945040565297043, + "grad_norm": 38.015960693359375, + "learning_rate": 9.84375e-07, + "logits/chosen": -0.3686843812465668, + "logits/rejected": -0.4041244387626648, + "logps/chosen": -314.3070373535156, + "logps/rejected": -276.7484436035156, + "logps/weighted_chosen": -2.123486280441284, + "logps/weighted_rejected": -2.6261963844299316, + "loss": 0.6392, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -8.581738471984863, + "rewards/margins": 4.317968845367432, + "rewards/rejected": -12.900781631469727, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.129638671875, + "rewards/weighted_margins": 0.2160186767578125, + "rewards/weighted_rejected": -0.345590204000473, + "step": 190 + }, + { + "epoch": 0.10468463752944256, + "grad_norm": 46.52367401123047, + "learning_rate": 9.99959085414323e-07, + "logits/chosen": -0.4128967225551605, + "logits/rejected": -0.4471847414970398, + "logps/chosen": -320.0546875, + "logps/rejected": -273.11248779296875, + "logps/weighted_chosen": -2.5019164085388184, + "logps/weighted_rejected": -2.9936890602111816, + "loss": 0.6473, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -10.737597465515137, + "rewards/margins": 4.738671779632568, + "rewards/rejected": -15.476171493530273, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.11443634331226349, + "rewards/weighted_margins": 0.2610321044921875, + "rewards/weighted_rejected": -0.37534791231155396, + "step": 200 + }, + { + "epoch": 0.10991886940591468, + "grad_norm": 21.238189697265625, + "learning_rate": 9.997587035630105e-07, + "logits/chosen": -0.4288749694824219, + "logits/rejected": -0.4688262939453125, + "logps/chosen": -300.0765686035156, + "logps/rejected": -304.63751220703125, + "logps/weighted_chosen": -2.32427978515625, + "logps/weighted_rejected": 
-3.0592284202575684, + "loss": 0.6424, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -13.117578506469727, + "rewards/margins": 7.013671875, + "rewards/rejected": -20.133594512939453, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -0.21423491835594177, + "rewards/weighted_margins": 0.27025145292282104, + "rewards/weighted_rejected": -0.4845077395439148, + "step": 210 + }, + { + "epoch": 0.11515310128238682, + "grad_norm": 24.92041015625, + "learning_rate": 9.99391406364405e-07, + "logits/chosen": -0.42696380615234375, + "logits/rejected": -0.429006963968277, + "logps/chosen": -305.4906311035156, + "logps/rejected": -288.6312561035156, + "logps/weighted_chosen": -2.625018358230591, + "logps/weighted_rejected": -3.102160692214966, + "loss": 0.6601, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -13.349413871765137, + "rewards/margins": 6.373632907867432, + "rewards/rejected": -19.72265625, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.20062866806983948, + "rewards/weighted_margins": 0.316873162984848, + "rewards/weighted_rejected": -0.5174545049667358, + "step": 220 + }, + { + "epoch": 0.12038733315885894, + "grad_norm": 147.95851135253906, + "learning_rate": 9.988573164927884e-07, + "logits/chosen": -0.3811447024345398, + "logits/rejected": -0.4161086976528168, + "logps/chosen": -281.33203125, + "logps/rejected": -274.234375, + "logps/weighted_chosen": -2.32806396484375, + "logps/weighted_rejected": -2.6552734375, + "loss": 0.7195, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -12.46875, + "rewards/margins": 10.046093940734863, + "rewards/rejected": -22.515430450439453, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.24639587104320526, + "rewards/weighted_margins": 0.23908081650733948, + "rewards/weighted_rejected": -0.4853073060512543, + "step": 230 + }, + { + "epoch": 0.12562156503533106, + "grad_norm": 
26.882122039794922, + "learning_rate": 9.98156612329838e-07, + "logits/chosen": -0.4748245179653168, + "logits/rejected": -0.5250595211982727, + "logps/chosen": -278.16717529296875, + "logps/rejected": -306.29376220703125, + "logps/weighted_chosen": -2.348803758621216, + "logps/weighted_rejected": -2.9455933570861816, + "loss": 0.6674, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -13.405566215515137, + "rewards/margins": 10.753710746765137, + "rewards/rejected": -24.158985137939453, + "rewards/weighted_accuracies": 0.625, + "rewards/weighted_chosen": -0.14908751845359802, + "rewards/weighted_margins": 0.33162689208984375, + "rewards/weighted_rejected": -0.48021697998046875, + "step": 240 + }, + { + "epoch": 0.13085579691180318, + "grad_norm": 86.49760437011719, + "learning_rate": 9.97289527905053e-07, + "logits/chosen": -0.478302001953125, + "logits/rejected": -0.48861923813819885, + "logps/chosen": -277.0523376464844, + "logps/rejected": -275.80938720703125, + "logps/weighted_chosen": -2.61376953125, + "logps/weighted_rejected": -2.787853956222534, + "loss": 0.7022, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -12.673730850219727, + "rewards/margins": 7.1806640625, + "rewards/rejected": -19.852344512939453, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.12388916313648224, + "rewards/weighted_margins": 0.19627074897289276, + "rewards/weighted_rejected": -0.3203796446323395, + "step": 250 + }, + { + "epoch": 0.1360900287882753, + "grad_norm": 19.698871612548828, + "learning_rate": 9.962563528175875e-07, + "logits/chosen": -0.4065658450126648, + "logits/rejected": -0.4432968199253082, + "logps/chosen": -310.62890625, + "logps/rejected": -281.46405029296875, + "logps/weighted_chosen": -2.184094190597534, + "logps/weighted_rejected": -3.0492796897888184, + "loss": 0.6507, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -11.896581649780273, + "rewards/margins": 8.622265815734863, + 
"rewards/rejected": -20.520313262939453, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": -0.1260833740234375, + "rewards/weighted_margins": 0.25025635957717896, + "rewards/weighted_rejected": -0.3761749267578125, + "step": 260 + }, + { + "epoch": 0.14132426066474746, + "grad_norm": 16.363121032714844, + "learning_rate": 9.950574321395277e-07, + "logits/chosen": -0.42208632826805115, + "logits/rejected": -0.4458427429199219, + "logps/chosen": -305.9046936035156, + "logps/rejected": -286.06561279296875, + "logps/weighted_chosen": -2.40838623046875, + "logps/weighted_rejected": -2.7938475608825684, + "loss": 0.6573, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -16.622364044189453, + "rewards/margins": 6.233202934265137, + "rewards/rejected": -22.855077743530273, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.189697265625, + "rewards/weighted_margins": 0.27490538358688354, + "rewards/weighted_rejected": -0.4645233154296875, + "step": 270 + }, + { + "epoch": 0.14655849254121958, + "grad_norm": 54.42692947387695, + "learning_rate": 9.936931663006413e-07, + "logits/chosen": -0.45263671875, + "logits/rejected": -0.44363707304000854, + "logps/chosen": -316.171875, + "logps/rejected": -303.3656311035156, + "logps/weighted_chosen": -2.4659423828125, + "logps/weighted_rejected": -3.0541749000549316, + "loss": 0.6068, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -13.402734756469727, + "rewards/margins": 10.619824409484863, + "rewards/rejected": -24.025390625, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.05214080959558487, + "rewards/weighted_margins": 0.40336912870407104, + "rewards/weighted_rejected": -0.455657958984375, + "step": 280 + }, + { + "epoch": 0.1517927244176917, + "grad_norm": 39.20017623901367, + "learning_rate": 9.921640109546357e-07, + "logits/chosen": -0.42310255765914917, + "logits/rejected": -0.48920440673828125, + 
"logps/chosen": -283.7171936035156, + "logps/rejected": -278.1859436035156, + "logps/weighted_chosen": -2.396167039871216, + "logps/weighted_rejected": -3.5881590843200684, + "loss": 0.6649, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -16.9111328125, + "rewards/margins": 9.704492568969727, + "rewards/rejected": -26.62109375, + "rewards/weighted_accuracies": 0.6031249761581421, + "rewards/weighted_chosen": -0.130279541015625, + "rewards/weighted_margins": 0.3882461488246918, + "rewards/weighted_rejected": -0.5187179446220398, + "step": 290 + }, + { + "epoch": 0.15702695629416383, + "grad_norm": 28.03601837158203, + "learning_rate": 9.90470476826975e-07, + "logits/chosen": -0.485189825296402, + "logits/rejected": -0.48862916231155396, + "logps/chosen": -289.09765625, + "logps/rejected": -297.625, + "logps/weighted_chosen": -2.2784485816955566, + "logps/weighted_rejected": -2.771862745285034, + "loss": 0.6608, + "rewards/accuracies": 0.640625, + "rewards/chosen": -19.676952362060547, + "rewards/margins": 10.679491996765137, + "rewards/rejected": -30.360937118530273, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.1669921875, + "rewards/weighted_margins": 0.28967589139938354, + "rewards/weighted_rejected": -0.4566032290458679, + "step": 300 + }, + { + "epoch": 0.16226118817063595, + "grad_norm": 538.50927734375, + "learning_rate": 9.886131295443002e-07, + "logits/chosen": -0.654278576374054, + "logits/rejected": -0.7076683044433594, + "logps/chosen": -341.85467529296875, + "logps/rejected": -309.89764404296875, + "logps/weighted_chosen": -2.629150390625, + "logps/weighted_rejected": -2.8698973655700684, + "loss": 0.6788, + "rewards/accuracies": 0.515625, + "rewards/chosen": -60.568748474121094, + "rewards/margins": -0.7822265625, + "rewards/rejected": -59.785743713378906, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.24791869521141052, + "rewards/weighted_margins": 
0.28089600801467896, + "rewards/weighted_rejected": -0.528765857219696, + "step": 310 + }, + { + "epoch": 0.16749542004710807, + "grad_norm": 34.656883239746094, + "learning_rate": 9.865925894455166e-07, + "logits/chosen": -0.7003936767578125, + "logits/rejected": -0.719250500202179, + "logps/chosen": -326.3960876464844, + "logps/rejected": -290.3453063964844, + "logps/weighted_chosen": -2.553356885910034, + "logps/weighted_rejected": -3.10992431640625, + "loss": 0.7054, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -26.532812118530273, + "rewards/margins": 6.3564453125, + "rewards/rejected": -32.88788986206055, + "rewards/weighted_accuracies": 0.609375, + "rewards/weighted_chosen": -0.18020018935203552, + "rewards/weighted_margins": 0.3489990234375, + "rewards/weighted_rejected": -0.5293639898300171, + "step": 320 + }, + { + "epoch": 0.17272965192358022, + "grad_norm": 61.1888542175293, + "learning_rate": 9.84409531374603e-07, + "logits/chosen": -0.6631911993026733, + "logits/rejected": -0.6448425054550171, + "logps/chosen": -324.87579345703125, + "logps/rejected": -291.71875, + "logps/weighted_chosen": -2.5611815452575684, + "logps/weighted_rejected": -3.060229539871216, + "loss": 0.6449, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -20.070018768310547, + "rewards/margins": 8.8251953125, + "rewards/rejected": -28.8876953125, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.140888974070549, + "rewards/weighted_margins": 0.3719635009765625, + "rewards/weighted_rejected": -0.5127013921737671, + "step": 330 + }, + { + "epoch": 0.17796388380005235, + "grad_norm": 27.2315673828125, + "learning_rate": 9.820646844552219e-07, + "logits/chosen": -0.6496349573135376, + "logits/rejected": -0.7006805539131165, + "logps/chosen": -295.5882873535156, + "logps/rejected": -297.4906311035156, + "logps/weighted_chosen": -2.6988892555236816, + "logps/weighted_rejected": -2.898681640625, + "loss": 0.6788, + 
"rewards/accuracies": 0.6875, + "rewards/chosen": -19.176855087280273, + "rewards/margins": 12.649316787719727, + "rewards/rejected": -31.822460174560547, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.23918533325195312, + "rewards/weighted_margins": 0.2961669862270355, + "rewards/weighted_rejected": -0.5351837277412415, + "step": 340 + }, + { + "epoch": 0.18319811567652447, + "grad_norm": 24.073888778686523, + "learning_rate": 9.795588318471964e-07, + "logits/chosen": -0.7137314081192017, + "logits/rejected": -0.7225399017333984, + "logps/chosen": -277.8890686035156, + "logps/rejected": -304.59063720703125, + "logps/weighted_chosen": -2.4300780296325684, + "logps/weighted_rejected": -2.771411180496216, + "loss": 0.6675, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -18.445703506469727, + "rewards/margins": 9.8720703125, + "rewards/rejected": -28.31640625, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -0.20159301161766052, + "rewards/weighted_margins": 0.2779785096645355, + "rewards/weighted_rejected": -0.47947996854782104, + "step": 350 + }, + { + "epoch": 0.1884323475529966, + "grad_norm": 12.750471115112305, + "learning_rate": 9.768928104849415e-07, + "logits/chosen": -0.7212737798690796, + "logits/rejected": -0.7225433588027954, + "logps/chosen": -299.53594970703125, + "logps/rejected": -275.5718688964844, + "logps/weighted_chosen": -2.667529344558716, + "logps/weighted_rejected": -2.800830125808716, + "loss": 0.6916, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -16.816015243530273, + "rewards/margins": 9.876562118530273, + "rewards/rejected": -26.690235137939453, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.15018615126609802, + "rewards/weighted_margins": 0.301962286233902, + "rewards/weighted_rejected": -0.45206451416015625, + "step": 360 + }, + { + "epoch": 0.19366657942946872, + "grad_norm": 29.441747665405273, + 
"learning_rate": 9.740675107979355e-07, + "logits/chosen": -0.6865798830986023, + "logits/rejected": -0.7117553949356079, + "logps/chosen": -331.06561279296875, + "logps/rejected": -300.31719970703125, + "logps/weighted_chosen": -1.9907715320587158, + "logps/weighted_rejected": -2.932177782058716, + "loss": 0.6819, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -16.022266387939453, + "rewards/margins": 9.630078315734863, + "rewards/rejected": -25.654491424560547, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -0.18877258896827698, + "rewards/weighted_margins": 0.2808380126953125, + "rewards/weighted_rejected": -0.4699081480503082, + "step": 370 + }, + { + "epoch": 0.19890081130594087, + "grad_norm": 29.483524322509766, + "learning_rate": 9.71083876413323e-07, + "logits/chosen": -0.6637862920761108, + "logits/rejected": -0.669873058795929, + "logps/chosen": -322.3882751464844, + "logps/rejected": -300.85858154296875, + "logps/weighted_chosen": -2.189379930496216, + "logps/weighted_rejected": -2.9217162132263184, + "loss": 0.6846, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -20.004688262939453, + "rewards/margins": 11.246289253234863, + "rewards/rejected": -31.24609375, + "rewards/weighted_accuracies": 0.6000000238418579, + "rewards/weighted_chosen": -0.22467346489429474, + "rewards/weighted_margins": 0.2720580995082855, + "rewards/weighted_rejected": -0.4967102110385895, + "step": 380 + }, + { + "epoch": 0.204135043182413, + "grad_norm": 20.563907623291016, + "learning_rate": 9.67942903840751e-07, + "logits/chosen": -0.7051689028739929, + "logits/rejected": -0.7537201046943665, + "logps/chosen": -324.1015625, + "logps/rejected": -310.375, + "logps/weighted_chosen": -2.397631883621216, + "logps/weighted_rejected": -2.950610399246216, + "loss": 0.6478, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -19.788671493530273, + "rewards/margins": 16.317577362060547, + "rewards/rejected": 
-36.111328125, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.22498169541358948, + "rewards/weighted_margins": 0.3520751893520355, + "rewards/weighted_rejected": -0.5770629644393921, + "step": 390 + }, + { + "epoch": 0.2093692750588851, + "grad_norm": 23.1771183013916, + "learning_rate": 9.646456421395447e-07, + "logits/chosen": -0.7504974603652954, + "logits/rejected": -0.7628723382949829, + "logps/chosen": -341.2171936035156, + "logps/rejected": -343.9375, + "logps/weighted_chosen": -2.2680420875549316, + "logps/weighted_rejected": -3.0065674781799316, + "loss": 0.6746, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -22.944530487060547, + "rewards/margins": 17.315624237060547, + "rewards/rejected": -40.2587890625, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.20337525010108948, + "rewards/weighted_margins": 0.216084286570549, + "rewards/weighted_rejected": -0.4196624755859375, + "step": 400 + }, + { + "epoch": 0.21460350693535724, + "grad_norm": 14.420520782470703, + "learning_rate": 9.611931925683266e-07, + "logits/chosen": -0.7154334783554077, + "logits/rejected": -0.7491073608398438, + "logps/chosen": -331.38983154296875, + "logps/rejected": -303.3890686035156, + "logps/weighted_chosen": -2.190844774246216, + "logps/weighted_rejected": -2.7472167015075684, + "loss": 0.6135, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -25.111621856689453, + "rewards/margins": 14.543554306030273, + "rewards/rejected": -39.658592224121094, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.21762695908546448, + "rewards/weighted_margins": 0.36674195528030396, + "rewards/weighted_rejected": -0.584503173828125, + "step": 410 + }, + { + "epoch": 0.21983773881182936, + "grad_norm": 27.429603576660156, + "learning_rate": 9.575867082172085e-07, + "logits/chosen": -0.7379547357559204, + "logits/rejected": -0.7826202511787415, + "logps/chosen": 
-337.46405029296875, + "logps/rejected": -320.23907470703125, + "logps/weighted_chosen": -2.6387085914611816, + "logps/weighted_rejected": -2.712329149246216, + "loss": 0.6716, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -32.554298400878906, + "rewards/margins": 17.513866424560547, + "rewards/rejected": -50.060157775878906, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.35613709688186646, + "rewards/weighted_margins": 0.38392335176467896, + "rewards/weighted_rejected": -0.7401062250137329, + "step": 420 + }, + { + "epoch": 0.22507197068830148, + "grad_norm": 18.97144889831543, + "learning_rate": 9.538273936226673e-07, + "logits/chosen": -0.778491199016571, + "logits/rejected": -0.811004638671875, + "logps/chosen": -292.83984375, + "logps/rejected": -304.05352783203125, + "logps/weighted_chosen": -2.796630859375, + "logps/weighted_rejected": -3.2444825172424316, + "loss": 0.6544, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -27.327733993530273, + "rewards/margins": 12.26318359375, + "rewards/rejected": -39.58984375, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.21069030463695526, + "rewards/weighted_margins": 0.3678832948207855, + "rewards/weighted_rejected": -0.57806396484375, + "step": 430 + }, + { + "epoch": 0.23030620256477363, + "grad_norm": 25.43462371826172, + "learning_rate": 9.499165043652391e-07, + "logits/chosen": -0.7674010992050171, + "logits/rejected": -0.7686828374862671, + "logps/chosen": -319.55859375, + "logps/rejected": -309.03436279296875, + "logps/weighted_chosen": -2.82305908203125, + "logps/weighted_rejected": -2.993237257003784, + "loss": 0.631, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -28.386133193969727, + "rewards/margins": 13.9384765625, + "rewards/rejected": -42.326072692871094, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.26903897523880005, + 
"rewards/weighted_margins": 0.353515625, + "rewards/weighted_rejected": -0.622546374797821, + "step": 440 + }, + { + "epoch": 0.23554043444124576, + "grad_norm": 65.78443908691406, + "learning_rate": 9.458553466501665e-07, + "logits/chosen": -0.8066772222518921, + "logits/rejected": -0.8363037109375, + "logps/chosen": -314.7945251464844, + "logps/rejected": -287.65313720703125, + "logps/weighted_chosen": -2.8233399391174316, + "logps/weighted_rejected": -3.013622999191284, + "loss": 0.6831, + "rewards/accuracies": 0.640625, + "rewards/chosen": -28.642578125, + "rewards/margins": 15.162694931030273, + "rewards/rejected": -43.80976486206055, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -0.3802246153354645, + "rewards/weighted_margins": 0.3342132568359375, + "rewards/weighted_rejected": -0.7139984369277954, + "step": 450 + }, + { + "epoch": 0.24077466631771788, + "grad_norm": 13.290085792541504, + "learning_rate": 9.416452768711366e-07, + "logits/chosen": -0.7957550287246704, + "logits/rejected": -0.8287414312362671, + "logps/chosen": -323.0093688964844, + "logps/rejected": -306.98907470703125, + "logps/weighted_chosen": -2.544872999191284, + "logps/weighted_rejected": -3.100903272628784, + "loss": 0.6759, + "rewards/accuracies": 0.640625, + "rewards/chosen": -30.906835556030273, + "rewards/margins": 16.355859756469727, + "rewards/rejected": -47.24980545043945, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.343759149312973, + "rewards/weighted_margins": 0.40337830781936646, + "rewards/weighted_rejected": -0.747100830078125, + "step": 460 + }, + { + "epoch": 0.24600889819419, + "grad_norm": 21.766939163208008, + "learning_rate": 9.372877011572557e-07, + "logits/chosen": -0.7200164794921875, + "logits/rejected": -0.742279052734375, + "logps/chosen": -342.75079345703125, + "logps/rejected": -318.60626220703125, + "logps/weighted_chosen": -2.5311522483825684, + "logps/weighted_rejected": 
-2.990124464035034, + "loss": 0.63, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -32.077247619628906, + "rewards/margins": 11.883398056030273, + "rewards/rejected": -43.959373474121094, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -0.30525511503219604, + "rewards/weighted_margins": 0.45678406953811646, + "rewards/weighted_rejected": -0.7624969482421875, + "step": 470 + }, + { + "epoch": 0.2512431300706621, + "grad_norm": 28.76239776611328, + "learning_rate": 9.327840749034141e-07, + "logits/chosen": -0.7930053472518921, + "logits/rejected": -0.8311401605606079, + "logps/chosen": -316.79998779296875, + "logps/rejected": -326.5062561035156, + "logps/weighted_chosen": -2.4120116233825684, + "logps/weighted_rejected": -3.591870069503784, + "loss": 0.6639, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -29.640039443969727, + "rewards/margins": 20.350976943969727, + "rewards/rejected": -49.9853515625, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -0.27521055936813354, + "rewards/weighted_margins": 0.4522705078125, + "rewards/weighted_rejected": -0.7274719476699829, + "step": 480 + }, + { + "epoch": 0.2564773619471343, + "grad_norm": 36.629127502441406, + "learning_rate": 9.281359022841965e-07, + "logits/chosen": -0.72747802734375, + "logits/rejected": -0.7426910400390625, + "logps/chosen": -308.5406188964844, + "logps/rejected": -300.71484375, + "logps/weighted_chosen": -2.6044554710388184, + "logps/weighted_rejected": -3.862866163253784, + "loss": 0.6178, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -33.54804611206055, + "rewards/margins": 22.3515625, + "rewards/rejected": -55.88496017456055, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.38198548555374146, + "rewards/weighted_margins": 0.525561511516571, + "rewards/weighted_rejected": -0.9073349237442017, + "step": 490 + }, + { + "epoch": 
0.26171159382360637, + "grad_norm": 20.296154022216797, + "learning_rate": 9.233447357514989e-07, + "logits/chosen": -0.7092193365097046, + "logits/rejected": -0.751629650592804, + "logps/chosen": -337.10467529296875, + "logps/rejected": -328.71875, + "logps/weighted_chosen": -3.054370164871216, + "logps/weighted_rejected": -3.5334715843200684, + "loss": 0.6534, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -39.15234375, + "rewards/margins": 19.770116806030273, + "rewards/rejected": -58.90898513793945, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.470510870218277, + "rewards/weighted_margins": 0.566607654094696, + "rewards/weighted_rejected": -1.0376465320587158, + "step": 500 + }, + { + "epoch": 0.26171159382360637, + "eval_logits/chosen": -0.8052441477775574, + "eval_logits/rejected": -0.8225547075271606, + "eval_logps/chosen": -333.44000244140625, + "eval_logps/rejected": -331.98199462890625, + "eval_logps/weighted_chosen": -2.756896734237671, + "eval_logps/weighted_rejected": -3.441680908203125, + "eval_loss": 0.6561886668205261, + "eval_rewards/accuracies": 0.6370000243186951, + "eval_rewards/chosen": -44.67877960205078, + "eval_rewards/margins": 19.602703094482422, + "eval_rewards/rejected": -64.27362823486328, + "eval_rewards/weighted_accuracies": 0.6445000171661377, + "eval_rewards/weighted_chosen": -0.485819548368454, + "eval_rewards/weighted_margins": 0.4620407819747925, + "eval_rewards/weighted_rejected": -0.9478604793548584, + "eval_runtime": 1263.3333, + "eval_samples_per_second": 1.583, + "eval_steps_per_second": 0.396, + "step": 500 + }, + { + "epoch": 0.2669458257000785, + "grad_norm": 44.70832824707031, + "learning_rate": 9.184121755160232e-07, + "logits/chosen": -0.7849181890487671, + "logits/rejected": -0.8171790838241577, + "logps/chosen": -344.84454345703125, + "logps/rejected": -356.3671875, + "logps/weighted_chosen": -2.9210448265075684, + "logps/weighted_rejected": -3.3969483375549316, + 
"loss": 0.6735, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -43.604881286621094, + "rewards/margins": 24.157032012939453, + "rewards/rejected": -67.75703430175781, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.43794554471969604, + "rewards/weighted_margins": 0.407052606344223, + "rewards/weighted_rejected": -0.8448547124862671, + "step": 510 + }, + { + "epoch": 0.2721800575765506, + "grad_norm": 34.907981872558594, + "learning_rate": 9.133398690128193e-07, + "logits/chosen": -0.8243468999862671, + "logits/rejected": -0.852618396282196, + "logps/chosen": -370.2640686035156, + "logps/rejected": -357.4296875, + "logps/weighted_chosen": -2.7059326171875, + "logps/weighted_rejected": -3.559033155441284, + "loss": 0.6233, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -48.807029724121094, + "rewards/margins": 28.050586700439453, + "rewards/rejected": -76.85234069824219, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.3096374571323395, + "rewards/weighted_margins": 0.5285431146621704, + "rewards/weighted_rejected": -0.838287353515625, + "step": 520 + }, + { + "epoch": 0.27741428945302277, + "grad_norm": 17.412511825561523, + "learning_rate": 9.081295103510554e-07, + "logits/chosen": -0.7943557500839233, + "logits/rejected": -0.8541763424873352, + "logps/chosen": -339.65313720703125, + "logps/rejected": -351.77813720703125, + "logps/weighted_chosen": -2.331298828125, + "logps/weighted_rejected": -3.5838379859924316, + "loss": 0.5587, + "rewards/accuracies": 0.65625, + "rewards/chosen": -55.494140625, + "rewards/margins": 26.642187118530273, + "rewards/rejected": -82.14140319824219, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.33439940214157104, + "rewards/weighted_margins": 0.7252563238143921, + "rewards/weighted_rejected": -1.0597717761993408, + "step": 530 + }, + { + "epoch": 0.2826485213294949, + "grad_norm": 30.677711486816406, + 
"learning_rate": 9.027828397481989e-07, + "logits/chosen": -0.7925201654434204, + "logits/rejected": -0.8262939453125, + "logps/chosen": -318.59063720703125, + "logps/rejected": -337.55157470703125, + "logps/weighted_chosen": -3.059436082839966, + "logps/weighted_rejected": -3.750012159347534, + "loss": 0.6464, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -57.99492263793945, + "rewards/margins": 24.116796493530273, + "rewards/rejected": -82.107421875, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -0.507769763469696, + "rewards/weighted_margins": 0.4883270263671875, + "rewards/weighted_rejected": -0.995800793170929, + "step": 540 + }, + { + "epoch": 0.287882753205967, + "grad_norm": 34.808658599853516, + "learning_rate": 8.973016429487988e-07, + "logits/chosen": -0.8280746340751648, + "logits/rejected": -0.8393570184707642, + "logps/chosen": -340.02032470703125, + "logps/rejected": -340.90625, + "logps/weighted_chosen": -3.016833543777466, + "logps/weighted_rejected": -3.3585205078125, + "loss": 0.6373, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -58.115234375, + "rewards/margins": 28.575389862060547, + "rewards/rejected": -86.7035140991211, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.5330657958984375, + "rewards/weighted_margins": 0.48836976289749146, + "rewards/weighted_rejected": -1.0212554931640625, + "step": 550 + }, + { + "epoch": 0.29311698508243916, + "grad_norm": 21.559553146362305, + "learning_rate": 8.916877506280601e-07, + "logits/chosen": -0.8576828241348267, + "logits/rejected": -0.850115954875946, + "logps/chosen": -343.0625, + "logps/rejected": -340.73126220703125, + "logps/weighted_chosen": -3.004504442214966, + "logps/weighted_rejected": -3.3214111328125, + "loss": 0.6493, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -62.214454650878906, + "rewards/margins": 24.381053924560547, + "rewards/rejected": 
-86.59492492675781, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.5099731683731079, + "rewards/weighted_margins": 0.535810112953186, + "rewards/weighted_rejected": -1.0458984375, + "step": 560 + }, + { + "epoch": 0.29835121695891126, + "grad_norm": 16.77034568786621, + "learning_rate": 8.85943037780415e-07, + "logits/chosen": -0.901629626750946, + "logits/rejected": -0.9031143188476562, + "logps/chosen": -347.7562561035156, + "logps/rejected": -320.90936279296875, + "logps/weighted_chosen": -2.9189209938049316, + "logps/weighted_rejected": -3.346874952316284, + "loss": 0.6796, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -61.52226638793945, + "rewards/margins": 17.764842987060547, + "rewards/rejected": -79.3128890991211, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -0.5591338872909546, + "rewards/weighted_margins": 0.44241029024124146, + "rewards/weighted_rejected": -1.00177001953125, + "step": 570 + }, + { + "epoch": 0.3035854488353834, + "grad_norm": 23.179088592529297, + "learning_rate": 8.800694230932884e-07, + "logits/chosen": -0.808392345905304, + "logits/rejected": -0.8254486322402954, + "logps/chosen": -345.52032470703125, + "logps/rejected": -338.59844970703125, + "logps/weighted_chosen": -2.4705810546875, + "logps/weighted_rejected": -3.031982421875, + "loss": 0.6672, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -56.009376525878906, + "rewards/margins": 17.356250762939453, + "rewards/rejected": -73.3921890258789, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.3469276428222656, + "rewards/weighted_margins": 0.38211363554000854, + "rewards/weighted_rejected": -0.7289062738418579, + "step": 580 + }, + { + "epoch": 0.30881968071185556, + "grad_norm": 16.077539443969727, + "learning_rate": 8.740688683062723e-07, + "logits/chosen": -0.8602691888809204, + "logits/rejected": -0.874432384967804, + "logps/chosen": 
-382.03436279296875, + "logps/rejected": -349.27655029296875, + "logps/weighted_chosen": -2.4807372093200684, + "logps/weighted_rejected": -3.101879835128784, + "loss": 0.6615, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -52.95586013793945, + "rewards/margins": 20.580469131469727, + "rewards/rejected": -73.5445327758789, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.31566160917282104, + "rewards/weighted_margins": 0.39473265409469604, + "rewards/weighted_rejected": -0.7103912234306335, + "step": 590 + }, + { + "epoch": 0.31405391258832765, + "grad_norm": 17.0419864654541, + "learning_rate": 8.679433775559215e-07, + "logits/chosen": -0.8191520571708679, + "logits/rejected": -0.8663116693496704, + "logps/chosen": -379.78125, + "logps/rejected": -361.57501220703125, + "logps/weighted_chosen": -2.305920362472534, + "logps/weighted_rejected": -3.3094482421875, + "loss": 0.6241, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -55.09687423706055, + "rewards/margins": 20.994531631469727, + "rewards/rejected": -76.0738296508789, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -0.31669920682907104, + "rewards/weighted_margins": 0.4563964903354645, + "rewards/weighted_rejected": -0.7728790044784546, + "step": 600 + }, + { + "epoch": 0.3192881444647998, + "grad_norm": 30.722089767456055, + "learning_rate": 8.616949967063871e-07, + "logits/chosen": -0.7851959466934204, + "logits/rejected": -0.82568359375, + "logps/chosen": -323.5859375, + "logps/rejected": -338.609375, + "logps/weighted_chosen": -2.7684326171875, + "logps/weighted_rejected": -3.191943407058716, + "loss": 0.6918, + "rewards/accuracies": 0.684374988079071, + "rewards/chosen": -55.26250076293945, + "rewards/margins": 22.568164825439453, + "rewards/rejected": -77.8238296508789, + "rewards/weighted_accuracies": 0.625, + "rewards/weighted_chosen": -0.423666387796402, + "rewards/weighted_margins": 
0.310333251953125, + "rewards/weighted_rejected": -0.733630359172821, + "step": 610 + }, + { + "epoch": 0.3245223763412719, + "grad_norm": 14.894518852233887, + "learning_rate": 8.553258126661154e-07, + "logits/chosen": -0.831768810749054, + "logits/rejected": -0.846484363079071, + "logps/chosen": -338.09686279296875, + "logps/rejected": -336.015625, + "logps/weighted_chosen": -2.84112548828125, + "logps/weighted_rejected": -3.4341063499450684, + "loss": 0.708, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -58.20390701293945, + "rewards/margins": 21.783594131469727, + "rewards/rejected": -79.9749984741211, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.4535583555698395, + "rewards/weighted_margins": 0.33486634492874146, + "rewards/weighted_rejected": -0.788104236125946, + "step": 620 + }, + { + "epoch": 0.32975660821774405, + "grad_norm": 23.813823699951172, + "learning_rate": 8.488379526908368e-07, + "logits/chosen": -0.826812744140625, + "logits/rejected": -0.837506115436554, + "logps/chosen": -352.4593811035156, + "logps/rejected": -357.29376220703125, + "logps/weighted_chosen": -2.6490235328674316, + "logps/weighted_rejected": -3.211181640625, + "loss": 0.6454, + "rewards/accuracies": 0.640625, + "rewards/chosen": -61.892189025878906, + "rewards/margins": 26.424219131469727, + "rewards/rejected": -88.33320617675781, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.3862060606479645, + "rewards/weighted_margins": 0.415771484375, + "rewards/weighted_rejected": -0.8020385503768921, + "step": 630 + }, + { + "epoch": 0.33499084009421615, + "grad_norm": 19.41891098022461, + "learning_rate": 8.422335836730802e-07, + "logits/chosen": -0.7994629144668579, + "logits/rejected": -0.7995041012763977, + "logps/chosen": -333.2593688964844, + "logps/rejected": -366.6499938964844, + "logps/weighted_chosen": -2.6496825218200684, + "logps/weighted_rejected": -3.1250548362731934, + "loss": 
0.6734, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -59.58320236206055, + "rewards/margins": 29.476757049560547, + "rewards/rejected": -89.0625, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.47590941190719604, + "rewards/weighted_margins": 0.394134521484375, + "rewards/weighted_rejected": -0.8701080083847046, + "step": 640 + }, + { + "epoch": 0.3402250719706883, + "grad_norm": 16.819276809692383, + "learning_rate": 8.355149114184485e-07, + "logits/chosen": -0.846386730670929, + "logits/rejected": -0.8338836431503296, + "logps/chosen": -370.0062561035156, + "logps/rejected": -376.0843811035156, + "logps/weighted_chosen": -2.787951707839966, + "logps/weighted_rejected": -3.106738328933716, + "loss": 0.6483, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -62.594337463378906, + "rewards/margins": 31.204687118530273, + "rewards/rejected": -93.80000305175781, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.5348541140556335, + "rewards/weighted_margins": 0.446258544921875, + "rewards/weighted_rejected": -0.980926513671875, + "step": 650 + }, + { + "epoch": 0.34545930384716045, + "grad_norm": 18.66504669189453, + "learning_rate": 8.286841799088963e-07, + "logits/chosen": -0.8683761358261108, + "logits/rejected": -0.860211193561554, + "logps/chosen": -344.94061279296875, + "logps/rejected": -343.61407470703125, + "logps/weighted_chosen": -2.3468871116638184, + "logps/weighted_rejected": -2.966168165206909, + "loss": 0.6577, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -60.857032775878906, + "rewards/margins": 19.649999618530273, + "rewards/rejected": -80.50117492675781, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.4206695556640625, + "rewards/weighted_margins": 0.41551512479782104, + "rewards/weighted_rejected": -0.8359512090682983, + "step": 660 + }, + { + "epoch": 0.35069353572363254, + "grad_norm": 
26.941055297851562, + "learning_rate": 8.217436705532599e-07, + "logits/chosen": -0.8248077630996704, + "logits/rejected": -0.8512848019599915, + "logps/chosen": -369.5484313964844, + "logps/rejected": -348.30157470703125, + "logps/weighted_chosen": -2.3807740211486816, + "logps/weighted_rejected": -3.006176710128784, + "loss": 0.6373, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -66.71875, + "rewards/margins": 17.846094131469727, + "rewards/rejected": -84.57929992675781, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.3303161561489105, + "rewards/weighted_margins": 0.45032960176467896, + "rewards/weighted_rejected": -0.7810913324356079, + "step": 670 + }, + { + "epoch": 0.3559277676001047, + "grad_norm": 1516.0828857421875, + "learning_rate": 8.14695701425284e-07, + "logits/chosen": -0.8020523190498352, + "logits/rejected": -0.845899224281311, + "logps/chosen": -371.13751220703125, + "logps/rejected": -352.38592529296875, + "logps/weighted_chosen": -2.49072265625, + "logps/weighted_rejected": -3.2708497047424316, + "loss": 0.5885, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -65.716796875, + "rewards/margins": 24.369531631469727, + "rewards/rejected": -90.1097640991211, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.330526739358902, + "rewards/weighted_margins": 0.5371948480606079, + "rewards/weighted_rejected": -0.867462158203125, + "step": 680 + }, + { + "epoch": 0.3611619994765768, + "grad_norm": 295.9232482910156, + "learning_rate": 8.075426264894046e-07, + "logits/chosen": -0.7686309814453125, + "logits/rejected": -0.805737316608429, + "logps/chosen": -370.75, + "logps/rejected": -373.64373779296875, + "logps/weighted_chosen": -2.509265184402466, + "logps/weighted_rejected": -3.719970703125, + "loss": 0.5532, + "rewards/accuracies": 0.6875, + "rewards/chosen": -65.3414077758789, + "rewards/margins": 30.711523056030273, + "rewards/rejected": 
-96.052734375, + "rewards/weighted_accuracies": 0.7406250238418579, + "rewards/weighted_chosen": -0.3384948670864105, + "rewards/weighted_margins": 0.6478027105331421, + "rewards/weighted_rejected": -0.9860213994979858, + "step": 690 + }, + { + "epoch": 0.36639623135304894, + "grad_norm": 80.06324768066406, + "learning_rate": 8.002868348145435e-07, + "logits/chosen": -0.7615035772323608, + "logits/rejected": -0.758954644203186, + "logps/chosen": -364.390625, + "logps/rejected": -353.75, + "logps/weighted_chosen": -2.562756299972534, + "logps/weighted_rejected": -2.821521043777466, + "loss": 0.6219, + "rewards/accuracies": 0.609375, + "rewards/chosen": -64.1957015991211, + "rewards/margins": 21.513866424560547, + "rewards/rejected": -85.70429992675781, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -0.38551026582717896, + "rewards/weighted_margins": 0.45988160371780396, + "rewards/weighted_rejected": -0.8446716070175171, + "step": 700 + }, + { + "epoch": 0.3716304632295211, + "grad_norm": 1476.4896240234375, + "learning_rate": 7.92930749776179e-07, + "logits/chosen": -0.75201416015625, + "logits/rejected": -0.7803069949150085, + "logps/chosen": -337.1851501464844, + "logps/rejected": -346.5375061035156, + "logps/weighted_chosen": -2.752087354660034, + "logps/weighted_rejected": -3.417065382003784, + "loss": 0.6452, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -65.52070617675781, + "rewards/margins": 20.681640625, + "rewards/rejected": -86.203125, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -0.2934509217739105, + "rewards/weighted_margins": 0.537158191204071, + "rewards/weighted_rejected": -0.8301132321357727, + "step": 710 + }, + { + "epoch": 0.3768646951059932, + "grad_norm": 37.869117736816406, + "learning_rate": 7.854768282469582e-07, + "logits/chosen": -0.814867377281189, + "logits/rejected": -0.8494598269462585, + "logps/chosen": -332.6953125, + "logps/rejected": 
-362.8187561035156, + "logps/weighted_chosen": -2.5288939476013184, + "logps/weighted_rejected": -3.1172118186950684, + "loss": 0.6496, + "rewards/accuracies": 0.640625, + "rewards/chosen": -55.247657775878906, + "rewards/margins": 29.8330078125, + "rewards/rejected": -85.080078125, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.355978399515152, + "rewards/weighted_margins": 0.4522338807582855, + "rewards/weighted_rejected": -0.8084503412246704, + "step": 720 + }, + { + "epoch": 0.38209892698246534, + "grad_norm": 48.1231575012207, + "learning_rate": 7.779275597761215e-07, + "logits/chosen": -0.7673202753067017, + "logits/rejected": -0.81195068359375, + "logps/chosen": -330.94451904296875, + "logps/rejected": -355.31561279296875, + "logps/weighted_chosen": -2.6014404296875, + "logps/weighted_rejected": -3.1932616233825684, + "loss": 0.5952, + "rewards/accuracies": 0.690625011920929, + "rewards/chosen": -54.890235900878906, + "rewards/margins": 31.880468368530273, + "rewards/rejected": -86.75312805175781, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.2228240966796875, + "rewards/weighted_margins": 0.6149749755859375, + "rewards/weighted_rejected": -0.8373657464981079, + "step": 730 + }, + { + "epoch": 0.38733315885893743, + "grad_norm": 296.0299377441406, + "learning_rate": 7.702854657580126e-07, + "logits/chosen": -0.8295089602470398, + "logits/rejected": -0.843798816204071, + "logps/chosen": -352.19842529296875, + "logps/rejected": -333.7593688964844, + "logps/weighted_chosen": -2.45697021484375, + "logps/weighted_rejected": -3.375244140625, + "loss": 0.6318, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -57.54804611206055, + "rewards/margins": 21.8720703125, + "rewards/rejected": -79.4625015258789, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.19748535752296448, + "rewards/weighted_margins": 0.5647827386856079, + 
"rewards/weighted_rejected": -0.762377917766571, + "step": 740 + }, + { + "epoch": 0.3925673907354096, + "grad_norm": 222.2068328857422, + "learning_rate": 7.625530985899547e-07, + "logits/chosen": -0.8145691156387329, + "logits/rejected": -0.8263305425643921, + "logps/chosen": -328.7578125, + "logps/rejected": -331.390625, + "logps/weighted_chosen": -2.581188917160034, + "logps/weighted_rejected": -3.505688428878784, + "loss": 0.6377, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -61.987892150878906, + "rewards/margins": 24.421483993530273, + "rewards/rejected": -86.39921569824219, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -0.493093878030777, + "rewards/weighted_margins": 0.4742370545864105, + "rewards/weighted_rejected": -0.9672302007675171, + "step": 750 + }, + { + "epoch": 0.39780162261188173, + "grad_norm": 34.57517623901367, + "learning_rate": 7.547330408197694e-07, + "logits/chosen": -0.8249969482421875, + "logits/rejected": -0.8720428347587585, + "logps/chosen": -363.44219970703125, + "logps/rejected": -345.2250061035156, + "logps/weighted_chosen": -2.4618163108825684, + "logps/weighted_rejected": -3.28759765625, + "loss": 0.6383, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -67.7855453491211, + "rewards/margins": 21.181835174560547, + "rewards/rejected": -88.9535140991211, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.3757568299770355, + "rewards/weighted_margins": 0.4518585205078125, + "rewards/weighted_rejected": -0.8270477056503296, + "step": 760 + }, + { + "epoch": 0.40303585448835383, + "grad_norm": 26.55402946472168, + "learning_rate": 7.468279042832271e-07, + "logits/chosen": -0.8424628973007202, + "logits/rejected": -0.8771301507949829, + "logps/chosen": -347.6484375, + "logps/rejected": -388.42266845703125, + "logps/weighted_chosen": -2.660815477371216, + "logps/weighted_rejected": -3.1720213890075684, + "loss": 0.6743, + 
"rewards/accuracies": 0.653124988079071, + "rewards/chosen": -67.98554992675781, + "rewards/margins": 30.81640625, + "rewards/rejected": -98.75859069824219, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.5194793939590454, + "rewards/weighted_margins": 0.3566345274448395, + "rewards/weighted_rejected": -0.8764098882675171, + "step": 770 + }, + { + "epoch": 0.408270086364826, + "grad_norm": 14.42599868774414, + "learning_rate": 7.388403292317154e-07, + "logits/chosen": -0.7979522943496704, + "logits/rejected": -0.8573578000068665, + "logps/chosen": -367.4046936035156, + "logps/rejected": -358.2515563964844, + "logps/weighted_chosen": -2.490283250808716, + "logps/weighted_rejected": -3.1418213844299316, + "loss": 0.6398, + "rewards/accuracies": 0.65625, + "rewards/chosen": -66.2933578491211, + "rewards/margins": 26.066015243530273, + "rewards/rejected": -92.384765625, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -0.461639404296875, + "rewards/weighted_margins": 0.452981561422348, + "rewards/weighted_rejected": -0.914324939250946, + "step": 780 + }, + { + "epoch": 0.4135043182412981, + "grad_norm": 21.45990753173828, + "learning_rate": 7.307729834504154e-07, + "logits/chosen": -0.8032287359237671, + "logits/rejected": -0.8670104742050171, + "logps/chosen": -351.39532470703125, + "logps/rejected": -360.1734313964844, + "logps/weighted_chosen": -2.3328614234924316, + "logps/weighted_rejected": -3.198779344558716, + "loss": 0.6356, + "rewards/accuracies": 0.609375, + "rewards/chosen": -69.6167984008789, + "rewards/margins": 24.462499618530273, + "rewards/rejected": -94.07890319824219, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.3527267575263977, + "rewards/weighted_margins": 0.45725250244140625, + "rewards/weighted_rejected": -0.8100005984306335, + "step": 790 + }, + { + "epoch": 0.4187385501177702, + "grad_norm": 20.13976287841797, + "learning_rate": 
7.226285613672847e-07, + "logits/chosen": -0.741473376750946, + "logits/rejected": -0.7820758819580078, + "logps/chosen": -342.1156311035156, + "logps/rejected": -382.6937561035156, + "logps/weighted_chosen": -2.3370361328125, + "logps/weighted_rejected": -3.3424315452575684, + "loss": 0.6236, + "rewards/accuracies": 0.703125, + "rewards/chosen": -64.8167953491211, + "rewards/margins": 39.174217224121094, + "rewards/rejected": -103.96992492675781, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.34785765409469604, + "rewards/weighted_margins": 0.5167236328125, + "rewards/weighted_rejected": -0.864398181438446, + "step": 800 + }, + { + "epoch": 0.4239727819942423, + "grad_norm": 21.571788787841797, + "learning_rate": 7.144097831531398e-07, + "logits/chosen": -0.6900985836982727, + "logits/rejected": -0.7189788818359375, + "logps/chosen": -344.78436279296875, + "logps/rejected": -364.53436279296875, + "logps/weighted_chosen": -2.346606492996216, + "logps/weighted_rejected": -3.139209032058716, + "loss": 0.6171, + "rewards/accuracies": 0.640625, + "rewards/chosen": -74.72969055175781, + "rewards/margins": 25.596094131469727, + "rewards/rejected": -100.31640625, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.332855224609375, + "rewards/weighted_margins": 0.513507068157196, + "rewards/weighted_rejected": -0.8466736078262329, + "step": 810 + }, + { + "epoch": 0.42920701387071447, + "grad_norm": 23.761091232299805, + "learning_rate": 7.061193938131396e-07, + "logits/chosen": -0.620227038860321, + "logits/rejected": -0.6747413873672485, + "logps/chosen": -377.5609436035156, + "logps/rejected": -363.4296875, + "logps/weighted_chosen": -2.772265672683716, + "logps/weighted_rejected": -3.069580078125, + "loss": 0.6365, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -77.7945327758789, + "rewards/margins": 20.221874237060547, + "rewards/rejected": -97.98515319824219, + 
"rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.3748535215854645, + "rewards/weighted_margins": 0.473724365234375, + "rewards/weighted_rejected": -0.8482757806777954, + "step": 820 + }, + { + "epoch": 0.4344412457471866, + "grad_norm": 21.571779251098633, + "learning_rate": 6.977601622699789e-07, + "logits/chosen": -0.689013659954071, + "logits/rejected": -0.7498534917831421, + "logps/chosen": -354.41876220703125, + "logps/rejected": -392.3500061035156, + "logps/weighted_chosen": -2.6583251953125, + "logps/weighted_rejected": -3.377002000808716, + "loss": 0.5618, + "rewards/accuracies": 0.7093750238418579, + "rewards/chosen": -69.55390930175781, + "rewards/margins": 43.986717224121094, + "rewards/rejected": -113.5484390258789, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -0.28594970703125, + "rewards/weighted_margins": 0.6694701910018921, + "rewards/weighted_rejected": -0.955474853515625, + "step": 830 + }, + { + "epoch": 0.4396754776236587, + "grad_norm": 21.845787048339844, + "learning_rate": 6.893348804390882e-07, + "logits/chosen": -0.7911956906318665, + "logits/rejected": -0.8087249994277954, + "logps/chosen": -377.0531311035156, + "logps/rejected": -377.48126220703125, + "logps/weighted_chosen": -2.844287157058716, + "logps/weighted_rejected": -3.24560546875, + "loss": 0.5927, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -81.9203109741211, + "rewards/margins": 35.79375076293945, + "rewards/rejected": -117.70625305175781, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.3613952696323395, + "rewards/weighted_margins": 0.5852203369140625, + "rewards/weighted_rejected": -0.94671630859375, + "step": 840 + }, + { + "epoch": 0.44490970950013087, + "grad_norm": 13.673724174499512, + "learning_rate": 6.808463622961578e-07, + "logits/chosen": -0.765423595905304, + "logits/rejected": -0.8230966329574585, + "logps/chosen": -385.33905029296875, + 
"logps/rejected": -413.21563720703125, + "logps/weighted_chosen": -2.7145752906799316, + "logps/weighted_rejected": -3.412890672683716, + "loss": 0.5718, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -92.6429672241211, + "rewards/margins": 38.67695236206055, + "rewards/rejected": -131.3136749267578, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.43825072050094604, + "rewards/weighted_margins": 0.649249255657196, + "rewards/weighted_rejected": -1.0877685546875, + "step": 850 + }, + { + "epoch": 0.45014394137660296, + "grad_norm": 17.156784057617188, + "learning_rate": 6.722974429372925e-07, + "logits/chosen": -0.733477771282196, + "logits/rejected": -0.7933975458145142, + "logps/chosen": -418.08282470703125, + "logps/rejected": -417.9937438964844, + "logps/weighted_chosen": -2.5140380859375, + "logps/weighted_rejected": -3.9316039085388184, + "loss": 0.5611, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -114.50508117675781, + "rewards/margins": 41.392189025878906, + "rewards/rejected": -155.9523468017578, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.5683807134628296, + "rewards/weighted_margins": 0.804516613483429, + "rewards/weighted_rejected": -1.373052954673767, + "step": 860 + }, + { + "epoch": 0.4553781732530751, + "grad_norm": 23.082002639770508, + "learning_rate": 6.636909776321128e-07, + "logits/chosen": -0.8063064813613892, + "logits/rejected": -0.802105724811554, + "logps/chosen": -369.12811279296875, + "logps/rejected": -413.70782470703125, + "logps/weighted_chosen": -2.945758104324341, + "logps/weighted_rejected": -3.6049561500549316, + "loss": 0.5946, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -107.4222640991211, + "rewards/margins": 41.763671875, + "rewards/rejected": -149.2078094482422, + "rewards/weighted_accuracies": 0.65625, + "rewards/weighted_chosen": -0.640515148639679, + "rewards/weighted_margins": 0.601641833782196, + 
"rewards/weighted_rejected": -1.2423064708709717, + "step": 870 + }, + { + "epoch": 0.46061240512954726, + "grad_norm": 27.672487258911133, + "learning_rate": 6.550298408701174e-07, + "logits/chosen": -0.778796374797821, + "logits/rejected": -0.830426037311554, + "logps/chosen": -389.8421936035156, + "logps/rejected": -428.6312561035156, + "logps/weighted_chosen": -3.13909912109375, + "logps/weighted_rejected": -4.012915134429932, + "loss": 0.6358, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -103.14042663574219, + "rewards/margins": 43.176368713378906, + "rewards/rejected": -146.2609405517578, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.5248870849609375, + "rewards/weighted_margins": 0.555926501750946, + "rewards/weighted_rejected": -1.0807831287384033, + "step": 880 + }, + { + "epoch": 0.46584663700601936, + "grad_norm": 20.153644561767578, + "learning_rate": 6.463169254006276e-07, + "logits/chosen": -0.7750915288925171, + "logits/rejected": -0.8219833374023438, + "logps/chosen": -377.8421936035156, + "logps/rejected": -378.10467529296875, + "logps/weighted_chosen": -2.7843995094299316, + "logps/weighted_rejected": -3.576098680496216, + "loss": 0.5705, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -96.46601867675781, + "rewards/margins": 34.888282775878906, + "rewards/rejected": -131.3464813232422, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.39057618379592896, + "rewards/weighted_margins": 0.706072986125946, + "rewards/weighted_rejected": -1.0967223644256592, + "step": 890 + }, + { + "epoch": 0.4710808688824915, + "grad_norm": 42.77175521850586, + "learning_rate": 6.375551412666326e-07, + "logits/chosen": -0.7759063839912415, + "logits/rejected": -0.8005096316337585, + "logps/chosen": -379.19219970703125, + "logps/rejected": -388.72344970703125, + "logps/weighted_chosen": -2.5501952171325684, + "logps/weighted_rejected": -3.5579466819763184, + "loss": 0.6503, 
+ "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -105.25859069824219, + "rewards/margins": 27.150781631469727, + "rewards/rejected": -132.42733764648438, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.5587005615234375, + "rewards/weighted_margins": 0.5813232660293579, + "rewards/weighted_rejected": -1.1405792236328125, + "step": 900 + }, + { + "epoch": 0.4763151007589636, + "grad_norm": 24.800992965698242, + "learning_rate": 6.287474148328583e-07, + "logits/chosen": -0.7202819585800171, + "logits/rejected": -0.7240753173828125, + "logps/chosen": -371.1812438964844, + "logps/rejected": -371.109375, + "logps/weighted_chosen": -2.950915575027466, + "logps/weighted_rejected": -4.175073146820068, + "loss": 0.6282, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -100.2894515991211, + "rewards/margins": 23.904687881469727, + "rewards/rejected": -124.20625305175781, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -0.5755615234375, + "rewards/weighted_margins": 0.5273803472518921, + "rewards/weighted_rejected": -1.1032683849334717, + "step": 910 + }, + { + "epoch": 0.48154933263543576, + "grad_norm": 18.644733428955078, + "learning_rate": 6.198966878083857e-07, + "logits/chosen": -0.7572265863418579, + "logits/rejected": -0.7787246704101562, + "logps/chosen": -368.4359436035156, + "logps/rejected": -402.46563720703125, + "logps/weighted_chosen": -2.8515868186950684, + "logps/weighted_rejected": -3.4952635765075684, + "loss": 0.6159, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -97.32890319824219, + "rewards/margins": 38.184959411621094, + "rewards/rejected": -135.45703125, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -0.4532226622104645, + "rewards/weighted_margins": 0.559436023235321, + "rewards/weighted_rejected": -1.012457251548767, + "step": 920 + }, + { + "epoch": 0.48678356451190785, + "grad_norm": 
44.07575988769531, + "learning_rate": 6.110059162641439e-07, + "logits/chosen": -0.7723480463027954, + "logits/rejected": -0.802471935749054, + "logps/chosen": -375.984375, + "logps/rejected": -391.7906188964844, + "logps/weighted_chosen": -2.391467332839966, + "logps/weighted_rejected": -3.1367430686950684, + "loss": 0.6244, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -93.6128921508789, + "rewards/margins": 30.975391387939453, + "rewards/rejected": -124.58906555175781, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.4246048033237457, + "rewards/weighted_margins": 0.47894287109375, + "rewards/weighted_rejected": -0.904034435749054, + "step": 930 + }, + { + "epoch": 0.49201779638838, + "grad_norm": 26.321582794189453, + "learning_rate": 6.020780696456059e-07, + "logits/chosen": -0.7484909296035767, + "logits/rejected": -0.7926574945449829, + "logps/chosen": -359.21875, + "logps/rejected": -411.1890563964844, + "logps/weighted_chosen": -2.2330689430236816, + "logps/weighted_rejected": -3.3442625999450684, + "loss": 0.5653, + "rewards/accuracies": 0.6875, + "rewards/chosen": -88.5511703491211, + "rewards/margins": 55.419921875, + "rewards/rejected": -143.99453735351562, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.47887879610061646, + "rewards/weighted_margins": 0.6291259527206421, + "rewards/weighted_rejected": -1.1078612804412842, + "step": 940 + }, + { + "epoch": 0.49725202826485215, + "grad_norm": 44.13637924194336, + "learning_rate": 5.931161297810185e-07, + "logits/chosen": -0.8126861453056335, + "logits/rejected": -0.829357922077179, + "logps/chosen": -376.6875, + "logps/rejected": -399.51251220703125, + "logps/weighted_chosen": -3.1253294944763184, + "logps/weighted_rejected": -3.8741211891174316, + "loss": 0.6461, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -103.4652328491211, + "rewards/margins": 34.713279724121094, + "rewards/rejected": 
-138.1457061767578, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -0.616656482219696, + "rewards/weighted_margins": 0.5488006472587585, + "rewards/weighted_rejected": -1.1659362316131592, + "step": 950 + }, + { + "epoch": 0.5024862601413242, + "grad_norm": 24.634550094604492, + "learning_rate": 5.841230898854959e-07, + "logits/chosen": -0.742846667766571, + "logits/rejected": -0.765765368938446, + "logps/chosen": -421.84686279296875, + "logps/rejected": -429.4312438964844, + "logps/weighted_chosen": -2.9749999046325684, + "logps/weighted_rejected": -3.778076171875, + "loss": 0.6955, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -121.6539077758789, + "rewards/margins": 46.04961013793945, + "rewards/rejected": -167.69375610351562, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.820935070514679, + "rewards/weighted_margins": 0.548413097858429, + "rewards/weighted_rejected": -1.369299292564392, + "step": 960 + }, + { + "epoch": 0.5077204920177963, + "grad_norm": 28.748939514160156, + "learning_rate": 5.751019535613102e-07, + "logits/chosen": -0.6985992193222046, + "logits/rejected": -0.7225921750068665, + "logps/chosen": -365.7984313964844, + "logps/rejected": -400.90625, + "logps/weighted_chosen": -2.9286131858825684, + "logps/weighted_rejected": -4.021093845367432, + "loss": 0.6325, + "rewards/accuracies": 0.684374988079071, + "rewards/chosen": -106.0390625, + "rewards/margins": 47.953514099121094, + "rewards/rejected": -153.97265625, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.7157821655273438, + "rewards/weighted_margins": 0.7237914800643921, + "rewards/weighted_rejected": -1.439599633216858, + "step": 970 + }, + { + "epoch": 0.5129547238942685, + "grad_norm": 15.98474407196045, + "learning_rate": 5.660557337947117e-07, + "logits/chosen": -0.6841033697128296, + "logits/rejected": -0.6997619867324829, + "logps/chosen": -409.1937561035156, + 
"logps/rejected": -406.05938720703125, + "logps/weighted_chosen": -2.480639696121216, + "logps/weighted_rejected": -3.3584961891174316, + "loss": 0.5997, + "rewards/accuracies": 0.640625, + "rewards/chosen": -114.5328140258789, + "rewards/margins": 34.099998474121094, + "rewards/rejected": -148.6570281982422, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.572741687297821, + "rewards/weighted_margins": 0.527575671672821, + "rewards/weighted_rejected": -1.100128173828125, + "step": 980 + }, + { + "epoch": 0.5181889557707406, + "grad_norm": 41.099185943603516, + "learning_rate": 5.569874519496174e-07, + "logits/chosen": -0.7119758725166321, + "logits/rejected": -0.7671966552734375, + "logps/chosen": -381.44842529296875, + "logps/rejected": -410.5015563964844, + "logps/weighted_chosen": -2.8494019508361816, + "logps/weighted_rejected": -3.8323974609375, + "loss": 0.6259, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -106.146484375, + "rewards/margins": 37.33867263793945, + "rewards/rejected": -143.4011688232422, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.4599502682685852, + "rewards/weighted_margins": 0.6096404790878296, + "rewards/weighted_rejected": -1.070257544517517, + "step": 990 + }, + { + "epoch": 0.5234231876472127, + "grad_norm": 34.9498176574707, + "learning_rate": 5.47900136758499e-07, + "logits/chosen": -0.6499813199043274, + "logits/rejected": -0.7194549441337585, + "logps/chosen": -369.4429626464844, + "logps/rejected": -382.0953063964844, + "logps/weighted_chosen": -2.7041993141174316, + "logps/weighted_rejected": -3.534008741378784, + "loss": 0.5974, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -101.8080062866211, + "rewards/margins": 38.08086013793945, + "rewards/rejected": -139.9406280517578, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -0.44673460721969604, + "rewards/weighted_margins": 
0.6419677734375, + "rewards/weighted_rejected": -1.08880615234375, + "step": 1000 + }, + { + "epoch": 0.5234231876472127, + "eval_logits/chosen": -0.7614516615867615, + "eval_logits/rejected": -0.7845029234886169, + "eval_logps/chosen": -403.2145080566406, + "eval_logps/rejected": -419.8420104980469, + "eval_logps/weighted_chosen": -2.8744430541992188, + "eval_logps/weighted_rejected": -3.6894454956054688, + "eval_loss": 0.6146492958068848, + "eval_rewards/accuracies": 0.6269999742507935, + "eval_rewards/chosen": -114.45649719238281, + "eval_rewards/margins": 37.66427993774414, + "eval_rewards/rejected": -152.1232452392578, + "eval_rewards/weighted_accuracies": 0.6679999828338623, + "eval_rewards/weighted_chosen": -0.6033662557601929, + "eval_rewards/weighted_margins": 0.5922585129737854, + "eval_rewards/weighted_rejected": -1.195624828338623, + "eval_runtime": 1076.2039, + "eval_samples_per_second": 1.858, + "eval_steps_per_second": 0.465, + "step": 1000 + } + ], + "logging_steps": 10, + "max_steps": 1911, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000..55fbd6a --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce12db195466115f95dcb5cdef20e4bd0d806824726d6c0b3824d1ebf482ce4d +size 8721 diff --git a/checkpoint-1000/zero_to_fp32.py b/checkpoint-1000/zero_to_fp32.py new file mode 100644 index 0000000..0e75914 --- /dev/null +++ b/checkpoint-1000/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft 
Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = 
os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = 
state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-1500/chat_template.jinja b/checkpoint-1500/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/checkpoint-1500/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-1500/config.json b/checkpoint-1500/config.json new file mode 100644 index 0000000..3f8f5c0 --- /dev/null +++ b/checkpoint-1500/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 
128256 +} diff --git a/checkpoint-1500/generation_config.json b/checkpoint-1500/generation_config.json new file mode 100644 index 0000000..fc3c54a --- /dev/null +++ b/checkpoint-1500/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "eos_token_id": 128001, + "transformers_version": "4.54.1" +} diff --git a/checkpoint-1500/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-1500/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000..1659b28 --- /dev/null +++ b/checkpoint-1500/global_step1500/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:112b16c71e3a472b8174b30affa74b2e86d64399100147118f9084fda05d049f +size 149296 diff --git a/checkpoint-1500/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt b/checkpoint-1500/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000..a32c90f --- /dev/null +++ b/checkpoint-1500/global_step1500/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9fe3952d6788f8fa09445b9299a846705585cf53b1ab44ec3bb59fad7f2b6bd +size 149296 diff --git a/checkpoint-1500/latest b/checkpoint-1500/latest new file mode 100644 index 0000000..c56ff77 --- /dev/null +++ b/checkpoint-1500/latest @@ -0,0 +1 @@ +global_step1500 \ No newline at end of file diff --git a/checkpoint-1500/model-00001-of-00004.safetensors b/checkpoint-1500/model-00001-of-00004.safetensors new file mode 100644 index 0000000..a430696 --- /dev/null +++ b/checkpoint-1500/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc5f3b0ad1eed6190cc8b1526dcdb3ffee36e94fa9bb209ed296439edf8acea +size 4976698672 diff --git a/checkpoint-1500/model-00002-of-00004.safetensors b/checkpoint-1500/model-00002-of-00004.safetensors new file mode 100644 index 0000000..96ce308 --- 
/dev/null +++ b/checkpoint-1500/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f36f97c5ed83fd7205b638de5caf5e06e3eddf53d0df015533c8305d778990e +size 4999802720 diff --git a/checkpoint-1500/model-00003-of-00004.safetensors b/checkpoint-1500/model-00003-of-00004.safetensors new file mode 100644 index 0000000..5c7c99d --- /dev/null +++ b/checkpoint-1500/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a5f800671cf009d7dc2462b54f47a583563e4afdbeefaccc631f8489f5ab4a5 +size 4915916176 diff --git a/checkpoint-1500/model-00004-of-00004.safetensors b/checkpoint-1500/model-00004-of-00004.safetensors new file mode 100644 index 0000000..a32ba75 --- /dev/null +++ b/checkpoint-1500/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27e886926a4ab90f20aa8e677a5604233c3c62d969ccb381c9e8ffffb789116 +size 1168138808 diff --git a/checkpoint-1500/model.safetensors.index.json b/checkpoint-1500/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/checkpoint-1500/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", 
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + 
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/checkpoint-1500/rng_state_0.pth b/checkpoint-1500/rng_state_0.pth new file mode 100644 index 0000000..8d84687 --- /dev/null +++ b/checkpoint-1500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ea5dcfd1b4a49b41b4fa01a8b24bba6186957162c3fd555ebff28620c7268b +size 14917 diff --git a/checkpoint-1500/rng_state_1.pth b/checkpoint-1500/rng_state_1.pth new file mode 100644 index 0000000..54f0119 --- /dev/null +++ b/checkpoint-1500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a5af18bb5eae8b7fd6bdef66259014d98ba87ffb16d614bba38f2c32030798 +size 14917 diff --git a/checkpoint-1500/scheduler.pt b/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000..fabfbf2 --- /dev/null +++ b/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5360a9ca3afd27044b0e3751f896f0dd514aa42a145ec88e8857a2bb4c8588f4 +size 1465 diff --git a/checkpoint-1500/special_tokens_map.json b/checkpoint-1500/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ 
b/checkpoint-1500/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1500/tokenizer.json b/checkpoint-1500/tokenizer.json new file mode 100644 index 0000000..03aa64f --- /dev/null +++ b/checkpoint-1500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0968dcc0ee8e56c7dccd34a7f51f8065ea0cb9e2cc529e3243d1e5c0a4bdaa0c +size 17208754 diff --git a/checkpoint-1500/tokenizer_config.json b/checkpoint-1500/tokenizer_config.json new file mode 100644 index 0000000..877a9a9 --- /dev/null +++ b/checkpoint-1500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, 
+ "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": 
"<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": 
"<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": 
"<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 32768, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1500/trainer_state.json b/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000..d244570 --- /dev/null +++ b/checkpoint-1500/trainer_state.json @@ -0,0 +1,3271 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7851347814708192, + "eval_steps": 500, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005234231876472127, + "grad_norm": 126.29230499267578, + "learning_rate": 0.0, + "logits/chosen": -0.40118408203125, + "logits/rejected": -0.41802978515625, + "logps/chosen": -297.609375, + "logps/rejected": -247.84375, + "logps/weighted_chosen": -4.5152587890625, + "logps/weighted_rejected": -3.032470703125, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/weighted_accuracies": 0.0, + "rewards/weighted_chosen": 0.0, + "rewards/weighted_margins": 0.0, + "rewards/weighted_rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.005234231876472127, + "grad_norm": 296.4369812011719, + "learning_rate": 4.6875e-08, + "logits/chosen": -0.3177456259727478, + "logits/rejected": -0.3534359335899353, + "logps/chosen": -275.5711669921875, + "logps/rejected": -255.90451049804688, + "logps/weighted_chosen": 
-2.350965738296509, + "logps/weighted_rejected": -2.549940347671509, + "loss": 0.6917, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0401475690305233, + "rewards/margins": 0.04296875, + "rewards/rejected": -0.0831163227558136, + "rewards/weighted_accuracies": 0.3229166567325592, + "rewards/weighted_chosen": -0.00032212998485192657, + "rewards/weighted_margins": 0.00019327799964230508, + "rewards/weighted_rejected": -0.0005154079990461469, + "step": 10 + }, + { + "epoch": 0.010468463752944255, + "grad_norm": 21.67967414855957, + "learning_rate": 9.895833333333332e-08, + "logits/chosen": -0.29769057035446167, + "logits/rejected": -0.313650518655777, + "logps/chosen": -294.3374938964844, + "logps/rejected": -272.6703186035156, + "logps/weighted_chosen": -2.13031005859375, + "logps/weighted_rejected": -2.103222608566284, + "loss": 0.6908, + "rewards/accuracies": 0.3125, + "rewards/chosen": -0.06103515625, + "rewards/margins": -0.01318359375, + "rewards/rejected": -0.0478515625, + "rewards/weighted_accuracies": 0.4437499940395355, + "rewards/weighted_chosen": 0.0014366150135174394, + "rewards/weighted_margins": 0.0021545409690588713, + "rewards/weighted_rejected": -0.0007179260137490928, + "step": 20 + }, + { + "epoch": 0.015702695629416383, + "grad_norm": 76.9887466430664, + "learning_rate": 1.5104166666666664e-07, + "logits/chosen": -0.2917121946811676, + "logits/rejected": -0.337240606546402, + "logps/chosen": -298.02655029296875, + "logps/rejected": -268.12188720703125, + "logps/weighted_chosen": -2.0724120140075684, + "logps/weighted_rejected": -2.4466919898986816, + "loss": 0.6912, + "rewards/accuracies": 0.28125, + "rewards/chosen": -0.0062500000931322575, + "rewards/margins": -0.02509765699505806, + "rewards/rejected": 0.01884765550494194, + "rewards/weighted_accuracies": 0.4281249940395355, + "rewards/weighted_chosen": 0.0027938843704760075, + "rewards/weighted_margins": 0.0019706725142896175, + "rewards/weighted_rejected": 0.0008232116815634072, + 
"step": 30 + }, + { + "epoch": 0.02093692750588851, + "grad_norm": 32.98203659057617, + "learning_rate": 2.03125e-07, + "logits/chosen": -0.3011154234409332, + "logits/rejected": -0.3432762026786804, + "logps/chosen": -278.63751220703125, + "logps/rejected": -253.88125610351562, + "logps/weighted_chosen": -2.2070555686950684, + "logps/weighted_rejected": -2.605224609375, + "loss": 0.692, + "rewards/accuracies": 0.26249998807907104, + "rewards/chosen": 0.0034667968284338713, + "rewards/margins": -0.05991210788488388, + "rewards/rejected": 0.06337890774011612, + "rewards/weighted_accuracies": 0.35624998807907104, + "rewards/weighted_chosen": 0.0014549255138263106, + "rewards/weighted_margins": -0.00034332275390625, + "rewards/weighted_rejected": 0.0017982482677325606, + "step": 40 + }, + { + "epoch": 0.02617115938236064, + "grad_norm": 20.751684188842773, + "learning_rate": 2.552083333333333e-07, + "logits/chosen": -0.2822524905204773, + "logits/rejected": -0.32080918550491333, + "logps/chosen": -280.31329345703125, + "logps/rejected": -267.58709716796875, + "logps/weighted_chosen": -2.136962890625, + "logps/weighted_rejected": -2.1753907203674316, + "loss": 0.6883, + "rewards/accuracies": 0.3125, + "rewards/chosen": -0.07236327975988388, + "rewards/margins": -0.09189452975988388, + "rewards/rejected": 0.01953125, + "rewards/weighted_accuracies": 0.4375, + "rewards/weighted_chosen": 0.0054107666946947575, + "rewards/weighted_margins": 0.0078063965775072575, + "rewards/weighted_rejected": -0.0023956298828125, + "step": 50 + }, + { + "epoch": 0.031405391258832765, + "grad_norm": 40.70024108886719, + "learning_rate": 3.0729166666666665e-07, + "logits/chosen": -0.3149581849575043, + "logits/rejected": -0.3086872100830078, + "logps/chosen": -277.6031188964844, + "logps/rejected": -261.8031311035156, + "logps/weighted_chosen": -2.5905518531799316, + "logps/weighted_rejected": -2.4834961891174316, + "loss": 0.6874, + "rewards/accuracies": 0.3812499940395355, + 
"rewards/chosen": 0.03662109375, + "rewards/margins": 0.12646484375, + "rewards/rejected": -0.08984375, + "rewards/weighted_accuracies": 0.5, + "rewards/weighted_chosen": 0.0004280090215615928, + "rewards/weighted_margins": 0.01105651818215847, + "rewards/weighted_rejected": -0.01062927208840847, + "step": 60 + }, + { + "epoch": 0.036639623135304895, + "grad_norm": 67.51947021484375, + "learning_rate": 3.59375e-07, + "logits/chosen": -0.318746954202652, + "logits/rejected": -0.32574766874313354, + "logps/chosen": -289.90313720703125, + "logps/rejected": -245.04452514648438, + "logps/weighted_chosen": -2.098431348800659, + "logps/weighted_rejected": -2.392407178878784, + "loss": 0.6841, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": 0.16708984971046448, + "rewards/margins": 0.4442382752895355, + "rewards/rejected": -0.27714842557907104, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": 0.015575408935546875, + "rewards/weighted_margins": 0.02174072340130806, + "rewards/weighted_rejected": -0.00616531353443861, + "step": 70 + }, + { + "epoch": 0.04187385501177702, + "grad_norm": 68.87100982666016, + "learning_rate": 4.114583333333333e-07, + "logits/chosen": -0.286581426858902, + "logits/rejected": -0.3082527220249176, + "logps/chosen": -289.5101623535156, + "logps/rejected": -270.4375, + "logps/weighted_chosen": -2.2385497093200684, + "logps/weighted_rejected": -2.4218382835388184, + "loss": 0.6727, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.31572264432907104, + "rewards/margins": 0.5547851324081421, + "rewards/rejected": -0.23906250298023224, + "rewards/weighted_accuracies": 0.596875011920929, + "rewards/weighted_chosen": 0.03613891452550888, + "rewards/weighted_margins": 0.05283202975988388, + "rewards/weighted_rejected": -0.01669769361615181, + "step": 80 + }, + { + "epoch": 0.04710808688824915, + "grad_norm": 40.29203414916992, + "learning_rate": 4.6354166666666664e-07, + "logits/chosen": 
-0.3158706724643707, + "logits/rejected": -0.30914992094039917, + "logps/chosen": -280.5726623535156, + "logps/rejected": -258.17657470703125, + "logps/weighted_chosen": -2.45281982421875, + "logps/weighted_rejected": -2.5444703102111816, + "loss": 0.6683, + "rewards/accuracies": 0.4906249940395355, + "rewards/chosen": 0.15966796875, + "rewards/margins": 0.599609375, + "rewards/rejected": -0.43994140625, + "rewards/weighted_accuracies": 0.581250011920929, + "rewards/weighted_chosen": 0.05808715894818306, + "rewards/weighted_margins": 0.07471618801355362, + "rewards/weighted_rejected": -0.0166168212890625, + "step": 90 + }, + { + "epoch": 0.05234231876472128, + "grad_norm": 46.855377197265625, + "learning_rate": 5.156249999999999e-07, + "logits/chosen": -0.2856552004814148, + "logits/rejected": -0.3585342466831207, + "logps/chosen": -291.05548095703125, + "logps/rejected": -287.078125, + "logps/weighted_chosen": -1.9577789306640625, + "logps/weighted_rejected": -2.532482862472534, + "loss": 0.6785, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.13925781846046448, + "rewards/margins": 0.9869140386581421, + "rewards/rejected": -1.1261718273162842, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": 0.03715210035443306, + "rewards/weighted_margins": 0.0635833740234375, + "rewards/weighted_rejected": -0.02643737755715847, + "step": 100 + }, + { + "epoch": 0.05757655064119341, + "grad_norm": 55.04579162597656, + "learning_rate": 5.677083333333333e-07, + "logits/chosen": -0.33493995666503906, + "logits/rejected": -0.3254844546318054, + "logps/chosen": -297.2953186035156, + "logps/rejected": -262.6773376464844, + "logps/weighted_chosen": -2.606689453125, + "logps/weighted_rejected": -2.648364305496216, + "loss": 0.6821, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -0.9228515625, + "rewards/margins": 0.8955078125, + "rewards/rejected": -1.818359375, + "rewards/weighted_accuracies": 0.518750011920929, + 
"rewards/weighted_chosen": -0.005747986026108265, + "rewards/weighted_margins": 0.05161895602941513, + "rewards/weighted_rejected": -0.05732421949505806, + "step": 110 + }, + { + "epoch": 0.06281078251766553, + "grad_norm": 22.23135757446289, + "learning_rate": 6.197916666666666e-07, + "logits/chosen": -0.3393222689628601, + "logits/rejected": -0.36481350660324097, + "logps/chosen": -295.6703186035156, + "logps/rejected": -256.3296813964844, + "logps/weighted_chosen": -1.8351562023162842, + "logps/weighted_rejected": -2.124218702316284, + "loss": 0.6752, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.071679711341858, + "rewards/margins": 1.46142578125, + "rewards/rejected": -2.5331053733825684, + "rewards/weighted_accuracies": 0.546875, + "rewards/weighted_chosen": 0.0018810272449627519, + "rewards/weighted_margins": 0.06835174560546875, + "rewards/weighted_rejected": -0.0664466843008995, + "step": 120 + }, + { + "epoch": 0.06804501439413765, + "grad_norm": 57.93917465209961, + "learning_rate": 6.718749999999999e-07, + "logits/chosen": -0.30284881591796875, + "logits/rejected": -0.2989334166049957, + "logps/chosen": -306.5074157714844, + "logps/rejected": -279.8265686035156, + "logps/weighted_chosen": -1.910064697265625, + "logps/weighted_rejected": -2.2278685569763184, + "loss": 0.6738, + "rewards/accuracies": 0.578125, + "rewards/chosen": -0.45097655057907104, + "rewards/margins": 1.7268555164337158, + "rewards/rejected": -2.177734375, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": 0.02166290208697319, + "rewards/weighted_margins": 0.07758025825023651, + "rewards/weighted_rejected": -0.05589141696691513, + "step": 130 + }, + { + "epoch": 0.07327924627060979, + "grad_norm": 66.64070892333984, + "learning_rate": 7.239583333333333e-07, + "logits/chosen": -0.34190064668655396, + "logits/rejected": -0.3586837649345398, + "logps/chosen": -300.01483154296875, + "logps/rejected": -276.1703186035156, + "logps/weighted_chosen": 
-2.202807664871216, + "logps/weighted_rejected": -2.474353075027466, + "loss": 0.6635, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -1.3230469226837158, + "rewards/margins": 2.2220702171325684, + "rewards/rejected": -3.545117139816284, + "rewards/weighted_accuracies": 0.609375, + "rewards/weighted_chosen": 0.0006683349492959678, + "rewards/weighted_margins": 0.10604552924633026, + "rewards/weighted_rejected": -0.1053924560546875, + "step": 140 + }, + { + "epoch": 0.07851347814708191, + "grad_norm": 18.789766311645508, + "learning_rate": 7.760416666666666e-07, + "logits/chosen": -0.2976974546909332, + "logits/rejected": -0.3081321716308594, + "logps/chosen": -286.27813720703125, + "logps/rejected": -255.4640655517578, + "logps/weighted_chosen": -2.7657103538513184, + "logps/weighted_rejected": -2.831347703933716, + "loss": 0.6605, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -2.104687452316284, + "rewards/margins": 2.5054688453674316, + "rewards/rejected": -4.610156059265137, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.0018630981212481856, + "rewards/weighted_margins": 0.158416748046875, + "rewards/weighted_rejected": -0.1603546142578125, + "step": 150 + }, + { + "epoch": 0.08374771002355404, + "grad_norm": 51.51210021972656, + "learning_rate": 8.28125e-07, + "logits/chosen": -0.3341739773750305, + "logits/rejected": -0.3859619200229645, + "logps/chosen": -306.4765625, + "logps/rejected": -279.1148376464844, + "logps/weighted_chosen": -2.3189454078674316, + "logps/weighted_rejected": -2.36669921875, + "loss": 0.636, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -2.575390577316284, + "rewards/margins": 3.349609375, + "rewards/rejected": -5.925000190734863, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": 0.02147369459271431, + "rewards/weighted_margins": 0.22438660264015198, + "rewards/weighted_rejected": -0.203105166554451, + "step": 160 + 
}, + { + "epoch": 0.08898194190002617, + "grad_norm": 398.3809509277344, + "learning_rate": 8.802083333333333e-07, + "logits/chosen": -0.36855775117874146, + "logits/rejected": -0.37070387601852417, + "logps/chosen": -307.1656188964844, + "logps/rejected": -265.78436279296875, + "logps/weighted_chosen": -2.459460496902466, + "logps/weighted_rejected": -2.757373094558716, + "loss": 0.6811, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -4.097460746765137, + "rewards/margins": 3.488476514816284, + "rewards/rejected": -7.585839748382568, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": 0.018505096435546875, + "rewards/weighted_margins": 0.19701537489891052, + "rewards/weighted_rejected": -0.17839965224266052, + "step": 170 + }, + { + "epoch": 0.0942161737764983, + "grad_norm": 55.77580261230469, + "learning_rate": 9.322916666666666e-07, + "logits/chosen": -0.3392753601074219, + "logits/rejected": -0.35816192626953125, + "logps/chosen": -278.99530029296875, + "logps/rejected": -265.18359375, + "logps/weighted_chosen": -2.362103223800659, + "logps/weighted_rejected": -2.754711866378784, + "loss": 0.6944, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -5.346972465515137, + "rewards/margins": 3.5015625953674316, + "rewards/rejected": -8.848730087280273, + "rewards/weighted_accuracies": 0.578125, + "rewards/weighted_chosen": -0.05782318115234375, + "rewards/weighted_margins": 0.16480103135108948, + "rewards/weighted_rejected": -0.22255554795265198, + "step": 180 + }, + { + "epoch": 0.09945040565297043, + "grad_norm": 38.015960693359375, + "learning_rate": 9.84375e-07, + "logits/chosen": -0.3686843812465668, + "logits/rejected": -0.4041244387626648, + "logps/chosen": -314.3070373535156, + "logps/rejected": -276.7484436035156, + "logps/weighted_chosen": -2.123486280441284, + "logps/weighted_rejected": -2.6261963844299316, + "loss": 0.6392, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": 
-8.581738471984863, + "rewards/margins": 4.317968845367432, + "rewards/rejected": -12.900781631469727, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.129638671875, + "rewards/weighted_margins": 0.2160186767578125, + "rewards/weighted_rejected": -0.345590204000473, + "step": 190 + }, + { + "epoch": 0.10468463752944256, + "grad_norm": 46.52367401123047, + "learning_rate": 9.99959085414323e-07, + "logits/chosen": -0.4128967225551605, + "logits/rejected": -0.4471847414970398, + "logps/chosen": -320.0546875, + "logps/rejected": -273.11248779296875, + "logps/weighted_chosen": -2.5019164085388184, + "logps/weighted_rejected": -2.9936890602111816, + "loss": 0.6473, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -10.737597465515137, + "rewards/margins": 4.738671779632568, + "rewards/rejected": -15.476171493530273, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.11443634331226349, + "rewards/weighted_margins": 0.2610321044921875, + "rewards/weighted_rejected": -0.37534791231155396, + "step": 200 + }, + { + "epoch": 0.10991886940591468, + "grad_norm": 21.238189697265625, + "learning_rate": 9.997587035630105e-07, + "logits/chosen": -0.4288749694824219, + "logits/rejected": -0.4688262939453125, + "logps/chosen": -300.0765686035156, + "logps/rejected": -304.63751220703125, + "logps/weighted_chosen": -2.32427978515625, + "logps/weighted_rejected": -3.0592284202575684, + "loss": 0.6424, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -13.117578506469727, + "rewards/margins": 7.013671875, + "rewards/rejected": -20.133594512939453, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -0.21423491835594177, + "rewards/weighted_margins": 0.27025145292282104, + "rewards/weighted_rejected": -0.4845077395439148, + "step": 210 + }, + { + "epoch": 0.11515310128238682, + "grad_norm": 24.92041015625, + "learning_rate": 9.99391406364405e-07, + "logits/chosen": 
-0.42696380615234375, + "logits/rejected": -0.429006963968277, + "logps/chosen": -305.4906311035156, + "logps/rejected": -288.6312561035156, + "logps/weighted_chosen": -2.625018358230591, + "logps/weighted_rejected": -3.102160692214966, + "loss": 0.6601, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -13.349413871765137, + "rewards/margins": 6.373632907867432, + "rewards/rejected": -19.72265625, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.20062866806983948, + "rewards/weighted_margins": 0.316873162984848, + "rewards/weighted_rejected": -0.5174545049667358, + "step": 220 + }, + { + "epoch": 0.12038733315885894, + "grad_norm": 147.95851135253906, + "learning_rate": 9.988573164927884e-07, + "logits/chosen": -0.3811447024345398, + "logits/rejected": -0.4161086976528168, + "logps/chosen": -281.33203125, + "logps/rejected": -274.234375, + "logps/weighted_chosen": -2.32806396484375, + "logps/weighted_rejected": -2.6552734375, + "loss": 0.7195, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -12.46875, + "rewards/margins": 10.046093940734863, + "rewards/rejected": -22.515430450439453, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.24639587104320526, + "rewards/weighted_margins": 0.23908081650733948, + "rewards/weighted_rejected": -0.4853073060512543, + "step": 230 + }, + { + "epoch": 0.12562156503533106, + "grad_norm": 26.882122039794922, + "learning_rate": 9.98156612329838e-07, + "logits/chosen": -0.4748245179653168, + "logits/rejected": -0.5250595211982727, + "logps/chosen": -278.16717529296875, + "logps/rejected": -306.29376220703125, + "logps/weighted_chosen": -2.348803758621216, + "logps/weighted_rejected": -2.9455933570861816, + "loss": 0.6674, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -13.405566215515137, + "rewards/margins": 10.753710746765137, + "rewards/rejected": -24.158985137939453, + "rewards/weighted_accuracies": 0.625, + 
"rewards/weighted_chosen": -0.14908751845359802, + "rewards/weighted_margins": 0.33162689208984375, + "rewards/weighted_rejected": -0.48021697998046875, + "step": 240 + }, + { + "epoch": 0.13085579691180318, + "grad_norm": 86.49760437011719, + "learning_rate": 9.97289527905053e-07, + "logits/chosen": -0.478302001953125, + "logits/rejected": -0.48861923813819885, + "logps/chosen": -277.0523376464844, + "logps/rejected": -275.80938720703125, + "logps/weighted_chosen": -2.61376953125, + "logps/weighted_rejected": -2.787853956222534, + "loss": 0.7022, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -12.673730850219727, + "rewards/margins": 7.1806640625, + "rewards/rejected": -19.852344512939453, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.12388916313648224, + "rewards/weighted_margins": 0.19627074897289276, + "rewards/weighted_rejected": -0.3203796446323395, + "step": 250 + }, + { + "epoch": 0.1360900287882753, + "grad_norm": 19.698871612548828, + "learning_rate": 9.962563528175875e-07, + "logits/chosen": -0.4065658450126648, + "logits/rejected": -0.4432968199253082, + "logps/chosen": -310.62890625, + "logps/rejected": -281.46405029296875, + "logps/weighted_chosen": -2.184094190597534, + "logps/weighted_rejected": -3.0492796897888184, + "loss": 0.6507, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -11.896581649780273, + "rewards/margins": 8.622265815734863, + "rewards/rejected": -20.520313262939453, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": -0.1260833740234375, + "rewards/weighted_margins": 0.25025635957717896, + "rewards/weighted_rejected": -0.3761749267578125, + "step": 260 + }, + { + "epoch": 0.14132426066474746, + "grad_norm": 16.363121032714844, + "learning_rate": 9.950574321395277e-07, + "logits/chosen": -0.42208632826805115, + "logits/rejected": -0.4458427429199219, + "logps/chosen": -305.9046936035156, + "logps/rejected": -286.06561279296875, + 
"logps/weighted_chosen": -2.40838623046875, + "logps/weighted_rejected": -2.7938475608825684, + "loss": 0.6573, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -16.622364044189453, + "rewards/margins": 6.233202934265137, + "rewards/rejected": -22.855077743530273, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.189697265625, + "rewards/weighted_margins": 0.27490538358688354, + "rewards/weighted_rejected": -0.4645233154296875, + "step": 270 + }, + { + "epoch": 0.14655849254121958, + "grad_norm": 54.42692947387695, + "learning_rate": 9.936931663006413e-07, + "logits/chosen": -0.45263671875, + "logits/rejected": -0.44363707304000854, + "logps/chosen": -316.171875, + "logps/rejected": -303.3656311035156, + "logps/weighted_chosen": -2.4659423828125, + "logps/weighted_rejected": -3.0541749000549316, + "loss": 0.6068, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -13.402734756469727, + "rewards/margins": 10.619824409484863, + "rewards/rejected": -24.025390625, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.05214080959558487, + "rewards/weighted_margins": 0.40336912870407104, + "rewards/weighted_rejected": -0.455657958984375, + "step": 280 + }, + { + "epoch": 0.1517927244176917, + "grad_norm": 39.20017623901367, + "learning_rate": 9.921640109546357e-07, + "logits/chosen": -0.42310255765914917, + "logits/rejected": -0.48920440673828125, + "logps/chosen": -283.7171936035156, + "logps/rejected": -278.1859436035156, + "logps/weighted_chosen": -2.396167039871216, + "logps/weighted_rejected": -3.5881590843200684, + "loss": 0.6649, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -16.9111328125, + "rewards/margins": 9.704492568969727, + "rewards/rejected": -26.62109375, + "rewards/weighted_accuracies": 0.6031249761581421, + "rewards/weighted_chosen": -0.130279541015625, + "rewards/weighted_margins": 0.3882461488246918, + "rewards/weighted_rejected": -0.5187179446220398, + 
"step": 290 + }, + { + "epoch": 0.15702695629416383, + "grad_norm": 28.03601837158203, + "learning_rate": 9.90470476826975e-07, + "logits/chosen": -0.485189825296402, + "logits/rejected": -0.48862916231155396, + "logps/chosen": -289.09765625, + "logps/rejected": -297.625, + "logps/weighted_chosen": -2.2784485816955566, + "logps/weighted_rejected": -2.771862745285034, + "loss": 0.6608, + "rewards/accuracies": 0.640625, + "rewards/chosen": -19.676952362060547, + "rewards/margins": 10.679491996765137, + "rewards/rejected": -30.360937118530273, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.1669921875, + "rewards/weighted_margins": 0.28967589139938354, + "rewards/weighted_rejected": -0.4566032290458679, + "step": 300 + }, + { + "epoch": 0.16226118817063595, + "grad_norm": 538.50927734375, + "learning_rate": 9.886131295443002e-07, + "logits/chosen": -0.654278576374054, + "logits/rejected": -0.7076683044433594, + "logps/chosen": -341.85467529296875, + "logps/rejected": -309.89764404296875, + "logps/weighted_chosen": -2.629150390625, + "logps/weighted_rejected": -2.8698973655700684, + "loss": 0.6788, + "rewards/accuracies": 0.515625, + "rewards/chosen": -60.568748474121094, + "rewards/margins": -0.7822265625, + "rewards/rejected": -59.785743713378906, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.24791869521141052, + "rewards/weighted_margins": 0.28089600801467896, + "rewards/weighted_rejected": -0.528765857219696, + "step": 310 + }, + { + "epoch": 0.16749542004710807, + "grad_norm": 34.656883239746094, + "learning_rate": 9.865925894455166e-07, + "logits/chosen": -0.7003936767578125, + "logits/rejected": -0.719250500202179, + "logps/chosen": -326.3960876464844, + "logps/rejected": -290.3453063964844, + "logps/weighted_chosen": -2.553356885910034, + "logps/weighted_rejected": -3.10992431640625, + "loss": 0.7054, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -26.532812118530273, + 
"rewards/margins": 6.3564453125, + "rewards/rejected": -32.88788986206055, + "rewards/weighted_accuracies": 0.609375, + "rewards/weighted_chosen": -0.18020018935203552, + "rewards/weighted_margins": 0.3489990234375, + "rewards/weighted_rejected": -0.5293639898300171, + "step": 320 + }, + { + "epoch": 0.17272965192358022, + "grad_norm": 61.1888542175293, + "learning_rate": 9.84409531374603e-07, + "logits/chosen": -0.6631911993026733, + "logits/rejected": -0.6448425054550171, + "logps/chosen": -324.87579345703125, + "logps/rejected": -291.71875, + "logps/weighted_chosen": -2.5611815452575684, + "logps/weighted_rejected": -3.060229539871216, + "loss": 0.6449, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -20.070018768310547, + "rewards/margins": 8.8251953125, + "rewards/rejected": -28.8876953125, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.140888974070549, + "rewards/weighted_margins": 0.3719635009765625, + "rewards/weighted_rejected": -0.5127013921737671, + "step": 330 + }, + { + "epoch": 0.17796388380005235, + "grad_norm": 27.2315673828125, + "learning_rate": 9.820646844552219e-07, + "logits/chosen": -0.6496349573135376, + "logits/rejected": -0.7006805539131165, + "logps/chosen": -295.5882873535156, + "logps/rejected": -297.4906311035156, + "logps/weighted_chosen": -2.6988892555236816, + "logps/weighted_rejected": -2.898681640625, + "loss": 0.6788, + "rewards/accuracies": 0.6875, + "rewards/chosen": -19.176855087280273, + "rewards/margins": 12.649316787719727, + "rewards/rejected": -31.822460174560547, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.23918533325195312, + "rewards/weighted_margins": 0.2961669862270355, + "rewards/weighted_rejected": -0.5351837277412415, + "step": 340 + }, + { + "epoch": 0.18319811567652447, + "grad_norm": 24.073888778686523, + "learning_rate": 9.795588318471964e-07, + "logits/chosen": -0.7137314081192017, + "logits/rejected": 
-0.7225399017333984, + "logps/chosen": -277.8890686035156, + "logps/rejected": -304.59063720703125, + "logps/weighted_chosen": -2.4300780296325684, + "logps/weighted_rejected": -2.771411180496216, + "loss": 0.6675, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -18.445703506469727, + "rewards/margins": 9.8720703125, + "rewards/rejected": -28.31640625, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -0.20159301161766052, + "rewards/weighted_margins": 0.2779785096645355, + "rewards/weighted_rejected": -0.47947996854782104, + "step": 350 + }, + { + "epoch": 0.1884323475529966, + "grad_norm": 12.750471115112305, + "learning_rate": 9.768928104849415e-07, + "logits/chosen": -0.7212737798690796, + "logits/rejected": -0.7225433588027954, + "logps/chosen": -299.53594970703125, + "logps/rejected": -275.5718688964844, + "logps/weighted_chosen": -2.667529344558716, + "logps/weighted_rejected": -2.800830125808716, + "loss": 0.6916, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -16.816015243530273, + "rewards/margins": 9.876562118530273, + "rewards/rejected": -26.690235137939453, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.15018615126609802, + "rewards/weighted_margins": 0.301962286233902, + "rewards/weighted_rejected": -0.45206451416015625, + "step": 360 + }, + { + "epoch": 0.19366657942946872, + "grad_norm": 29.441747665405273, + "learning_rate": 9.740675107979355e-07, + "logits/chosen": -0.6865798830986023, + "logits/rejected": -0.7117553949356079, + "logps/chosen": -331.06561279296875, + "logps/rejected": -300.31719970703125, + "logps/weighted_chosen": -1.9907715320587158, + "logps/weighted_rejected": -2.932177782058716, + "loss": 0.6819, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -16.022266387939453, + "rewards/margins": 9.630078315734863, + "rewards/rejected": -25.654491424560547, + "rewards/weighted_accuracies": 0.640625, + 
"rewards/weighted_chosen": -0.18877258896827698, + "rewards/weighted_margins": 0.2808380126953125, + "rewards/weighted_rejected": -0.4699081480503082, + "step": 370 + }, + { + "epoch": 0.19890081130594087, + "grad_norm": 29.483524322509766, + "learning_rate": 9.71083876413323e-07, + "logits/chosen": -0.6637862920761108, + "logits/rejected": -0.669873058795929, + "logps/chosen": -322.3882751464844, + "logps/rejected": -300.85858154296875, + "logps/weighted_chosen": -2.189379930496216, + "logps/weighted_rejected": -2.9217162132263184, + "loss": 0.6846, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -20.004688262939453, + "rewards/margins": 11.246289253234863, + "rewards/rejected": -31.24609375, + "rewards/weighted_accuracies": 0.6000000238418579, + "rewards/weighted_chosen": -0.22467346489429474, + "rewards/weighted_margins": 0.2720580995082855, + "rewards/weighted_rejected": -0.4967102110385895, + "step": 380 + }, + { + "epoch": 0.204135043182413, + "grad_norm": 20.563907623291016, + "learning_rate": 9.67942903840751e-07, + "logits/chosen": -0.7051689028739929, + "logits/rejected": -0.7537201046943665, + "logps/chosen": -324.1015625, + "logps/rejected": -310.375, + "logps/weighted_chosen": -2.397631883621216, + "logps/weighted_rejected": -2.950610399246216, + "loss": 0.6478, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -19.788671493530273, + "rewards/margins": 16.317577362060547, + "rewards/rejected": -36.111328125, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.22498169541358948, + "rewards/weighted_margins": 0.3520751893520355, + "rewards/weighted_rejected": -0.5770629644393921, + "step": 390 + }, + { + "epoch": 0.2093692750588851, + "grad_norm": 23.1771183013916, + "learning_rate": 9.646456421395447e-07, + "logits/chosen": -0.7504974603652954, + "logits/rejected": -0.7628723382949829, + "logps/chosen": -341.2171936035156, + "logps/rejected": -343.9375, + "logps/weighted_chosen": 
-2.2680420875549316, + "logps/weighted_rejected": -3.0065674781799316, + "loss": 0.6746, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -22.944530487060547, + "rewards/margins": 17.315624237060547, + "rewards/rejected": -40.2587890625, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.20337525010108948, + "rewards/weighted_margins": 0.216084286570549, + "rewards/weighted_rejected": -0.4196624755859375, + "step": 400 + }, + { + "epoch": 0.21460350693535724, + "grad_norm": 14.420520782470703, + "learning_rate": 9.611931925683266e-07, + "logits/chosen": -0.7154334783554077, + "logits/rejected": -0.7491073608398438, + "logps/chosen": -331.38983154296875, + "logps/rejected": -303.3890686035156, + "logps/weighted_chosen": -2.190844774246216, + "logps/weighted_rejected": -2.7472167015075684, + "loss": 0.6135, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -25.111621856689453, + "rewards/margins": 14.543554306030273, + "rewards/rejected": -39.658592224121094, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.21762695908546448, + "rewards/weighted_margins": 0.36674195528030396, + "rewards/weighted_rejected": -0.584503173828125, + "step": 410 + }, + { + "epoch": 0.21983773881182936, + "grad_norm": 27.429603576660156, + "learning_rate": 9.575867082172085e-07, + "logits/chosen": -0.7379547357559204, + "logits/rejected": -0.7826202511787415, + "logps/chosen": -337.46405029296875, + "logps/rejected": -320.23907470703125, + "logps/weighted_chosen": -2.6387085914611816, + "logps/weighted_rejected": -2.712329149246216, + "loss": 0.6716, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -32.554298400878906, + "rewards/margins": 17.513866424560547, + "rewards/rejected": -50.060157775878906, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.35613709688186646, + "rewards/weighted_margins": 0.38392335176467896, + 
"rewards/weighted_rejected": -0.7401062250137329, + "step": 420 + }, + { + "epoch": 0.22507197068830148, + "grad_norm": 18.97144889831543, + "learning_rate": 9.538273936226673e-07, + "logits/chosen": -0.778491199016571, + "logits/rejected": -0.811004638671875, + "logps/chosen": -292.83984375, + "logps/rejected": -304.05352783203125, + "logps/weighted_chosen": -2.796630859375, + "logps/weighted_rejected": -3.2444825172424316, + "loss": 0.6544, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -27.327733993530273, + "rewards/margins": 12.26318359375, + "rewards/rejected": -39.58984375, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.21069030463695526, + "rewards/weighted_margins": 0.3678832948207855, + "rewards/weighted_rejected": -0.57806396484375, + "step": 430 + }, + { + "epoch": 0.23030620256477363, + "grad_norm": 25.43462371826172, + "learning_rate": 9.499165043652391e-07, + "logits/chosen": -0.7674010992050171, + "logits/rejected": -0.7686828374862671, + "logps/chosen": -319.55859375, + "logps/rejected": -309.03436279296875, + "logps/weighted_chosen": -2.82305908203125, + "logps/weighted_rejected": -2.993237257003784, + "loss": 0.631, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -28.386133193969727, + "rewards/margins": 13.9384765625, + "rewards/rejected": -42.326072692871094, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.26903897523880005, + "rewards/weighted_margins": 0.353515625, + "rewards/weighted_rejected": -0.622546374797821, + "step": 440 + }, + { + "epoch": 0.23554043444124576, + "grad_norm": 65.78443908691406, + "learning_rate": 9.458553466501665e-07, + "logits/chosen": -0.8066772222518921, + "logits/rejected": -0.8363037109375, + "logps/chosen": -314.7945251464844, + "logps/rejected": -287.65313720703125, + "logps/weighted_chosen": -2.8233399391174316, + "logps/weighted_rejected": -3.013622999191284, + "loss": 0.6831, + "rewards/accuracies": 
0.640625, + "rewards/chosen": -28.642578125, + "rewards/margins": 15.162694931030273, + "rewards/rejected": -43.80976486206055, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -0.3802246153354645, + "rewards/weighted_margins": 0.3342132568359375, + "rewards/weighted_rejected": -0.7139984369277954, + "step": 450 + }, + { + "epoch": 0.24077466631771788, + "grad_norm": 13.290085792541504, + "learning_rate": 9.416452768711366e-07, + "logits/chosen": -0.7957550287246704, + "logits/rejected": -0.8287414312362671, + "logps/chosen": -323.0093688964844, + "logps/rejected": -306.98907470703125, + "logps/weighted_chosen": -2.544872999191284, + "logps/weighted_rejected": -3.100903272628784, + "loss": 0.6759, + "rewards/accuracies": 0.640625, + "rewards/chosen": -30.906835556030273, + "rewards/margins": 16.355859756469727, + "rewards/rejected": -47.24980545043945, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.343759149312973, + "rewards/weighted_margins": 0.40337830781936646, + "rewards/weighted_rejected": -0.747100830078125, + "step": 460 + }, + { + "epoch": 0.24600889819419, + "grad_norm": 21.766939163208008, + "learning_rate": 9.372877011572557e-07, + "logits/chosen": -0.7200164794921875, + "logits/rejected": -0.742279052734375, + "logps/chosen": -342.75079345703125, + "logps/rejected": -318.60626220703125, + "logps/weighted_chosen": -2.5311522483825684, + "logps/weighted_rejected": -2.990124464035034, + "loss": 0.63, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -32.077247619628906, + "rewards/margins": 11.883398056030273, + "rewards/rejected": -43.959373474121094, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -0.30525511503219604, + "rewards/weighted_margins": 0.45678406953811646, + "rewards/weighted_rejected": -0.7624969482421875, + "step": 470 + }, + { + "epoch": 0.2512431300706621, + "grad_norm": 28.76239776611328, + "learning_rate": 
9.327840749034141e-07, + "logits/chosen": -0.7930053472518921, + "logits/rejected": -0.8311401605606079, + "logps/chosen": -316.79998779296875, + "logps/rejected": -326.5062561035156, + "logps/weighted_chosen": -2.4120116233825684, + "logps/weighted_rejected": -3.591870069503784, + "loss": 0.6639, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -29.640039443969727, + "rewards/margins": 20.350976943969727, + "rewards/rejected": -49.9853515625, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -0.27521055936813354, + "rewards/weighted_margins": 0.4522705078125, + "rewards/weighted_rejected": -0.7274719476699829, + "step": 480 + }, + { + "epoch": 0.2564773619471343, + "grad_norm": 36.629127502441406, + "learning_rate": 9.281359022841965e-07, + "logits/chosen": -0.72747802734375, + "logits/rejected": -0.7426910400390625, + "logps/chosen": -308.5406188964844, + "logps/rejected": -300.71484375, + "logps/weighted_chosen": -2.6044554710388184, + "logps/weighted_rejected": -3.862866163253784, + "loss": 0.6178, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -33.54804611206055, + "rewards/margins": 22.3515625, + "rewards/rejected": -55.88496017456055, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.38198548555374146, + "rewards/weighted_margins": 0.525561511516571, + "rewards/weighted_rejected": -0.9073349237442017, + "step": 490 + }, + { + "epoch": 0.26171159382360637, + "grad_norm": 20.296154022216797, + "learning_rate": 9.233447357514989e-07, + "logits/chosen": -0.7092193365097046, + "logits/rejected": -0.751629650592804, + "logps/chosen": -337.10467529296875, + "logps/rejected": -328.71875, + "logps/weighted_chosen": -3.054370164871216, + "logps/weighted_rejected": -3.5334715843200684, + "loss": 0.6534, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -39.15234375, + "rewards/margins": 19.770116806030273, + "rewards/rejected": -58.90898513793945, + 
"rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.470510870218277, + "rewards/weighted_margins": 0.566607654094696, + "rewards/weighted_rejected": -1.0376465320587158, + "step": 500 + }, + { + "epoch": 0.26171159382360637, + "eval_logits/chosen": -0.8052441477775574, + "eval_logits/rejected": -0.8225547075271606, + "eval_logps/chosen": -333.44000244140625, + "eval_logps/rejected": -331.98199462890625, + "eval_logps/weighted_chosen": -2.756896734237671, + "eval_logps/weighted_rejected": -3.441680908203125, + "eval_loss": 0.6561886668205261, + "eval_rewards/accuracies": 0.6370000243186951, + "eval_rewards/chosen": -44.67877960205078, + "eval_rewards/margins": 19.602703094482422, + "eval_rewards/rejected": -64.27362823486328, + "eval_rewards/weighted_accuracies": 0.6445000171661377, + "eval_rewards/weighted_chosen": -0.485819548368454, + "eval_rewards/weighted_margins": 0.4620407819747925, + "eval_rewards/weighted_rejected": -0.9478604793548584, + "eval_runtime": 1263.3333, + "eval_samples_per_second": 1.583, + "eval_steps_per_second": 0.396, + "step": 500 + }, + { + "epoch": 0.2669458257000785, + "grad_norm": 44.70832824707031, + "learning_rate": 9.184121755160232e-07, + "logits/chosen": -0.7849181890487671, + "logits/rejected": -0.8171790838241577, + "logps/chosen": -344.84454345703125, + "logps/rejected": -356.3671875, + "logps/weighted_chosen": -2.9210448265075684, + "logps/weighted_rejected": -3.3969483375549316, + "loss": 0.6735, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -43.604881286621094, + "rewards/margins": 24.157032012939453, + "rewards/rejected": -67.75703430175781, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.43794554471969604, + "rewards/weighted_margins": 0.407052606344223, + "rewards/weighted_rejected": -0.8448547124862671, + "step": 510 + }, + { + "epoch": 0.2721800575765506, + "grad_norm": 34.907981872558594, + "learning_rate": 9.133398690128193e-07, + 
"logits/chosen": -0.8243468999862671, + "logits/rejected": -0.852618396282196, + "logps/chosen": -370.2640686035156, + "logps/rejected": -357.4296875, + "logps/weighted_chosen": -2.7059326171875, + "logps/weighted_rejected": -3.559033155441284, + "loss": 0.6233, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -48.807029724121094, + "rewards/margins": 28.050586700439453, + "rewards/rejected": -76.85234069824219, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.3096374571323395, + "rewards/weighted_margins": 0.5285431146621704, + "rewards/weighted_rejected": -0.838287353515625, + "step": 520 + }, + { + "epoch": 0.27741428945302277, + "grad_norm": 17.412511825561523, + "learning_rate": 9.081295103510554e-07, + "logits/chosen": -0.7943557500839233, + "logits/rejected": -0.8541763424873352, + "logps/chosen": -339.65313720703125, + "logps/rejected": -351.77813720703125, + "logps/weighted_chosen": -2.331298828125, + "logps/weighted_rejected": -3.5838379859924316, + "loss": 0.5587, + "rewards/accuracies": 0.65625, + "rewards/chosen": -55.494140625, + "rewards/margins": 26.642187118530273, + "rewards/rejected": -82.14140319824219, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.33439940214157104, + "rewards/weighted_margins": 0.7252563238143921, + "rewards/weighted_rejected": -1.0597717761993408, + "step": 530 + }, + { + "epoch": 0.2826485213294949, + "grad_norm": 30.677711486816406, + "learning_rate": 9.027828397481989e-07, + "logits/chosen": -0.7925201654434204, + "logits/rejected": -0.8262939453125, + "logps/chosen": -318.59063720703125, + "logps/rejected": -337.55157470703125, + "logps/weighted_chosen": -3.059436082839966, + "logps/weighted_rejected": -3.750012159347534, + "loss": 0.6464, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -57.99492263793945, + "rewards/margins": 24.116796493530273, + "rewards/rejected": -82.107421875, + "rewards/weighted_accuracies": 0.637499988079071, + 
"rewards/weighted_chosen": -0.507769763469696, + "rewards/weighted_margins": 0.4883270263671875, + "rewards/weighted_rejected": -0.995800793170929, + "step": 540 + }, + { + "epoch": 0.287882753205967, + "grad_norm": 34.808658599853516, + "learning_rate": 8.973016429487988e-07, + "logits/chosen": -0.8280746340751648, + "logits/rejected": -0.8393570184707642, + "logps/chosen": -340.02032470703125, + "logps/rejected": -340.90625, + "logps/weighted_chosen": -3.016833543777466, + "logps/weighted_rejected": -3.3585205078125, + "loss": 0.6373, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -58.115234375, + "rewards/margins": 28.575389862060547, + "rewards/rejected": -86.7035140991211, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.5330657958984375, + "rewards/weighted_margins": 0.48836976289749146, + "rewards/weighted_rejected": -1.0212554931640625, + "step": 550 + }, + { + "epoch": 0.29311698508243916, + "grad_norm": 21.559553146362305, + "learning_rate": 8.916877506280601e-07, + "logits/chosen": -0.8576828241348267, + "logits/rejected": -0.850115954875946, + "logps/chosen": -343.0625, + "logps/rejected": -340.73126220703125, + "logps/weighted_chosen": -3.004504442214966, + "logps/weighted_rejected": -3.3214111328125, + "loss": 0.6493, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -62.214454650878906, + "rewards/margins": 24.381053924560547, + "rewards/rejected": -86.59492492675781, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.5099731683731079, + "rewards/weighted_margins": 0.535810112953186, + "rewards/weighted_rejected": -1.0458984375, + "step": 560 + }, + { + "epoch": 0.29835121695891126, + "grad_norm": 16.77034568786621, + "learning_rate": 8.85943037780415e-07, + "logits/chosen": -0.901629626750946, + "logits/rejected": -0.9031143188476562, + "logps/chosen": -347.7562561035156, + "logps/rejected": -320.90936279296875, + "logps/weighted_chosen": 
-2.9189209938049316, + "logps/weighted_rejected": -3.346874952316284, + "loss": 0.6796, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -61.52226638793945, + "rewards/margins": 17.764842987060547, + "rewards/rejected": -79.3128890991211, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -0.5591338872909546, + "rewards/weighted_margins": 0.44241029024124146, + "rewards/weighted_rejected": -1.00177001953125, + "step": 570 + }, + { + "epoch": 0.3035854488353834, + "grad_norm": 23.179088592529297, + "learning_rate": 8.800694230932884e-07, + "logits/chosen": -0.808392345905304, + "logits/rejected": -0.8254486322402954, + "logps/chosen": -345.52032470703125, + "logps/rejected": -338.59844970703125, + "logps/weighted_chosen": -2.4705810546875, + "logps/weighted_rejected": -3.031982421875, + "loss": 0.6672, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -56.009376525878906, + "rewards/margins": 17.356250762939453, + "rewards/rejected": -73.3921890258789, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.3469276428222656, + "rewards/weighted_margins": 0.38211363554000854, + "rewards/weighted_rejected": -0.7289062738418579, + "step": 580 + }, + { + "epoch": 0.30881968071185556, + "grad_norm": 16.077539443969727, + "learning_rate": 8.740688683062723e-07, + "logits/chosen": -0.8602691888809204, + "logits/rejected": -0.874432384967804, + "logps/chosen": -382.03436279296875, + "logps/rejected": -349.27655029296875, + "logps/weighted_chosen": -2.4807372093200684, + "logps/weighted_rejected": -3.101879835128784, + "loss": 0.6615, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -52.95586013793945, + "rewards/margins": 20.580469131469727, + "rewards/rejected": -73.5445327758789, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.31566160917282104, + "rewards/weighted_margins": 0.39473265409469604, + "rewards/weighted_rejected": 
-0.7103912234306335, + "step": 590 + }, + { + "epoch": 0.31405391258832765, + "grad_norm": 17.0419864654541, + "learning_rate": 8.679433775559215e-07, + "logits/chosen": -0.8191520571708679, + "logits/rejected": -0.8663116693496704, + "logps/chosen": -379.78125, + "logps/rejected": -361.57501220703125, + "logps/weighted_chosen": -2.305920362472534, + "logps/weighted_rejected": -3.3094482421875, + "loss": 0.6241, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -55.09687423706055, + "rewards/margins": 20.994531631469727, + "rewards/rejected": -76.0738296508789, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -0.31669920682907104, + "rewards/weighted_margins": 0.4563964903354645, + "rewards/weighted_rejected": -0.7728790044784546, + "step": 600 + }, + { + "epoch": 0.3192881444647998, + "grad_norm": 30.722089767456055, + "learning_rate": 8.616949967063871e-07, + "logits/chosen": -0.7851959466934204, + "logits/rejected": -0.82568359375, + "logps/chosen": -323.5859375, + "logps/rejected": -338.609375, + "logps/weighted_chosen": -2.7684326171875, + "logps/weighted_rejected": -3.191943407058716, + "loss": 0.6918, + "rewards/accuracies": 0.684374988079071, + "rewards/chosen": -55.26250076293945, + "rewards/margins": 22.568164825439453, + "rewards/rejected": -77.8238296508789, + "rewards/weighted_accuracies": 0.625, + "rewards/weighted_chosen": -0.423666387796402, + "rewards/weighted_margins": 0.310333251953125, + "rewards/weighted_rejected": -0.733630359172821, + "step": 610 + }, + { + "epoch": 0.3245223763412719, + "grad_norm": 14.894518852233887, + "learning_rate": 8.553258126661154e-07, + "logits/chosen": -0.831768810749054, + "logits/rejected": -0.846484363079071, + "logps/chosen": -338.09686279296875, + "logps/rejected": -336.015625, + "logps/weighted_chosen": -2.84112548828125, + "logps/weighted_rejected": -3.4341063499450684, + "loss": 0.708, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 
-58.20390701293945, + "rewards/margins": 21.783594131469727, + "rewards/rejected": -79.9749984741211, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.4535583555698395, + "rewards/weighted_margins": 0.33486634492874146, + "rewards/weighted_rejected": -0.788104236125946, + "step": 620 + }, + { + "epoch": 0.32975660821774405, + "grad_norm": 23.813823699951172, + "learning_rate": 8.488379526908368e-07, + "logits/chosen": -0.826812744140625, + "logits/rejected": -0.837506115436554, + "logps/chosen": -352.4593811035156, + "logps/rejected": -357.29376220703125, + "logps/weighted_chosen": -2.6490235328674316, + "logps/weighted_rejected": -3.211181640625, + "loss": 0.6454, + "rewards/accuracies": 0.640625, + "rewards/chosen": -61.892189025878906, + "rewards/margins": 26.424219131469727, + "rewards/rejected": -88.33320617675781, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.3862060606479645, + "rewards/weighted_margins": 0.415771484375, + "rewards/weighted_rejected": -0.8020385503768921, + "step": 630 + }, + { + "epoch": 0.33499084009421615, + "grad_norm": 19.41891098022461, + "learning_rate": 8.422335836730802e-07, + "logits/chosen": -0.7994629144668579, + "logits/rejected": -0.7995041012763977, + "logps/chosen": -333.2593688964844, + "logps/rejected": -366.6499938964844, + "logps/weighted_chosen": -2.6496825218200684, + "logps/weighted_rejected": -3.1250548362731934, + "loss": 0.6734, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -59.58320236206055, + "rewards/margins": 29.476757049560547, + "rewards/rejected": -89.0625, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.47590941190719604, + "rewards/weighted_margins": 0.394134521484375, + "rewards/weighted_rejected": -0.8701080083847046, + "step": 640 + }, + { + "epoch": 0.3402250719706883, + "grad_norm": 16.819276809692383, + "learning_rate": 8.355149114184485e-07, + "logits/chosen": 
-0.846386730670929, + "logits/rejected": -0.8338836431503296, + "logps/chosen": -370.0062561035156, + "logps/rejected": -376.0843811035156, + "logps/weighted_chosen": -2.787951707839966, + "logps/weighted_rejected": -3.106738328933716, + "loss": 0.6483, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -62.594337463378906, + "rewards/margins": 31.204687118530273, + "rewards/rejected": -93.80000305175781, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.5348541140556335, + "rewards/weighted_margins": 0.446258544921875, + "rewards/weighted_rejected": -0.980926513671875, + "step": 650 + }, + { + "epoch": 0.34545930384716045, + "grad_norm": 18.66504669189453, + "learning_rate": 8.286841799088963e-07, + "logits/chosen": -0.8683761358261108, + "logits/rejected": -0.860211193561554, + "logps/chosen": -344.94061279296875, + "logps/rejected": -343.61407470703125, + "logps/weighted_chosen": -2.3468871116638184, + "logps/weighted_rejected": -2.966168165206909, + "loss": 0.6577, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -60.857032775878906, + "rewards/margins": 19.649999618530273, + "rewards/rejected": -80.50117492675781, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.4206695556640625, + "rewards/weighted_margins": 0.41551512479782104, + "rewards/weighted_rejected": -0.8359512090682983, + "step": 660 + }, + { + "epoch": 0.35069353572363254, + "grad_norm": 26.941055297851562, + "learning_rate": 8.217436705532599e-07, + "logits/chosen": -0.8248077630996704, + "logits/rejected": -0.8512848019599915, + "logps/chosen": -369.5484313964844, + "logps/rejected": -348.30157470703125, + "logps/weighted_chosen": -2.3807740211486816, + "logps/weighted_rejected": -3.006176710128784, + "loss": 0.6373, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -66.71875, + "rewards/margins": 17.846094131469727, + "rewards/rejected": -84.57929992675781, + 
"rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.3303161561489105, + "rewards/weighted_margins": 0.45032960176467896, + "rewards/weighted_rejected": -0.7810913324356079, + "step": 670 + }, + { + "epoch": 0.3559277676001047, + "grad_norm": 1516.0828857421875, + "learning_rate": 8.14695701425284e-07, + "logits/chosen": -0.8020523190498352, + "logits/rejected": -0.845899224281311, + "logps/chosen": -371.13751220703125, + "logps/rejected": -352.38592529296875, + "logps/weighted_chosen": -2.49072265625, + "logps/weighted_rejected": -3.2708497047424316, + "loss": 0.5885, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -65.716796875, + "rewards/margins": 24.369531631469727, + "rewards/rejected": -90.1097640991211, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.330526739358902, + "rewards/weighted_margins": 0.5371948480606079, + "rewards/weighted_rejected": -0.867462158203125, + "step": 680 + }, + { + "epoch": 0.3611619994765768, + "grad_norm": 295.9232482910156, + "learning_rate": 8.075426264894046e-07, + "logits/chosen": -0.7686309814453125, + "logits/rejected": -0.805737316608429, + "logps/chosen": -370.75, + "logps/rejected": -373.64373779296875, + "logps/weighted_chosen": -2.509265184402466, + "logps/weighted_rejected": -3.719970703125, + "loss": 0.5532, + "rewards/accuracies": 0.6875, + "rewards/chosen": -65.3414077758789, + "rewards/margins": 30.711523056030273, + "rewards/rejected": -96.052734375, + "rewards/weighted_accuracies": 0.7406250238418579, + "rewards/weighted_chosen": -0.3384948670864105, + "rewards/weighted_margins": 0.6478027105331421, + "rewards/weighted_rejected": -0.9860213994979858, + "step": 690 + }, + { + "epoch": 0.36639623135304894, + "grad_norm": 80.06324768066406, + "learning_rate": 8.002868348145435e-07, + "logits/chosen": -0.7615035772323608, + "logits/rejected": -0.758954644203186, + "logps/chosen": -364.390625, + "logps/rejected": -353.75, + 
"logps/weighted_chosen": -2.562756299972534, + "logps/weighted_rejected": -2.821521043777466, + "loss": 0.6219, + "rewards/accuracies": 0.609375, + "rewards/chosen": -64.1957015991211, + "rewards/margins": 21.513866424560547, + "rewards/rejected": -85.70429992675781, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -0.38551026582717896, + "rewards/weighted_margins": 0.45988160371780396, + "rewards/weighted_rejected": -0.8446716070175171, + "step": 700 + }, + { + "epoch": 0.3716304632295211, + "grad_norm": 1476.4896240234375, + "learning_rate": 7.92930749776179e-07, + "logits/chosen": -0.75201416015625, + "logits/rejected": -0.7803069949150085, + "logps/chosen": -337.1851501464844, + "logps/rejected": -346.5375061035156, + "logps/weighted_chosen": -2.752087354660034, + "logps/weighted_rejected": -3.417065382003784, + "loss": 0.6452, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -65.52070617675781, + "rewards/margins": 20.681640625, + "rewards/rejected": -86.203125, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -0.2934509217739105, + "rewards/weighted_margins": 0.537158191204071, + "rewards/weighted_rejected": -0.8301132321357727, + "step": 710 + }, + { + "epoch": 0.3768646951059932, + "grad_norm": 37.869117736816406, + "learning_rate": 7.854768282469582e-07, + "logits/chosen": -0.814867377281189, + "logits/rejected": -0.8494598269462585, + "logps/chosen": -332.6953125, + "logps/rejected": -362.8187561035156, + "logps/weighted_chosen": -2.5288939476013184, + "logps/weighted_rejected": -3.1172118186950684, + "loss": 0.6496, + "rewards/accuracies": 0.640625, + "rewards/chosen": -55.247657775878906, + "rewards/margins": 29.8330078125, + "rewards/rejected": -85.080078125, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.355978399515152, + "rewards/weighted_margins": 0.4522338807582855, + "rewards/weighted_rejected": -0.8084503412246704, + "step": 720 + }, + { + 
"epoch": 0.38209892698246534, + "grad_norm": 48.1231575012207, + "learning_rate": 7.779275597761215e-07, + "logits/chosen": -0.7673202753067017, + "logits/rejected": -0.81195068359375, + "logps/chosen": -330.94451904296875, + "logps/rejected": -355.31561279296875, + "logps/weighted_chosen": -2.6014404296875, + "logps/weighted_rejected": -3.1932616233825684, + "loss": 0.5952, + "rewards/accuracies": 0.690625011920929, + "rewards/chosen": -54.890235900878906, + "rewards/margins": 31.880468368530273, + "rewards/rejected": -86.75312805175781, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.2228240966796875, + "rewards/weighted_margins": 0.6149749755859375, + "rewards/weighted_rejected": -0.8373657464981079, + "step": 730 + }, + { + "epoch": 0.38733315885893743, + "grad_norm": 296.0299377441406, + "learning_rate": 7.702854657580126e-07, + "logits/chosen": -0.8295089602470398, + "logits/rejected": -0.843798816204071, + "logps/chosen": -352.19842529296875, + "logps/rejected": -333.7593688964844, + "logps/weighted_chosen": -2.45697021484375, + "logps/weighted_rejected": -3.375244140625, + "loss": 0.6318, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -57.54804611206055, + "rewards/margins": 21.8720703125, + "rewards/rejected": -79.4625015258789, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.19748535752296448, + "rewards/weighted_margins": 0.5647827386856079, + "rewards/weighted_rejected": -0.762377917766571, + "step": 740 + }, + { + "epoch": 0.3925673907354096, + "grad_norm": 222.2068328857422, + "learning_rate": 7.625530985899547e-07, + "logits/chosen": -0.8145691156387329, + "logits/rejected": -0.8263305425643921, + "logps/chosen": -328.7578125, + "logps/rejected": -331.390625, + "logps/weighted_chosen": -2.581188917160034, + "logps/weighted_rejected": -3.505688428878784, + "loss": 0.6377, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -61.987892150878906, + 
"rewards/margins": 24.421483993530273, + "rewards/rejected": -86.39921569824219, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -0.493093878030777, + "rewards/weighted_margins": 0.4742370545864105, + "rewards/weighted_rejected": -0.9672302007675171, + "step": 750 + }, + { + "epoch": 0.39780162261188173, + "grad_norm": 34.57517623901367, + "learning_rate": 7.547330408197694e-07, + "logits/chosen": -0.8249969482421875, + "logits/rejected": -0.8720428347587585, + "logps/chosen": -363.44219970703125, + "logps/rejected": -345.2250061035156, + "logps/weighted_chosen": -2.4618163108825684, + "logps/weighted_rejected": -3.28759765625, + "loss": 0.6383, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -67.7855453491211, + "rewards/margins": 21.181835174560547, + "rewards/rejected": -88.9535140991211, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.3757568299770355, + "rewards/weighted_margins": 0.4518585205078125, + "rewards/weighted_rejected": -0.8270477056503296, + "step": 760 + }, + { + "epoch": 0.40303585448835383, + "grad_norm": 26.55402946472168, + "learning_rate": 7.468279042832271e-07, + "logits/chosen": -0.8424628973007202, + "logits/rejected": -0.8771301507949829, + "logps/chosen": -347.6484375, + "logps/rejected": -388.42266845703125, + "logps/weighted_chosen": -2.660815477371216, + "logps/weighted_rejected": -3.1720213890075684, + "loss": 0.6743, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -67.98554992675781, + "rewards/margins": 30.81640625, + "rewards/rejected": -98.75859069824219, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.5194793939590454, + "rewards/weighted_margins": 0.3566345274448395, + "rewards/weighted_rejected": -0.8764098882675171, + "step": 770 + }, + { + "epoch": 0.408270086364826, + "grad_norm": 14.42599868774414, + "learning_rate": 7.388403292317154e-07, + "logits/chosen": -0.7979522943496704, + 
"logits/rejected": -0.8573578000068665, + "logps/chosen": -367.4046936035156, + "logps/rejected": -358.2515563964844, + "logps/weighted_chosen": -2.490283250808716, + "logps/weighted_rejected": -3.1418213844299316, + "loss": 0.6398, + "rewards/accuracies": 0.65625, + "rewards/chosen": -66.2933578491211, + "rewards/margins": 26.066015243530273, + "rewards/rejected": -92.384765625, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -0.461639404296875, + "rewards/weighted_margins": 0.452981561422348, + "rewards/weighted_rejected": -0.914324939250946, + "step": 780 + }, + { + "epoch": 0.4135043182412981, + "grad_norm": 21.45990753173828, + "learning_rate": 7.307729834504154e-07, + "logits/chosen": -0.8032287359237671, + "logits/rejected": -0.8670104742050171, + "logps/chosen": -351.39532470703125, + "logps/rejected": -360.1734313964844, + "logps/weighted_chosen": -2.3328614234924316, + "logps/weighted_rejected": -3.198779344558716, + "loss": 0.6356, + "rewards/accuracies": 0.609375, + "rewards/chosen": -69.6167984008789, + "rewards/margins": 24.462499618530273, + "rewards/rejected": -94.07890319824219, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.3527267575263977, + "rewards/weighted_margins": 0.45725250244140625, + "rewards/weighted_rejected": -0.8100005984306335, + "step": 790 + }, + { + "epoch": 0.4187385501177702, + "grad_norm": 20.13976287841797, + "learning_rate": 7.226285613672847e-07, + "logits/chosen": -0.741473376750946, + "logits/rejected": -0.7820758819580078, + "logps/chosen": -342.1156311035156, + "logps/rejected": -382.6937561035156, + "logps/weighted_chosen": -2.3370361328125, + "logps/weighted_rejected": -3.3424315452575684, + "loss": 0.6236, + "rewards/accuracies": 0.703125, + "rewards/chosen": -64.8167953491211, + "rewards/margins": 39.174217224121094, + "rewards/rejected": -103.96992492675781, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": 
-0.34785765409469604, + "rewards/weighted_margins": 0.5167236328125, + "rewards/weighted_rejected": -0.864398181438446, + "step": 800 + }, + { + "epoch": 0.4239727819942423, + "grad_norm": 21.571788787841797, + "learning_rate": 7.144097831531398e-07, + "logits/chosen": -0.6900985836982727, + "logits/rejected": -0.7189788818359375, + "logps/chosen": -344.78436279296875, + "logps/rejected": -364.53436279296875, + "logps/weighted_chosen": -2.346606492996216, + "logps/weighted_rejected": -3.139209032058716, + "loss": 0.6171, + "rewards/accuracies": 0.640625, + "rewards/chosen": -74.72969055175781, + "rewards/margins": 25.596094131469727, + "rewards/rejected": -100.31640625, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.332855224609375, + "rewards/weighted_margins": 0.513507068157196, + "rewards/weighted_rejected": -0.8466736078262329, + "step": 810 + }, + { + "epoch": 0.42920701387071447, + "grad_norm": 23.761091232299805, + "learning_rate": 7.061193938131396e-07, + "logits/chosen": -0.620227038860321, + "logits/rejected": -0.6747413873672485, + "logps/chosen": -377.5609436035156, + "logps/rejected": -363.4296875, + "logps/weighted_chosen": -2.772265672683716, + "logps/weighted_rejected": -3.069580078125, + "loss": 0.6365, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -77.7945327758789, + "rewards/margins": 20.221874237060547, + "rewards/rejected": -97.98515319824219, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.3748535215854645, + "rewards/weighted_margins": 0.473724365234375, + "rewards/weighted_rejected": -0.8482757806777954, + "step": 820 + }, + { + "epoch": 0.4344412457471866, + "grad_norm": 21.571779251098633, + "learning_rate": 6.977601622699789e-07, + "logits/chosen": -0.689013659954071, + "logits/rejected": -0.7498534917831421, + "logps/chosen": -354.41876220703125, + "logps/rejected": -392.3500061035156, + "logps/weighted_chosen": -2.6583251953125, + 
"logps/weighted_rejected": -3.377002000808716, + "loss": 0.5618, + "rewards/accuracies": 0.7093750238418579, + "rewards/chosen": -69.55390930175781, + "rewards/margins": 43.986717224121094, + "rewards/rejected": -113.5484390258789, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -0.28594970703125, + "rewards/weighted_margins": 0.6694701910018921, + "rewards/weighted_rejected": -0.955474853515625, + "step": 830 + }, + { + "epoch": 0.4396754776236587, + "grad_norm": 21.845787048339844, + "learning_rate": 6.893348804390882e-07, + "logits/chosen": -0.7911956906318665, + "logits/rejected": -0.8087249994277954, + "logps/chosen": -377.0531311035156, + "logps/rejected": -377.48126220703125, + "logps/weighted_chosen": -2.844287157058716, + "logps/weighted_rejected": -3.24560546875, + "loss": 0.5927, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -81.9203109741211, + "rewards/margins": 35.79375076293945, + "rewards/rejected": -117.70625305175781, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.3613952696323395, + "rewards/weighted_margins": 0.5852203369140625, + "rewards/weighted_rejected": -0.94671630859375, + "step": 840 + }, + { + "epoch": 0.44490970950013087, + "grad_norm": 13.673724174499512, + "learning_rate": 6.808463622961578e-07, + "logits/chosen": -0.765423595905304, + "logits/rejected": -0.8230966329574585, + "logps/chosen": -385.33905029296875, + "logps/rejected": -413.21563720703125, + "logps/weighted_chosen": -2.7145752906799316, + "logps/weighted_rejected": -3.412890672683716, + "loss": 0.5718, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -92.6429672241211, + "rewards/margins": 38.67695236206055, + "rewards/rejected": -131.3136749267578, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.43825072050094604, + "rewards/weighted_margins": 0.649249255657196, + "rewards/weighted_rejected": -1.0877685546875, + "step": 850 + }, + { + 
"epoch": 0.45014394137660296, + "grad_norm": 17.156784057617188, + "learning_rate": 6.722974429372925e-07, + "logits/chosen": -0.733477771282196, + "logits/rejected": -0.7933975458145142, + "logps/chosen": -418.08282470703125, + "logps/rejected": -417.9937438964844, + "logps/weighted_chosen": -2.5140380859375, + "logps/weighted_rejected": -3.9316039085388184, + "loss": 0.5611, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -114.50508117675781, + "rewards/margins": 41.392189025878906, + "rewards/rejected": -155.9523468017578, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.5683807134628296, + "rewards/weighted_margins": 0.804516613483429, + "rewards/weighted_rejected": -1.373052954673767, + "step": 860 + }, + { + "epoch": 0.4553781732530751, + "grad_norm": 23.082002639770508, + "learning_rate": 6.636909776321128e-07, + "logits/chosen": -0.8063064813613892, + "logits/rejected": -0.802105724811554, + "logps/chosen": -369.12811279296875, + "logps/rejected": -413.70782470703125, + "logps/weighted_chosen": -2.945758104324341, + "logps/weighted_rejected": -3.6049561500549316, + "loss": 0.5946, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -107.4222640991211, + "rewards/margins": 41.763671875, + "rewards/rejected": -149.2078094482422, + "rewards/weighted_accuracies": 0.65625, + "rewards/weighted_chosen": -0.640515148639679, + "rewards/weighted_margins": 0.601641833782196, + "rewards/weighted_rejected": -1.2423064708709717, + "step": 870 + }, + { + "epoch": 0.46061240512954726, + "grad_norm": 27.672487258911133, + "learning_rate": 6.550298408701174e-07, + "logits/chosen": -0.778796374797821, + "logits/rejected": -0.830426037311554, + "logps/chosen": -389.8421936035156, + "logps/rejected": -428.6312561035156, + "logps/weighted_chosen": -3.13909912109375, + "logps/weighted_rejected": -4.012915134429932, + "loss": 0.6358, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -103.14042663574219, + 
"rewards/margins": 43.176368713378906, + "rewards/rejected": -146.2609405517578, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.5248870849609375, + "rewards/weighted_margins": 0.555926501750946, + "rewards/weighted_rejected": -1.0807831287384033, + "step": 880 + }, + { + "epoch": 0.46584663700601936, + "grad_norm": 20.153644561767578, + "learning_rate": 6.463169254006276e-07, + "logits/chosen": -0.7750915288925171, + "logits/rejected": -0.8219833374023438, + "logps/chosen": -377.8421936035156, + "logps/rejected": -378.10467529296875, + "logps/weighted_chosen": -2.7843995094299316, + "logps/weighted_rejected": -3.576098680496216, + "loss": 0.5705, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -96.46601867675781, + "rewards/margins": 34.888282775878906, + "rewards/rejected": -131.3464813232422, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.39057618379592896, + "rewards/weighted_margins": 0.706072986125946, + "rewards/weighted_rejected": -1.0967223644256592, + "step": 890 + }, + { + "epoch": 0.4710808688824915, + "grad_norm": 42.77175521850586, + "learning_rate": 6.375551412666326e-07, + "logits/chosen": -0.7759063839912415, + "logits/rejected": -0.8005096316337585, + "logps/chosen": -379.19219970703125, + "logps/rejected": -388.72344970703125, + "logps/weighted_chosen": -2.5501952171325684, + "logps/weighted_rejected": -3.5579466819763184, + "loss": 0.6503, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -105.25859069824219, + "rewards/margins": 27.150781631469727, + "rewards/rejected": -132.42733764648438, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.5587005615234375, + "rewards/weighted_margins": 0.5813232660293579, + "rewards/weighted_rejected": -1.1405792236328125, + "step": 900 + }, + { + "epoch": 0.4763151007589636, + "grad_norm": 24.800992965698242, + "learning_rate": 6.287474148328583e-07, + "logits/chosen": 
-0.7202819585800171, + "logits/rejected": -0.7240753173828125, + "logps/chosen": -371.1812438964844, + "logps/rejected": -371.109375, + "logps/weighted_chosen": -2.950915575027466, + "logps/weighted_rejected": -4.175073146820068, + "loss": 0.6282, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -100.2894515991211, + "rewards/margins": 23.904687881469727, + "rewards/rejected": -124.20625305175781, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -0.5755615234375, + "rewards/weighted_margins": 0.5273803472518921, + "rewards/weighted_rejected": -1.1032683849334717, + "step": 910 + }, + { + "epoch": 0.48154933263543576, + "grad_norm": 18.644733428955078, + "learning_rate": 6.198966878083857e-07, + "logits/chosen": -0.7572265863418579, + "logits/rejected": -0.7787246704101562, + "logps/chosen": -368.4359436035156, + "logps/rejected": -402.46563720703125, + "logps/weighted_chosen": -2.8515868186950684, + "logps/weighted_rejected": -3.4952635765075684, + "loss": 0.6159, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -97.32890319824219, + "rewards/margins": 38.184959411621094, + "rewards/rejected": -135.45703125, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -0.4532226622104645, + "rewards/weighted_margins": 0.559436023235321, + "rewards/weighted_rejected": -1.012457251548767, + "step": 920 + }, + { + "epoch": 0.48678356451190785, + "grad_norm": 44.07575988769531, + "learning_rate": 6.110059162641439e-07, + "logits/chosen": -0.7723480463027954, + "logits/rejected": -0.802471935749054, + "logps/chosen": -375.984375, + "logps/rejected": -391.7906188964844, + "logps/weighted_chosen": -2.391467332839966, + "logps/weighted_rejected": -3.1367430686950684, + "loss": 0.6244, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -93.6128921508789, + "rewards/margins": 30.975391387939453, + "rewards/rejected": -124.58906555175781, + "rewards/weighted_accuracies": 
0.6781250238418579, + "rewards/weighted_chosen": -0.4246048033237457, + "rewards/weighted_margins": 0.47894287109375, + "rewards/weighted_rejected": -0.904034435749054, + "step": 930 + }, + { + "epoch": 0.49201779638838, + "grad_norm": 26.321582794189453, + "learning_rate": 6.020780696456059e-07, + "logits/chosen": -0.7484909296035767, + "logits/rejected": -0.7926574945449829, + "logps/chosen": -359.21875, + "logps/rejected": -411.1890563964844, + "logps/weighted_chosen": -2.2330689430236816, + "logps/weighted_rejected": -3.3442625999450684, + "loss": 0.5653, + "rewards/accuracies": 0.6875, + "rewards/chosen": -88.5511703491211, + "rewards/margins": 55.419921875, + "rewards/rejected": -143.99453735351562, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.47887879610061646, + "rewards/weighted_margins": 0.6291259527206421, + "rewards/weighted_rejected": -1.1078612804412842, + "step": 940 + }, + { + "epoch": 0.49725202826485215, + "grad_norm": 44.13637924194336, + "learning_rate": 5.931161297810185e-07, + "logits/chosen": -0.8126861453056335, + "logits/rejected": -0.829357922077179, + "logps/chosen": -376.6875, + "logps/rejected": -399.51251220703125, + "logps/weighted_chosen": -3.1253294944763184, + "logps/weighted_rejected": -3.8741211891174316, + "loss": 0.6461, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -103.4652328491211, + "rewards/margins": 34.713279724121094, + "rewards/rejected": -138.1457061767578, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -0.616656482219696, + "rewards/weighted_margins": 0.5488006472587585, + "rewards/weighted_rejected": -1.1659362316131592, + "step": 950 + }, + { + "epoch": 0.5024862601413242, + "grad_norm": 24.634550094604492, + "learning_rate": 5.841230898854959e-07, + "logits/chosen": -0.742846667766571, + "logits/rejected": -0.765765368938446, + "logps/chosen": -421.84686279296875, + "logps/rejected": -429.4312438964844, + "logps/weighted_chosen": 
-2.9749999046325684, + "logps/weighted_rejected": -3.778076171875, + "loss": 0.6955, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -121.6539077758789, + "rewards/margins": 46.04961013793945, + "rewards/rejected": -167.69375610351562, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.820935070514679, + "rewards/weighted_margins": 0.548413097858429, + "rewards/weighted_rejected": -1.369299292564392, + "step": 960 + }, + { + "epoch": 0.5077204920177963, + "grad_norm": 28.748939514160156, + "learning_rate": 5.751019535613102e-07, + "logits/chosen": -0.6985992193222046, + "logits/rejected": -0.7225921750068665, + "logps/chosen": -365.7984313964844, + "logps/rejected": -400.90625, + "logps/weighted_chosen": -2.9286131858825684, + "logps/weighted_rejected": -4.021093845367432, + "loss": 0.6325, + "rewards/accuracies": 0.684374988079071, + "rewards/chosen": -106.0390625, + "rewards/margins": 47.953514099121094, + "rewards/rejected": -153.97265625, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.7157821655273438, + "rewards/weighted_margins": 0.7237914800643921, + "rewards/weighted_rejected": -1.439599633216858, + "step": 970 + }, + { + "epoch": 0.5129547238942685, + "grad_norm": 15.98474407196045, + "learning_rate": 5.660557337947117e-07, + "logits/chosen": -0.6841033697128296, + "logits/rejected": -0.6997619867324829, + "logps/chosen": -409.1937561035156, + "logps/rejected": -406.05938720703125, + "logps/weighted_chosen": -2.480639696121216, + "logps/weighted_rejected": -3.3584961891174316, + "loss": 0.5997, + "rewards/accuracies": 0.640625, + "rewards/chosen": -114.5328140258789, + "rewards/margins": 34.099998474121094, + "rewards/rejected": -148.6570281982422, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.572741687297821, + "rewards/weighted_margins": 0.527575671672821, + "rewards/weighted_rejected": -1.100128173828125, + "step": 980 + }, + { + 
"epoch": 0.5181889557707406, + "grad_norm": 41.099185943603516, + "learning_rate": 5.569874519496174e-07, + "logits/chosen": -0.7119758725166321, + "logits/rejected": -0.7671966552734375, + "logps/chosen": -381.44842529296875, + "logps/rejected": -410.5015563964844, + "logps/weighted_chosen": -2.8494019508361816, + "logps/weighted_rejected": -3.8323974609375, + "loss": 0.6259, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -106.146484375, + "rewards/margins": 37.33867263793945, + "rewards/rejected": -143.4011688232422, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.4599502682685852, + "rewards/weighted_margins": 0.6096404790878296, + "rewards/weighted_rejected": -1.070257544517517, + "step": 990 + }, + { + "epoch": 0.5234231876472127, + "grad_norm": 34.9498176574707, + "learning_rate": 5.47900136758499e-07, + "logits/chosen": -0.6499813199043274, + "logits/rejected": -0.7194549441337585, + "logps/chosen": -369.4429626464844, + "logps/rejected": -382.0953063964844, + "logps/weighted_chosen": -2.7041993141174316, + "logps/weighted_rejected": -3.534008741378784, + "loss": 0.5974, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -101.8080062866211, + "rewards/margins": 38.08086013793945, + "rewards/rejected": -139.9406280517578, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -0.44673460721969604, + "rewards/weighted_margins": 0.6419677734375, + "rewards/weighted_rejected": -1.08880615234375, + "step": 1000 + }, + { + "epoch": 0.5234231876472127, + "eval_logits/chosen": -0.7614516615867615, + "eval_logits/rejected": -0.7845029234886169, + "eval_logps/chosen": -403.2145080566406, + "eval_logps/rejected": -419.8420104980469, + "eval_logps/weighted_chosen": -2.8744430541992188, + "eval_logps/weighted_rejected": -3.6894454956054688, + "eval_loss": 0.6146492958068848, + "eval_rewards/accuracies": 0.6269999742507935, + "eval_rewards/chosen": -114.45649719238281, + 
"eval_rewards/margins": 37.66427993774414, + "eval_rewards/rejected": -152.1232452392578, + "eval_rewards/weighted_accuracies": 0.6679999828338623, + "eval_rewards/weighted_chosen": -0.6033662557601929, + "eval_rewards/weighted_margins": 0.5922585129737854, + "eval_rewards/weighted_rejected": -1.195624828338623, + "eval_runtime": 1076.2039, + "eval_samples_per_second": 1.858, + "eval_steps_per_second": 0.465, + "step": 1000 + }, + { + "epoch": 0.528657419523685, + "grad_norm": 27.505380630493164, + "learning_rate": 5.387968233108113e-07, + "logits/chosen": -0.6624916195869446, + "logits/rejected": -0.6601654291152954, + "logps/chosen": -405.12188720703125, + "logps/rejected": -412.67498779296875, + "logps/weighted_chosen": -3.1322021484375, + "logps/weighted_rejected": -3.950244188308716, + "loss": 0.6199, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -121.03202819824219, + "rewards/margins": 36.890625, + "rewards/rejected": -157.9210968017578, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -0.70098876953125, + "rewards/weighted_margins": 0.5645721554756165, + "rewards/weighted_rejected": -1.2653167247772217, + "step": 1010 + }, + { + "epoch": 0.533891651400157, + "grad_norm": 79.68768310546875, + "learning_rate": 5.296805520392962e-07, + "logits/chosen": -0.7723739743232727, + "logits/rejected": -0.8210830688476562, + "logps/chosen": -450.1703186035156, + "logps/rejected": -454.51092529296875, + "logps/weighted_chosen": -2.5447754859924316, + "logps/weighted_rejected": -3.6876220703125, + "loss": 0.6481, + "rewards/accuracies": 0.625, + "rewards/chosen": -137.76171875, + "rewards/margins": 34.125, + "rewards/rejected": -171.921875, + "rewards/weighted_accuracies": 0.628125011920929, + "rewards/weighted_chosen": -0.7918975949287415, + "rewards/weighted_margins": 0.5129028558731079, + "rewards/weighted_rejected": -1.3054625988006592, + "step": 1020 + }, + { + "epoch": 0.5391258832766291, + "grad_norm": 20.471067428588867, + 
"learning_rate": 5.205543677045049e-07, + "logits/chosen": -0.6790481805801392, + "logits/rejected": -0.7319175601005554, + "logps/chosen": -398.40313720703125, + "logps/rejected": -404.73126220703125, + "logps/weighted_chosen": -3.0057005882263184, + "logps/weighted_rejected": -3.9285340309143066, + "loss": 0.5668, + "rewards/accuracies": 0.640625, + "rewards/chosen": -124.0816421508789, + "rewards/margins": 35.26093673706055, + "rewards/rejected": -159.30624389648438, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.6341613531112671, + "rewards/weighted_margins": 0.74554443359375, + "rewards/weighted_rejected": -1.380334496498108, + "step": 1030 + }, + { + "epoch": 0.5443601151531012, + "grad_norm": 39.27846908569336, + "learning_rate": 5.114213183778697e-07, + "logits/chosen": -0.7818267941474915, + "logits/rejected": -0.8340057134628296, + "logps/chosen": -415.5874938964844, + "logps/rejected": -452.16876220703125, + "logps/weighted_chosen": -3.5662598609924316, + "logps/weighted_rejected": -4.276171684265137, + "loss": 0.569, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -135.00546264648438, + "rewards/margins": 60.15546798706055, + "rewards/rejected": -195.2058563232422, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -0.781695544719696, + "rewards/weighted_margins": 0.810791015625, + "rewards/weighted_rejected": -1.592443823814392, + "step": 1040 + }, + { + "epoch": 0.5495943470295734, + "grad_norm": 91.79661560058594, + "learning_rate": 5.022844544236754e-07, + "logits/chosen": -0.6607025265693665, + "logits/rejected": -0.6952179074287415, + "logps/chosen": -432.9281311035156, + "logps/rejected": -456.8125, + "logps/weighted_chosen": -3.3310546875, + "logps/weighted_rejected": -4.275341987609863, + "loss": 0.6363, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -152.73593139648438, + "rewards/margins": 49.055274963378906, + "rewards/rejected": 
-201.7624969482422, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -0.858135998249054, + "rewards/weighted_margins": 0.720965564250946, + "rewards/weighted_rejected": -1.578680396080017, + "step": 1050 + }, + { + "epoch": 0.5548285789060455, + "grad_norm": 22.53008270263672, + "learning_rate": 4.931468274802608e-07, + "logits/chosen": -0.6346206665039062, + "logits/rejected": -0.672467052936554, + "logps/chosen": -418.01251220703125, + "logps/rejected": -446.93280029296875, + "logps/weighted_chosen": -2.7108397483825684, + "logps/weighted_rejected": -3.727123975753784, + "loss": 0.6256, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -135.76327514648438, + "rewards/margins": 50.416015625, + "rewards/rejected": -186.1906280517578, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -0.803393542766571, + "rewards/weighted_margins": 0.617443859577179, + "rewards/weighted_rejected": -1.420556664466858, + "step": 1060 + }, + { + "epoch": 0.5600628107825176, + "grad_norm": 26.215744018554688, + "learning_rate": 4.840114894407974e-07, + "logits/chosen": -0.6653366088867188, + "logits/rejected": -0.712872326374054, + "logps/chosen": -398.4820251464844, + "logps/rejected": -414.1234436035156, + "logps/weighted_chosen": -3.0914406776428223, + "logps/weighted_rejected": -3.4233641624450684, + "loss": 0.6361, + "rewards/accuracies": 0.65625, + "rewards/chosen": -122.33203125, + "rewards/margins": 40.347267150878906, + "rewards/rejected": -162.654296875, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -0.7576019167900085, + "rewards/weighted_margins": 0.5621368288993835, + "rewards/weighted_rejected": -1.3194916248321533, + "step": 1070 + }, + { + "epoch": 0.5652970426589898, + "grad_norm": 29.65507698059082, + "learning_rate": 4.748814914339811e-07, + "logits/chosen": -0.6571930050849915, + "logits/rejected": -0.6954696774482727, + "logps/chosen": -399.234375, + 
"logps/rejected": -414.0953063964844, + "logps/weighted_chosen": -2.943469285964966, + "logps/weighted_rejected": -3.2953858375549316, + "loss": 0.6196, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -114.6865234375, + "rewards/margins": 44.128517150878906, + "rewards/rejected": -158.8156280517578, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.5992889404296875, + "rewards/weighted_margins": 0.569287121295929, + "rewards/weighted_rejected": -1.1684296131134033, + "step": 1080 + }, + { + "epoch": 0.5705312745354619, + "grad_norm": 18.823802947998047, + "learning_rate": 4.657598828049801e-07, + "logits/chosen": -0.712292492389679, + "logits/rejected": -0.7842041254043579, + "logps/chosen": -409.9398498535156, + "logps/rejected": -442.9085998535156, + "logps/weighted_chosen": -2.665234327316284, + "logps/weighted_rejected": -3.152148485183716, + "loss": 0.6048, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -119.8671875, + "rewards/margins": 42.529296875, + "rewards/rejected": -162.3874969482422, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.5373901128768921, + "rewards/weighted_margins": 0.5821533203125, + "rewards/weighted_rejected": -1.1196105480194092, + "step": 1090 + }, + { + "epoch": 0.575765506411934, + "grad_norm": 30.54826545715332, + "learning_rate": 4.566497100969792e-07, + "logits/chosen": -0.6948333978652954, + "logits/rejected": -0.7300826907157898, + "logps/chosen": -466.7093811035156, + "logps/rejected": -488.1484375, + "logps/weighted_chosen": -3.112719774246216, + "logps/weighted_rejected": -3.795117139816284, + "loss": 0.6565, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -144.0187530517578, + "rewards/margins": 45.876953125, + "rewards/rejected": -189.8953094482422, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -0.7201172113418579, + "rewards/weighted_margins": 0.6079467535018921, + 
"rewards/weighted_rejected": -1.328240990638733, + "step": 1100 + }, + { + "epoch": 0.5809997382884062, + "grad_norm": 34.50307083129883, + "learning_rate": 4.475540160336576e-07, + "logits/chosen": -0.6901916265487671, + "logits/rejected": -0.7613769769668579, + "logps/chosen": -435.2484436035156, + "logps/rejected": -444.1171875, + "logps/weighted_chosen": -3.2200684547424316, + "logps/weighted_rejected": -4.114941596984863, + "loss": 0.5873, + "rewards/accuracies": 0.640625, + "rewards/chosen": -137.34335327148438, + "rewards/margins": 48.84492111206055, + "rewards/rejected": -186.18984985351562, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.648083508014679, + "rewards/weighted_margins": 0.723559558391571, + "rewards/weighted_rejected": -1.3714599609375, + "step": 1110 + }, + { + "epoch": 0.5862339701648783, + "grad_norm": 39.40055465698242, + "learning_rate": 4.3847583850294565e-07, + "logits/chosen": -0.6873275637626648, + "logits/rejected": -0.7066406011581421, + "logps/chosen": -451.85467529296875, + "logps/rejected": -466.85626220703125, + "logps/weighted_chosen": -3.4102540016174316, + "logps/weighted_rejected": -3.818554639816284, + "loss": 0.604, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -160.84024047851562, + "rewards/margins": 46.264060974121094, + "rewards/rejected": -207.0906219482422, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.7199951410293579, + "rewards/weighted_margins": 0.6950317621231079, + "rewards/weighted_rejected": -1.414831519126892, + "step": 1120 + }, + { + "epoch": 0.5914682020413504, + "grad_norm": 21.84746551513672, + "learning_rate": 4.294182095423934e-07, + "logits/chosen": -0.654815673828125, + "logits/rejected": -0.7277862429618835, + "logps/chosen": -414.6421813964844, + "logps/rejected": -445.29376220703125, + "logps/weighted_chosen": -2.712451219558716, + "logps/weighted_rejected": -3.660595655441284, + "loss": 0.5936, + 
"rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -128.95938110351562, + "rewards/margins": 55.48320388793945, + "rewards/rejected": -184.46875, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -0.6224609613418579, + "rewards/weighted_margins": 0.676257312297821, + "rewards/weighted_rejected": -1.2982666492462158, + "step": 1130 + }, + { + "epoch": 0.5967024339178225, + "grad_norm": 24.940584182739258, + "learning_rate": 4.20384154326496e-07, + "logits/chosen": -0.624951183795929, + "logits/rejected": -0.700488269329071, + "logps/chosen": -379.2828063964844, + "logps/rejected": -386.80938720703125, + "logps/weighted_chosen": -2.8125243186950684, + "logps/weighted_rejected": -3.7369627952575684, + "loss": 0.6115, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -118.71406555175781, + "rewards/margins": 32.34589767456055, + "rewards/rejected": -151.05078125, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -0.5637878179550171, + "rewards/weighted_margins": 0.579620361328125, + "rewards/weighted_rejected": -1.1440308094024658, + "step": 1140 + }, + { + "epoch": 0.6019366657942947, + "grad_norm": 23.36368179321289, + "learning_rate": 4.1137669015630863e-07, + "logits/chosen": -0.641278088092804, + "logits/rejected": -0.70306396484375, + "logps/chosen": -415.4234313964844, + "logps/rejected": -448.7046813964844, + "logps/weighted_chosen": -2.616198778152466, + "logps/weighted_rejected": -3.3738036155700684, + "loss": 0.6451, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -119.44921875, + "rewards/margins": 45.560546875, + "rewards/rejected": -164.9968719482422, + "rewards/weighted_accuracies": 0.65625, + "rewards/weighted_chosen": -0.5660766363143921, + "rewards/weighted_margins": 0.524096667766571, + "rewards/weighted_rejected": -1.090057373046875, + "step": 1150 + }, + { + "epoch": 0.6071708976707668, + "grad_norm": 18.298032760620117, + "learning_rate": 
4.023988254516943e-07, + "logits/chosen": -0.6434844732284546, + "logits/rejected": -0.716784656047821, + "logps/chosen": -421.26873779296875, + "logps/rejected": -432.30780029296875, + "logps/weighted_chosen": -3.1639037132263184, + "logps/weighted_rejected": -3.5290770530700684, + "loss": 0.5673, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -120.2816390991211, + "rewards/margins": 44.93476486206055, + "rewards/rejected": -165.24649047851562, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -0.48875731229782104, + "rewards/weighted_margins": 0.6547485589981079, + "rewards/weighted_rejected": -1.1428101062774658, + "step": 1160 + }, + { + "epoch": 0.6124051295472389, + "grad_norm": 14.904504776000977, + "learning_rate": 3.9345355874653366e-07, + "logits/chosen": -0.661773681640625, + "logits/rejected": -0.694866955280304, + "logps/chosen": -423.79217529296875, + "logps/rejected": -409.97344970703125, + "logps/weighted_chosen": -2.8211731910705566, + "logps/weighted_rejected": -3.5768675804138184, + "loss": 0.6323, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -126.34375, + "rewards/margins": 33.49296951293945, + "rewards/rejected": -159.88827514648438, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.597332775592804, + "rewards/weighted_margins": 0.6163696050643921, + "rewards/weighted_rejected": -1.2137069702148438, + "step": 1170 + }, + { + "epoch": 0.6176393614237111, + "grad_norm": 36.4361572265625, + "learning_rate": 3.8454387768724157e-07, + "logits/chosen": -0.7368072271347046, + "logits/rejected": -0.7342742681503296, + "logps/chosen": -395.6109313964844, + "logps/rejected": -391.0609436035156, + "logps/weighted_chosen": -3.109619140625, + "logps/weighted_rejected": -3.850390672683716, + "loss": 0.6065, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -122.6927719116211, + "rewards/margins": 37.65546798706055, + "rewards/rejected": 
-160.3816375732422, + "rewards/weighted_accuracies": 0.625, + "rewards/weighted_chosen": -0.5655761957168579, + "rewards/weighted_margins": 0.675152599811554, + "rewards/weighted_rejected": -1.2410064935684204, + "step": 1180 + }, + { + "epoch": 0.6228735933001832, + "grad_norm": 25.73611068725586, + "learning_rate": 3.7567275803491525e-07, + "logits/chosen": -0.7339996099472046, + "logits/rejected": -0.7955261468887329, + "logps/chosen": -432.7734375, + "logps/rejected": -440.52813720703125, + "logps/weighted_chosen": -2.832934617996216, + "logps/weighted_rejected": -4.004590034484863, + "loss": 0.556, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -130.6726531982422, + "rewards/margins": 56.34843826293945, + "rewards/rejected": -186.99063110351562, + "rewards/weighted_accuracies": 0.721875011920929, + "rewards/weighted_chosen": -0.598583996295929, + "rewards/weighted_margins": 0.8679443597793579, + "rewards/weighted_rejected": -1.4669402837753296, + "step": 1190 + }, + { + "epoch": 0.6281078251766553, + "grad_norm": 16.663801193237305, + "learning_rate": 3.66843162671456e-07, + "logits/chosen": -0.724108874797821, + "logits/rejected": -0.7493377923965454, + "logps/chosen": -403.37188720703125, + "logps/rejected": -458.7109375, + "logps/weighted_chosen": -3.391552686691284, + "logps/weighted_rejected": -3.7638916969299316, + "loss": 0.6456, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -129.22500610351562, + "rewards/margins": 57.38671875, + "rewards/rejected": -186.65859985351562, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -0.7329651117324829, + "rewards/weighted_margins": 0.6716979742050171, + "rewards/weighted_rejected": -1.404699683189392, + "step": 1200 + }, + { + "epoch": 0.6333420570531274, + "grad_norm": 112.89408111572266, + "learning_rate": 3.5805804060998924e-07, + "logits/chosen": -0.7207000851631165, + "logits/rejected": -0.7580230832099915, + "logps/chosen": -420.49530029296875, 
+ "logps/rejected": -461.3656311035156, + "logps/weighted_chosen": -2.5892090797424316, + "logps/weighted_rejected": -3.7215576171875, + "loss": 0.5822, + "rewards/accuracies": 0.6875, + "rewards/chosen": -119.79765319824219, + "rewards/margins": 70.79374694824219, + "rewards/rejected": -190.6042938232422, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -0.6828033328056335, + "rewards/weighted_margins": 0.7842346429824829, + "rewards/weighted_rejected": -1.4667174816131592, + "step": 1210 + }, + { + "epoch": 0.6385762889295996, + "grad_norm": 54.26150894165039, + "learning_rate": 3.493203260099197e-07, + "logits/chosen": -0.7219696044921875, + "logits/rejected": -0.7961196899414062, + "logps/chosen": -430.78436279296875, + "logps/rejected": -452.4828186035156, + "logps/weighted_chosen": -2.7947998046875, + "logps/weighted_rejected": -3.781079053878784, + "loss": 0.6434, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -143.125, + "rewards/margins": 36.7109375, + "rewards/rejected": -179.82852172851562, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.879199206829071, + "rewards/weighted_margins": 0.549389660358429, + "rewards/weighted_rejected": -1.428198218345642, + "step": 1220 + }, + { + "epoch": 0.6438105208060717, + "grad_norm": 38.24674606323242, + "learning_rate": 3.4063293719694407e-07, + "logits/chosen": -0.702880859375, + "logits/rejected": -0.7701690793037415, + "logps/chosen": -408.3453063964844, + "logps/rejected": -438.4671936035156, + "logps/weighted_chosen": -3.1091065406799316, + "logps/weighted_rejected": -4.008740425109863, + "loss": 0.662, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -131.74609375, + "rewards/margins": 49.351173400878906, + "rewards/rejected": -181.07186889648438, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.8140197992324829, + "rewards/weighted_margins": 0.6572936773300171, + 
"rewards/weighted_rejected": -1.471563696861267, + "step": 1230 + }, + { + "epoch": 0.6490447526825438, + "grad_norm": 22.835683822631836, + "learning_rate": 3.319987756883559e-07, + "logits/chosen": -0.7557929754257202, + "logits/rejected": -0.8191741704940796, + "logps/chosen": -408.5484313964844, + "logps/rejected": -464.4468688964844, + "logps/weighted_chosen": -2.7848753929138184, + "logps/weighted_rejected": -3.874755859375, + "loss": 0.5519, + "rewards/accuracies": 0.6875, + "rewards/chosen": -130.79296875, + "rewards/margins": 64.52461242675781, + "rewards/rejected": -195.3078155517578, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -0.692169189453125, + "rewards/weighted_margins": 0.828686535358429, + "rewards/weighted_rejected": -1.52044677734375, + "step": 1240 + }, + { + "epoch": 0.654278984559016, + "grad_norm": 23.04342269897461, + "learning_rate": 3.234207252239607e-07, + "logits/chosen": -0.7066299319267273, + "logits/rejected": -0.7571166753768921, + "logps/chosen": -432.0093688964844, + "logps/rejected": -439.5257873535156, + "logps/weighted_chosen": -3.251415967941284, + "logps/weighted_rejected": -4.0426025390625, + "loss": 0.6115, + "rewards/accuracies": 0.609375, + "rewards/chosen": -139.2062530517578, + "rewards/margins": 47.450782775878906, + "rewards/rejected": -186.681640625, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.844378650188446, + "rewards/weighted_margins": 0.693859875202179, + "rewards/weighted_rejected": -1.53778076171875, + "step": 1250 + }, + { + "epoch": 0.6595132164354881, + "grad_norm": 20.90934944152832, + "learning_rate": 3.1490165080293175e-07, + "logits/chosen": -0.695568859577179, + "logits/rejected": -0.7663360834121704, + "logps/chosen": -364.7124938964844, + "logps/rejected": -407.20623779296875, + "logps/weighted_chosen": -2.688647508621216, + "logps/weighted_rejected": -3.5754151344299316, + "loss": 0.5785, + "rewards/accuracies": 0.637499988079071, + 
"rewards/chosen": -97.32929992675781, + "rewards/margins": 46.430274963378906, + "rewards/rejected": -143.73672485351562, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -0.6165847778320312, + "rewards/weighted_margins": 0.731945812702179, + "rewards/weighted_rejected": -1.3484008312225342, + "step": 1260 + }, + { + "epoch": 0.6647474483119602, + "grad_norm": 16.100906372070312, + "learning_rate": 3.06444397726922e-07, + "logits/chosen": -0.6802154779434204, + "logits/rejected": -0.7747405767440796, + "logps/chosen": -391.58905029296875, + "logps/rejected": -414.72186279296875, + "logps/weighted_chosen": -2.6523680686950684, + "logps/weighted_rejected": -4.031542778015137, + "loss": 0.5494, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -95.9515609741211, + "rewards/margins": 56.62031173706055, + "rewards/rejected": -152.6218719482422, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -0.4894607663154602, + "rewards/weighted_margins": 0.908905029296875, + "rewards/weighted_rejected": -1.3983032703399658, + "step": 1270 + }, + { + "epoch": 0.6699816801884323, + "grad_norm": 54.34008026123047, + "learning_rate": 2.980517906497586e-07, + "logits/chosen": -0.7491426467895508, + "logits/rejected": -0.8174285888671875, + "logps/chosen": -384.3046875, + "logps/rejected": -423.42657470703125, + "logps/weighted_chosen": -2.9336180686950684, + "logps/weighted_rejected": -4.224340915679932, + "loss": 0.5857, + "rewards/accuracies": 0.715624988079071, + "rewards/chosen": -91.23359680175781, + "rewards/margins": 60.244529724121094, + "rewards/rejected": -151.47421264648438, + "rewards/weighted_accuracies": 0.737500011920929, + "rewards/weighted_chosen": -0.6464203000068665, + "rewards/weighted_margins": 0.8068603277206421, + "rewards/weighted_rejected": -1.454095482826233, + "step": 1280 + }, + { + "epoch": 0.6752159120649045, + "grad_norm": 19.96272087097168, + "learning_rate": 
2.89726632634029e-07, + "logits/chosen": -0.7757827639579773, + "logits/rejected": -0.8287597894668579, + "logps/chosen": -393.6968688964844, + "logps/rejected": -413.81719970703125, + "logps/weighted_chosen": -2.6946778297424316, + "logps/weighted_rejected": -3.6367554664611816, + "loss": 0.6337, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -119.30390930175781, + "rewards/margins": 39.421875, + "rewards/rejected": -158.71484375, + "rewards/weighted_accuracies": 0.65625, + "rewards/weighted_chosen": -0.68804931640625, + "rewards/weighted_margins": 0.6822723150253296, + "rewards/weighted_rejected": -1.3703644275665283, + "step": 1290 + }, + { + "epoch": 0.6804501439413766, + "grad_norm": 103.50169372558594, + "learning_rate": 2.814717042148827e-07, + "logits/chosen": -0.795379638671875, + "logits/rejected": -0.827313244342804, + "logps/chosen": -370.70782470703125, + "logps/rejected": -407.5078125, + "logps/weighted_chosen": -3.238085985183716, + "logps/weighted_rejected": -3.6040282249450684, + "loss": 0.6676, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -96.85546875, + "rewards/margins": 49.802734375, + "rewards/rejected": -146.69296264648438, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -0.47616881132125854, + "rewards/weighted_margins": 0.5577331781387329, + "rewards/weighted_rejected": -1.0332458019256592, + "step": 1300 + }, + { + "epoch": 0.6856843758178487, + "grad_norm": 16.826086044311523, + "learning_rate": 2.7328976247135416e-07, + "logits/chosen": -0.775390625, + "logits/rejected": -0.84033203125, + "logps/chosen": -377.55157470703125, + "logps/rejected": -383.8343811035156, + "logps/weighted_chosen": -2.696847438812256, + "logps/weighted_rejected": -3.4454588890075684, + "loss": 0.6125, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -87.9710922241211, + "rewards/margins": 43.82929611206055, + "rewards/rejected": -131.8117218017578, + "rewards/weighted_accuracies": 
0.6625000238418579, + "rewards/weighted_chosen": -0.4756942689418793, + "rewards/weighted_margins": 0.5390655398368835, + "rewards/weighted_rejected": -1.014410376548767, + "step": 1310 + }, + { + "epoch": 0.6909186076943209, + "grad_norm": 22.221208572387695, + "learning_rate": 2.651835401055217e-07, + "logits/chosen": -0.7605789303779602, + "logits/rejected": -0.7902587652206421, + "logps/chosen": -368.7953186035156, + "logps/rejected": -401.0375061035156, + "logps/weighted_chosen": -2.715527296066284, + "logps/weighted_rejected": -3.181933641433716, + "loss": 0.6434, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -91.14668273925781, + "rewards/margins": 44.18671798706055, + "rewards/rejected": -135.3484344482422, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -0.495400995016098, + "rewards/weighted_margins": 0.4389404356479645, + "rewards/weighted_rejected": -0.9345031976699829, + "step": 1320 + }, + { + "epoch": 0.696152839570793, + "grad_norm": 38.99746322631836, + "learning_rate": 2.571557445298055e-07, + "logits/chosen": -0.7685943841934204, + "logits/rejected": -0.8259338140487671, + "logps/chosen": -371.0375061035156, + "logps/rejected": -396.23748779296875, + "logps/weighted_chosen": -2.6904540061950684, + "logps/weighted_rejected": -3.423779249191284, + "loss": 0.6416, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -92.7613296508789, + "rewards/margins": 39.52460861206055, + "rewards/rejected": -132.27969360351562, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -0.46378785371780396, + "rewards/weighted_margins": 0.543591320514679, + "rewards/weighted_rejected": -1.0074951648712158, + "step": 1330 + }, + { + "epoch": 0.7013870714472651, + "grad_norm": 22.136323928833008, + "learning_rate": 2.49209056962716e-07, + "logits/chosen": -0.813885509967804, + "logits/rejected": -0.8644195795059204, + "logps/chosen": -422.7875061035156, + "logps/rejected": 
-420.34063720703125, + "logps/weighted_chosen": -2.8973631858825684, + "logps/weighted_rejected": -3.8612303733825684, + "loss": 0.5794, + "rewards/accuracies": 0.6875, + "rewards/chosen": -108.18281555175781, + "rewards/margins": 45.218360900878906, + "rewards/rejected": -153.408203125, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -0.47807615995407104, + "rewards/weighted_margins": 0.652331531047821, + "rewards/weighted_rejected": -1.13037109375, + "step": 1340 + }, + { + "epoch": 0.7066213033237373, + "grad_norm": 31.176504135131836, + "learning_rate": 2.41346131533347e-07, + "logits/chosen": -0.8553321957588196, + "logits/rejected": -0.884033203125, + "logps/chosen": -441.421875, + "logps/rejected": -453.1328125, + "logps/weighted_chosen": -2.5696043968200684, + "logps/weighted_rejected": -3.671630859375, + "loss": 0.612, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -134.77304077148438, + "rewards/margins": 43.71503829956055, + "rewards/rejected": -178.5226593017578, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -0.674908459186554, + "rewards/weighted_margins": 0.633074939250946, + "rewards/weighted_rejected": -1.3082733154296875, + "step": 1350 + }, + { + "epoch": 0.7118555352002094, + "grad_norm": 32.60160827636719, + "learning_rate": 2.3356959439491898e-07, + "logits/chosen": -0.7793235778808594, + "logits/rejected": -0.860076904296875, + "logps/chosen": -410.12030029296875, + "logps/rejected": -439.12030029296875, + "logps/weighted_chosen": -3.4254393577575684, + "logps/weighted_rejected": -4.135571479797363, + "loss": 0.5926, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -140.2078094482422, + "rewards/margins": 56.835548400878906, + "rewards/rejected": -196.984375, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.729571521282196, + "rewards/weighted_margins": 0.86962890625, + "rewards/weighted_rejected": -1.5993865728378296, 
+ "step": 1360 + }, + { + "epoch": 0.7170897670766815, + "grad_norm": 14.313425064086914, + "learning_rate": 2.258820428476645e-07, + "logits/chosen": -0.8218353390693665, + "logits/rejected": -0.8879333734512329, + "logps/chosen": -424.1937561035156, + "logps/rejected": -486.390625, + "logps/weighted_chosen": -2.883496046066284, + "logps/weighted_rejected": -3.5981202125549316, + "loss": 0.5838, + "rewards/accuracies": 0.625, + "rewards/chosen": -145.17147827148438, + "rewards/margins": 71.9976577758789, + "rewards/rejected": -217.15390014648438, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -0.767047107219696, + "rewards/weighted_margins": 0.671905517578125, + "rewards/weighted_rejected": -1.438879370689392, + "step": 1370 + }, + { + "epoch": 0.7223239989531536, + "grad_norm": 34.26926040649414, + "learning_rate": 2.1828604447135245e-07, + "logits/chosen": -0.7955230474472046, + "logits/rejected": -0.8580917119979858, + "logps/chosen": -472.06561279296875, + "logps/rejected": -517.7249755859375, + "logps/weighted_chosen": -3.485278367996216, + "logps/weighted_rejected": -4.529760837554932, + "loss": 0.6311, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -202.09805297851562, + "rewards/margins": 58.64374923706055, + "rewards/rejected": -260.86248779296875, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -0.9545013308525085, + "rewards/weighted_margins": 0.7419983148574829, + "rewards/weighted_rejected": -1.6971924304962158, + "step": 1380 + }, + { + "epoch": 0.7275582308296258, + "grad_norm": 36.47634506225586, + "learning_rate": 2.1078413626773545e-07, + "logits/chosen": -0.8125702142715454, + "logits/rejected": -0.859088122844696, + "logps/chosen": -455.5843811035156, + "logps/rejected": -539.3624877929688, + "logps/weighted_chosen": -3.0859618186950684, + "logps/weighted_rejected": -4.498315334320068, + "loss": 0.6211, + "rewards/accuracies": 0.6343749761581421, + 
"rewards/chosen": -175.7160186767578, + "rewards/margins": 94.20703125, + "rewards/rejected": -269.9898376464844, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.9282928705215454, + "rewards/weighted_margins": 0.6790130734443665, + "rewards/weighted_rejected": -1.6075317859649658, + "step": 1390 + }, + { + "epoch": 0.7327924627060979, + "grad_norm": 15.634138107299805, + "learning_rate": 2.0337882381321347e-07, + "logits/chosen": -0.7886413335800171, + "logits/rejected": -0.8139312863349915, + "logps/chosen": -465.96405029296875, + "logps/rejected": -487.4781188964844, + "logps/weighted_chosen": -2.910961866378784, + "logps/weighted_rejected": -3.713574171066284, + "loss": 0.5975, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -174.375, + "rewards/margins": 59.26093673706055, + "rewards/rejected": -233.60702514648438, + "rewards/weighted_accuracies": 0.703125, + "rewards/weighted_chosen": -0.8153778314590454, + "rewards/weighted_margins": 0.7291595339775085, + "rewards/weighted_rejected": -1.5444214344024658, + "step": 1400 + }, + { + "epoch": 0.73802669458257, + "grad_norm": 30.378263473510742, + "learning_rate": 1.960725804219905e-07, + "logits/chosen": -0.7158355712890625, + "logits/rejected": -0.78448486328125, + "logps/chosen": -437.4203186035156, + "logps/rejected": -482.3531188964844, + "logps/weighted_chosen": -3.304150342941284, + "logps/weighted_rejected": -3.201416015625, + "loss": 0.6427, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -137.9460906982422, + "rewards/margins": 61.10546875, + "rewards/rejected": -199.02578735351562, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.5938171148300171, + "rewards/weighted_margins": 0.5732055902481079, + "rewards/weighted_rejected": -1.1667938232421875, + "step": 1410 + }, + { + "epoch": 0.7432609264590422, + "grad_norm": 21.7137508392334, + "learning_rate": 1.8886784632000824e-07, + "logits/chosen": 
-0.7250915765762329, + "logits/rejected": -0.769946277141571, + "logps/chosen": -403.0, + "logps/rejected": -472.5687561035156, + "logps/weighted_chosen": -2.5306396484375, + "logps/weighted_rejected": -3.677197217941284, + "loss": 0.5801, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -117.0738296508789, + "rewards/margins": 87.76094055175781, + "rewards/rejected": -204.9113311767578, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -0.4598632752895355, + "rewards/weighted_margins": 0.6666930913925171, + "rewards/weighted_rejected": -1.126226782798767, + "step": 1420 + }, + { + "epoch": 0.7484951583355143, + "grad_norm": 24.44352912902832, + "learning_rate": 1.8176702782993025e-07, + "logits/chosen": -0.7201507687568665, + "logits/rejected": -0.7507484555244446, + "logps/chosen": -397.28594970703125, + "logps/rejected": -443.4453125, + "logps/weighted_chosen": -2.65478515625, + "logps/weighted_rejected": -3.630932569503784, + "loss": 0.6459, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -135.69766235351562, + "rewards/margins": 53.7060546875, + "rewards/rejected": -189.41796875, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -0.5992950201034546, + "rewards/weighted_margins": 0.5739685297012329, + "rewards/weighted_rejected": -1.173132300376892, + "step": 1430 + }, + { + "epoch": 0.7537293902119864, + "grad_norm": 32.458709716796875, + "learning_rate": 1.7477249656745034e-07, + "logits/chosen": -0.6446533203125, + "logits/rejected": -0.7007080316543579, + "logps/chosen": -367.4984436035156, + "logps/rejected": -391.62030029296875, + "logps/weighted_chosen": -2.759448289871216, + "logps/weighted_rejected": -3.7931885719299316, + "loss": 0.5964, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -116.71601867675781, + "rewards/margins": 48.75664138793945, + "rewards/rejected": -165.47030639648438, + "rewards/weighted_accuracies": 0.706250011920929, + 
"rewards/weighted_chosen": -0.42836302518844604, + "rewards/weighted_margins": 0.7737976312637329, + "rewards/weighted_rejected": -1.2024414539337158, + "step": 1440 + }, + { + "epoch": 0.7589636220884585, + "grad_norm": 56.54096984863281, + "learning_rate": 1.6788658864919118e-07, + "logits/chosen": -0.6913238763809204, + "logits/rejected": -0.795092761516571, + "logps/chosen": -472.4078063964844, + "logps/rejected": -529.9281005859375, + "logps/weighted_chosen": -2.7608399391174316, + "logps/weighted_rejected": -3.37042236328125, + "loss": 0.566, + "rewards/accuracies": 0.65625, + "rewards/chosen": -157.638671875, + "rewards/margins": 88.724609375, + "rewards/rejected": -246.39999389648438, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.5100753903388977, + "rewards/weighted_margins": 0.792651355266571, + "rewards/weighted_rejected": -1.3026001453399658, + "step": 1450 + }, + { + "epoch": 0.7641978539649307, + "grad_norm": 29.560546875, + "learning_rate": 1.611116039124613e-07, + "logits/chosen": -0.7274124026298523, + "logits/rejected": -0.7964813113212585, + "logps/chosen": -441.23126220703125, + "logps/rejected": -479.5765686035156, + "logps/weighted_chosen": -3.2884521484375, + "logps/weighted_rejected": -4.231982231140137, + "loss": 0.592, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -178.3828125, + "rewards/margins": 71.8671875, + "rewards/rejected": -250.22811889648438, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.639935314655304, + "rewards/weighted_margins": 0.787335216999054, + "rewards/weighted_rejected": -1.42718505859375, + "step": 1460 + }, + { + "epoch": 0.7694320858414028, + "grad_norm": 36.940250396728516, + "learning_rate": 1.5444980514712723e-07, + "logits/chosen": -0.8617309331893921, + "logits/rejected": -0.8865936398506165, + "logps/chosen": -520.8031005859375, + "logps/rejected": -599.0, + "logps/weighted_chosen": -3.0329833030700684, + 
"logps/weighted_rejected": -3.6827635765075684, + "loss": 0.6473, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -209.4656219482422, + "rewards/margins": 83.8765640258789, + "rewards/rejected": -293.328125, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -0.8635589480400085, + "rewards/weighted_margins": 0.585784912109375, + "rewards/weighted_rejected": -1.4496307373046875, + "step": 1470 + }, + { + "epoch": 0.7746663177178749, + "grad_norm": 16.788267135620117, + "learning_rate": 1.4790341733986083e-07, + "logits/chosen": -0.8069809079170227, + "logits/rejected": -0.8585540652275085, + "logps/chosen": -481.3187561035156, + "logps/rejected": -522.4156494140625, + "logps/weighted_chosen": -3.3416991233825684, + "logps/weighted_rejected": -3.6712403297424316, + "loss": 0.59, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -194.30624389648438, + "rewards/margins": 65.70390319824219, + "rewards/rejected": -259.9476623535156, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -0.759503185749054, + "rewards/weighted_margins": 0.66339111328125, + "rewards/weighted_rejected": -1.422460913658142, + "step": 1480 + }, + { + "epoch": 0.7799005495943471, + "grad_norm": 38.181453704833984, + "learning_rate": 1.4147462693101108e-07, + "logits/chosen": -0.7319869995117188, + "logits/rejected": -0.7861053347587585, + "logps/chosen": -428.0562438964844, + "logps/rejected": -476.8140563964844, + "logps/weighted_chosen": -2.714123487472534, + "logps/weighted_rejected": -3.5479736328125, + "loss": 0.6091, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -140.8445281982422, + "rewards/margins": 64.64765930175781, + "rewards/rejected": -205.42227172851562, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -0.49198609590530396, + "rewards/weighted_margins": 0.685406506061554, + "rewards/weighted_rejected": -1.178009033203125, + "step": 1490 + }, + { 
+ "epoch": 0.7851347814708192, + "grad_norm": 32.59770202636719, + "learning_rate": 1.3516558108435177e-07, + "logits/chosen": -0.7364288568496704, + "logits/rejected": -0.7587372064590454, + "logps/chosen": -394.5492248535156, + "logps/rejected": -458.09063720703125, + "logps/weighted_chosen": -2.572314500808716, + "logps/weighted_rejected": -4.043042182922363, + "loss": 0.5775, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -115.5425796508789, + "rewards/margins": 77.4566421508789, + "rewards/rejected": -192.8859405517578, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -0.4650115966796875, + "rewards/weighted_margins": 0.6923462152481079, + "rewards/weighted_rejected": -1.1571471691131592, + "step": 1500 + }, + { + "epoch": 0.7851347814708192, + "eval_logits/chosen": -0.8365868926048279, + "eval_logits/rejected": -0.8772695064544678, + "eval_logps/chosen": -431.9909973144531, + "eval_logps/rejected": -480.4159851074219, + "eval_logps/weighted_chosen": -2.8402867317199707, + "eval_logps/weighted_rejected": -3.7289178371429443, + "eval_loss": 0.5886616110801697, + "eval_rewards/accuracies": 0.6234999895095825, + "eval_rewards/chosen": -143.2762451171875, + "eval_rewards/margins": 69.428466796875, + "eval_rewards/rejected": -212.71400451660156, + "eval_rewards/weighted_accuracies": 0.6955000162124634, + "eval_rewards/weighted_chosen": -0.5692093968391418, + "eval_rewards/weighted_margins": 0.6658874750137329, + "eval_rewards/weighted_rejected": -1.2350969314575195, + "eval_runtime": 1042.5015, + "eval_samples_per_second": 1.918, + "eval_steps_per_second": 0.48, + "step": 1500 + } + ], + "logging_steps": 10, + "max_steps": 1911, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": 
{} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1500/training_args.bin b/checkpoint-1500/training_args.bin new file mode 100644 index 0000000..55fbd6a --- /dev/null +++ b/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce12db195466115f95dcb5cdef20e4bd0d806824726d6c0b3824d1ebf482ce4d +size 8721 diff --git a/checkpoint-1500/zero_to_fp32.py b/checkpoint-1500/zero_to_fp32.py new file mode 100644 index 0000000..0e75914 --- /dev/null +++ b/checkpoint-1500/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint.
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-500/chat_template.jinja b/checkpoint-500/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/checkpoint-500/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000..3f8f5c0 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 128256 +} 
diff --git a/checkpoint-500/generation_config.json b/checkpoint-500/generation_config.json new file mode 100644 index 0000000..fc3c54a --- /dev/null +++ b/checkpoint-500/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "eos_token_id": 128001, + "transformers_version": "4.54.1" +} diff --git a/checkpoint-500/latest b/checkpoint-500/latest new file mode 100644 index 0000000..f0b47ce --- /dev/null +++ b/checkpoint-500/latest @@ -0,0 +1 @@ +global_step500 \ No newline at end of file diff --git a/checkpoint-500/model-00001-of-00004.safetensors b/checkpoint-500/model-00001-of-00004.safetensors new file mode 100644 index 0000000..0f330a2 --- /dev/null +++ b/checkpoint-500/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda84f8bb03e780f06bc2cb59b0546abd0d6284203fee54e9379609ef96b9777 +size 4976698672 diff --git a/checkpoint-500/model-00002-of-00004.safetensors b/checkpoint-500/model-00002-of-00004.safetensors new file mode 100644 index 0000000..03919cd --- /dev/null +++ b/checkpoint-500/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e948f87e59d28afc17222ad68e6c5031d58bfe1ea1a6ab0df82c637e9f916923 +size 4999802720 diff --git a/checkpoint-500/model-00003-of-00004.safetensors b/checkpoint-500/model-00003-of-00004.safetensors new file mode 100644 index 0000000..e844ee7 --- /dev/null +++ b/checkpoint-500/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f743fd4ab69e522db7c087ae9ae48bc561a27ee186924fcccac99821f69c52 +size 4915916176 diff --git a/checkpoint-500/model-00004-of-00004.safetensors b/checkpoint-500/model-00004-of-00004.safetensors new file mode 100644 index 0000000..581c057 --- /dev/null +++ b/checkpoint-500/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e5736fe5da0fb55fe90c936c1efc6d17319e4d6141391c49c2c95b32f89b4431 +size 1168138808 diff --git a/checkpoint-500/model.safetensors.index.json b/checkpoint-500/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/checkpoint-500/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": 
"model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": 
"model-00004-of-00004.safetensors" + } +} diff --git a/checkpoint-500/rng_state_0.pth b/checkpoint-500/rng_state_0.pth new file mode 100644 index 0000000..6ae1c3a --- /dev/null +++ b/checkpoint-500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fe1a45c0111b18df213058c73c3a4e717295b975e92faf7b2e048e6504b3f3 +size 14917 diff --git a/checkpoint-500/rng_state_1.pth b/checkpoint-500/rng_state_1.pth new file mode 100644 index 0000000..58bf4e2 --- /dev/null +++ b/checkpoint-500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bf26da988f2f17ca0d366aece1dfdb5c3bcab91066168b7062b361b8c3ac2d6 +size 14917 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000..a2af8e9 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f8728b8c285bed1ca96ea99a5e658a8a9c58f9dd1ce1805f1213195612503b +size 1465 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json new file mode 100644 index 0000000..03aa64f --- /dev/null +++ b/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0968dcc0ee8e56c7dccd34a7f51f8065ea0cb9e2cc529e3243d1e5c0a4bdaa0c +size 17208754 diff --git a/checkpoint-500/tokenizer_config.json 
b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000..877a9a9 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": 
"<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { 
+ "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": 
"<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": 
"<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": 
"<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 32768, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000..38e975b --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,1127 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": 
null, + "epoch": 0.26171159382360637, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005234231876472127, + "grad_norm": 126.29230499267578, + "learning_rate": 0.0, + "logits/chosen": -0.40118408203125, + "logits/rejected": -0.41802978515625, + "logps/chosen": -297.609375, + "logps/rejected": -247.84375, + "logps/weighted_chosen": -4.5152587890625, + "logps/weighted_rejected": -3.032470703125, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/weighted_accuracies": 0.0, + "rewards/weighted_chosen": 0.0, + "rewards/weighted_margins": 0.0, + "rewards/weighted_rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.005234231876472127, + "grad_norm": 296.4369812011719, + "learning_rate": 4.6875e-08, + "logits/chosen": -0.3177456259727478, + "logits/rejected": -0.3534359335899353, + "logps/chosen": -275.5711669921875, + "logps/rejected": -255.90451049804688, + "logps/weighted_chosen": -2.350965738296509, + "logps/weighted_rejected": -2.549940347671509, + "loss": 0.6917, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0401475690305233, + "rewards/margins": 0.04296875, + "rewards/rejected": -0.0831163227558136, + "rewards/weighted_accuracies": 0.3229166567325592, + "rewards/weighted_chosen": -0.00032212998485192657, + "rewards/weighted_margins": 0.00019327799964230508, + "rewards/weighted_rejected": -0.0005154079990461469, + "step": 10 + }, + { + "epoch": 0.010468463752944255, + "grad_norm": 21.67967414855957, + "learning_rate": 9.895833333333332e-08, + "logits/chosen": -0.29769057035446167, + "logits/rejected": -0.313650518655777, + "logps/chosen": -294.3374938964844, + "logps/rejected": -272.6703186035156, + "logps/weighted_chosen": -2.13031005859375, + "logps/weighted_rejected": -2.103222608566284, + "loss": 0.6908, + "rewards/accuracies": 0.3125, + 
"rewards/chosen": -0.06103515625, + "rewards/margins": -0.01318359375, + "rewards/rejected": -0.0478515625, + "rewards/weighted_accuracies": 0.4437499940395355, + "rewards/weighted_chosen": 0.0014366150135174394, + "rewards/weighted_margins": 0.0021545409690588713, + "rewards/weighted_rejected": -0.0007179260137490928, + "step": 20 + }, + { + "epoch": 0.015702695629416383, + "grad_norm": 76.9887466430664, + "learning_rate": 1.5104166666666664e-07, + "logits/chosen": -0.2917121946811676, + "logits/rejected": -0.337240606546402, + "logps/chosen": -298.02655029296875, + "logps/rejected": -268.12188720703125, + "logps/weighted_chosen": -2.0724120140075684, + "logps/weighted_rejected": -2.4466919898986816, + "loss": 0.6912, + "rewards/accuracies": 0.28125, + "rewards/chosen": -0.0062500000931322575, + "rewards/margins": -0.02509765699505806, + "rewards/rejected": 0.01884765550494194, + "rewards/weighted_accuracies": 0.4281249940395355, + "rewards/weighted_chosen": 0.0027938843704760075, + "rewards/weighted_margins": 0.0019706725142896175, + "rewards/weighted_rejected": 0.0008232116815634072, + "step": 30 + }, + { + "epoch": 0.02093692750588851, + "grad_norm": 32.98203659057617, + "learning_rate": 2.03125e-07, + "logits/chosen": -0.3011154234409332, + "logits/rejected": -0.3432762026786804, + "logps/chosen": -278.63751220703125, + "logps/rejected": -253.88125610351562, + "logps/weighted_chosen": -2.2070555686950684, + "logps/weighted_rejected": -2.605224609375, + "loss": 0.692, + "rewards/accuracies": 0.26249998807907104, + "rewards/chosen": 0.0034667968284338713, + "rewards/margins": -0.05991210788488388, + "rewards/rejected": 0.06337890774011612, + "rewards/weighted_accuracies": 0.35624998807907104, + "rewards/weighted_chosen": 0.0014549255138263106, + "rewards/weighted_margins": -0.00034332275390625, + "rewards/weighted_rejected": 0.0017982482677325606, + "step": 40 + }, + { + "epoch": 0.02617115938236064, + "grad_norm": 20.751684188842773, + "learning_rate": 
2.552083333333333e-07, + "logits/chosen": -0.2822524905204773, + "logits/rejected": -0.32080918550491333, + "logps/chosen": -280.31329345703125, + "logps/rejected": -267.58709716796875, + "logps/weighted_chosen": -2.136962890625, + "logps/weighted_rejected": -2.1753907203674316, + "loss": 0.6883, + "rewards/accuracies": 0.3125, + "rewards/chosen": -0.07236327975988388, + "rewards/margins": -0.09189452975988388, + "rewards/rejected": 0.01953125, + "rewards/weighted_accuracies": 0.4375, + "rewards/weighted_chosen": 0.0054107666946947575, + "rewards/weighted_margins": 0.0078063965775072575, + "rewards/weighted_rejected": -0.0023956298828125, + "step": 50 + }, + { + "epoch": 0.031405391258832765, + "grad_norm": 40.70024108886719, + "learning_rate": 3.0729166666666665e-07, + "logits/chosen": -0.3149581849575043, + "logits/rejected": -0.3086872100830078, + "logps/chosen": -277.6031188964844, + "logps/rejected": -261.8031311035156, + "logps/weighted_chosen": -2.5905518531799316, + "logps/weighted_rejected": -2.4834961891174316, + "loss": 0.6874, + "rewards/accuracies": 0.3812499940395355, + "rewards/chosen": 0.03662109375, + "rewards/margins": 0.12646484375, + "rewards/rejected": -0.08984375, + "rewards/weighted_accuracies": 0.5, + "rewards/weighted_chosen": 0.0004280090215615928, + "rewards/weighted_margins": 0.01105651818215847, + "rewards/weighted_rejected": -0.01062927208840847, + "step": 60 + }, + { + "epoch": 0.036639623135304895, + "grad_norm": 67.51947021484375, + "learning_rate": 3.59375e-07, + "logits/chosen": -0.318746954202652, + "logits/rejected": -0.32574766874313354, + "logps/chosen": -289.90313720703125, + "logps/rejected": -245.04452514648438, + "logps/weighted_chosen": -2.098431348800659, + "logps/weighted_rejected": -2.392407178878784, + "loss": 0.6841, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": 0.16708984971046448, + "rewards/margins": 0.4442382752895355, + "rewards/rejected": -0.27714842557907104, + "rewards/weighted_accuracies": 
0.559374988079071, + "rewards/weighted_chosen": 0.015575408935546875, + "rewards/weighted_margins": 0.02174072340130806, + "rewards/weighted_rejected": -0.00616531353443861, + "step": 70 + }, + { + "epoch": 0.04187385501177702, + "grad_norm": 68.87100982666016, + "learning_rate": 4.114583333333333e-07, + "logits/chosen": -0.286581426858902, + "logits/rejected": -0.3082527220249176, + "logps/chosen": -289.5101623535156, + "logps/rejected": -270.4375, + "logps/weighted_chosen": -2.2385497093200684, + "logps/weighted_rejected": -2.4218382835388184, + "loss": 0.6727, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.31572264432907104, + "rewards/margins": 0.5547851324081421, + "rewards/rejected": -0.23906250298023224, + "rewards/weighted_accuracies": 0.596875011920929, + "rewards/weighted_chosen": 0.03613891452550888, + "rewards/weighted_margins": 0.05283202975988388, + "rewards/weighted_rejected": -0.01669769361615181, + "step": 80 + }, + { + "epoch": 0.04710808688824915, + "grad_norm": 40.29203414916992, + "learning_rate": 4.6354166666666664e-07, + "logits/chosen": -0.3158706724643707, + "logits/rejected": -0.30914992094039917, + "logps/chosen": -280.5726623535156, + "logps/rejected": -258.17657470703125, + "logps/weighted_chosen": -2.45281982421875, + "logps/weighted_rejected": -2.5444703102111816, + "loss": 0.6683, + "rewards/accuracies": 0.4906249940395355, + "rewards/chosen": 0.15966796875, + "rewards/margins": 0.599609375, + "rewards/rejected": -0.43994140625, + "rewards/weighted_accuracies": 0.581250011920929, + "rewards/weighted_chosen": 0.05808715894818306, + "rewards/weighted_margins": 0.07471618801355362, + "rewards/weighted_rejected": -0.0166168212890625, + "step": 90 + }, + { + "epoch": 0.05234231876472128, + "grad_norm": 46.855377197265625, + "learning_rate": 5.156249999999999e-07, + "logits/chosen": -0.2856552004814148, + "logits/rejected": -0.3585342466831207, + "logps/chosen": -291.05548095703125, + "logps/rejected": -287.078125, + 
"logps/weighted_chosen": -1.9577789306640625, + "logps/weighted_rejected": -2.532482862472534, + "loss": 0.6785, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.13925781846046448, + "rewards/margins": 0.9869140386581421, + "rewards/rejected": -1.1261718273162842, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": 0.03715210035443306, + "rewards/weighted_margins": 0.0635833740234375, + "rewards/weighted_rejected": -0.02643737755715847, + "step": 100 + }, + { + "epoch": 0.05757655064119341, + "grad_norm": 55.04579162597656, + "learning_rate": 5.677083333333333e-07, + "logits/chosen": -0.33493995666503906, + "logits/rejected": -0.3254844546318054, + "logps/chosen": -297.2953186035156, + "logps/rejected": -262.6773376464844, + "logps/weighted_chosen": -2.606689453125, + "logps/weighted_rejected": -2.648364305496216, + "loss": 0.6821, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -0.9228515625, + "rewards/margins": 0.8955078125, + "rewards/rejected": -1.818359375, + "rewards/weighted_accuracies": 0.518750011920929, + "rewards/weighted_chosen": -0.005747986026108265, + "rewards/weighted_margins": 0.05161895602941513, + "rewards/weighted_rejected": -0.05732421949505806, + "step": 110 + }, + { + "epoch": 0.06281078251766553, + "grad_norm": 22.23135757446289, + "learning_rate": 6.197916666666666e-07, + "logits/chosen": -0.3393222689628601, + "logits/rejected": -0.36481350660324097, + "logps/chosen": -295.6703186035156, + "logps/rejected": -256.3296813964844, + "logps/weighted_chosen": -1.8351562023162842, + "logps/weighted_rejected": -2.124218702316284, + "loss": 0.6752, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.071679711341858, + "rewards/margins": 1.46142578125, + "rewards/rejected": -2.5331053733825684, + "rewards/weighted_accuracies": 0.546875, + "rewards/weighted_chosen": 0.0018810272449627519, + "rewards/weighted_margins": 0.06835174560546875, + "rewards/weighted_rejected": 
-0.0664466843008995, + "step": 120 + }, + { + "epoch": 0.06804501439413765, + "grad_norm": 57.93917465209961, + "learning_rate": 6.718749999999999e-07, + "logits/chosen": -0.30284881591796875, + "logits/rejected": -0.2989334166049957, + "logps/chosen": -306.5074157714844, + "logps/rejected": -279.8265686035156, + "logps/weighted_chosen": -1.910064697265625, + "logps/weighted_rejected": -2.2278685569763184, + "loss": 0.6738, + "rewards/accuracies": 0.578125, + "rewards/chosen": -0.45097655057907104, + "rewards/margins": 1.7268555164337158, + "rewards/rejected": -2.177734375, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": 0.02166290208697319, + "rewards/weighted_margins": 0.07758025825023651, + "rewards/weighted_rejected": -0.05589141696691513, + "step": 130 + }, + { + "epoch": 0.07327924627060979, + "grad_norm": 66.64070892333984, + "learning_rate": 7.239583333333333e-07, + "logits/chosen": -0.34190064668655396, + "logits/rejected": -0.3586837649345398, + "logps/chosen": -300.01483154296875, + "logps/rejected": -276.1703186035156, + "logps/weighted_chosen": -2.202807664871216, + "logps/weighted_rejected": -2.474353075027466, + "loss": 0.6635, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -1.3230469226837158, + "rewards/margins": 2.2220702171325684, + "rewards/rejected": -3.545117139816284, + "rewards/weighted_accuracies": 0.609375, + "rewards/weighted_chosen": 0.0006683349492959678, + "rewards/weighted_margins": 0.10604552924633026, + "rewards/weighted_rejected": -0.1053924560546875, + "step": 140 + }, + { + "epoch": 0.07851347814708191, + "grad_norm": 18.789766311645508, + "learning_rate": 7.760416666666666e-07, + "logits/chosen": -0.2976974546909332, + "logits/rejected": -0.3081321716308594, + "logps/chosen": -286.27813720703125, + "logps/rejected": -255.4640655517578, + "logps/weighted_chosen": -2.7657103538513184, + "logps/weighted_rejected": -2.831347703933716, + "loss": 0.6605, + "rewards/accuracies": 
0.628125011920929, + "rewards/chosen": -2.104687452316284, + "rewards/margins": 2.5054688453674316, + "rewards/rejected": -4.610156059265137, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.0018630981212481856, + "rewards/weighted_margins": 0.158416748046875, + "rewards/weighted_rejected": -0.1603546142578125, + "step": 150 + }, + { + "epoch": 0.08374771002355404, + "grad_norm": 51.51210021972656, + "learning_rate": 8.28125e-07, + "logits/chosen": -0.3341739773750305, + "logits/rejected": -0.3859619200229645, + "logps/chosen": -306.4765625, + "logps/rejected": -279.1148376464844, + "logps/weighted_chosen": -2.3189454078674316, + "logps/weighted_rejected": -2.36669921875, + "loss": 0.636, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -2.575390577316284, + "rewards/margins": 3.349609375, + "rewards/rejected": -5.925000190734863, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": 0.02147369459271431, + "rewards/weighted_margins": 0.22438660264015198, + "rewards/weighted_rejected": -0.203105166554451, + "step": 160 + }, + { + "epoch": 0.08898194190002617, + "grad_norm": 398.3809509277344, + "learning_rate": 8.802083333333333e-07, + "logits/chosen": -0.36855775117874146, + "logits/rejected": -0.37070387601852417, + "logps/chosen": -307.1656188964844, + "logps/rejected": -265.78436279296875, + "logps/weighted_chosen": -2.459460496902466, + "logps/weighted_rejected": -2.757373094558716, + "loss": 0.6811, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -4.097460746765137, + "rewards/margins": 3.488476514816284, + "rewards/rejected": -7.585839748382568, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": 0.018505096435546875, + "rewards/weighted_margins": 0.19701537489891052, + "rewards/weighted_rejected": -0.17839965224266052, + "step": 170 + }, + { + "epoch": 0.0942161737764983, + "grad_norm": 55.77580261230469, + "learning_rate": 
9.322916666666666e-07, + "logits/chosen": -0.3392753601074219, + "logits/rejected": -0.35816192626953125, + "logps/chosen": -278.99530029296875, + "logps/rejected": -265.18359375, + "logps/weighted_chosen": -2.362103223800659, + "logps/weighted_rejected": -2.754711866378784, + "loss": 0.6944, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -5.346972465515137, + "rewards/margins": 3.5015625953674316, + "rewards/rejected": -8.848730087280273, + "rewards/weighted_accuracies": 0.578125, + "rewards/weighted_chosen": -0.05782318115234375, + "rewards/weighted_margins": 0.16480103135108948, + "rewards/weighted_rejected": -0.22255554795265198, + "step": 180 + }, + { + "epoch": 0.09945040565297043, + "grad_norm": 38.015960693359375, + "learning_rate": 9.84375e-07, + "logits/chosen": -0.3686843812465668, + "logits/rejected": -0.4041244387626648, + "logps/chosen": -314.3070373535156, + "logps/rejected": -276.7484436035156, + "logps/weighted_chosen": -2.123486280441284, + "logps/weighted_rejected": -2.6261963844299316, + "loss": 0.6392, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -8.581738471984863, + "rewards/margins": 4.317968845367432, + "rewards/rejected": -12.900781631469727, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.129638671875, + "rewards/weighted_margins": 0.2160186767578125, + "rewards/weighted_rejected": -0.345590204000473, + "step": 190 + }, + { + "epoch": 0.10468463752944256, + "grad_norm": 46.52367401123047, + "learning_rate": 9.99959085414323e-07, + "logits/chosen": -0.4128967225551605, + "logits/rejected": -0.4471847414970398, + "logps/chosen": -320.0546875, + "logps/rejected": -273.11248779296875, + "logps/weighted_chosen": -2.5019164085388184, + "logps/weighted_rejected": -2.9936890602111816, + "loss": 0.6473, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -10.737597465515137, + "rewards/margins": 4.738671779632568, + "rewards/rejected": -15.476171493530273, + 
"rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.11443634331226349, + "rewards/weighted_margins": 0.2610321044921875, + "rewards/weighted_rejected": -0.37534791231155396, + "step": 200 + }, + { + "epoch": 0.10991886940591468, + "grad_norm": 21.238189697265625, + "learning_rate": 9.997587035630105e-07, + "logits/chosen": -0.4288749694824219, + "logits/rejected": -0.4688262939453125, + "logps/chosen": -300.0765686035156, + "logps/rejected": -304.63751220703125, + "logps/weighted_chosen": -2.32427978515625, + "logps/weighted_rejected": -3.0592284202575684, + "loss": 0.6424, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -13.117578506469727, + "rewards/margins": 7.013671875, + "rewards/rejected": -20.133594512939453, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -0.21423491835594177, + "rewards/weighted_margins": 0.27025145292282104, + "rewards/weighted_rejected": -0.4845077395439148, + "step": 210 + }, + { + "epoch": 0.11515310128238682, + "grad_norm": 24.92041015625, + "learning_rate": 9.99391406364405e-07, + "logits/chosen": -0.42696380615234375, + "logits/rejected": -0.429006963968277, + "logps/chosen": -305.4906311035156, + "logps/rejected": -288.6312561035156, + "logps/weighted_chosen": -2.625018358230591, + "logps/weighted_rejected": -3.102160692214966, + "loss": 0.6601, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -13.349413871765137, + "rewards/margins": 6.373632907867432, + "rewards/rejected": -19.72265625, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.20062866806983948, + "rewards/weighted_margins": 0.316873162984848, + "rewards/weighted_rejected": -0.5174545049667358, + "step": 220 + }, + { + "epoch": 0.12038733315885894, + "grad_norm": 147.95851135253906, + "learning_rate": 9.988573164927884e-07, + "logits/chosen": -0.3811447024345398, + "logits/rejected": -0.4161086976528168, + "logps/chosen": -281.33203125, + 
"logps/rejected": -274.234375, + "logps/weighted_chosen": -2.32806396484375, + "logps/weighted_rejected": -2.6552734375, + "loss": 0.7195, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -12.46875, + "rewards/margins": 10.046093940734863, + "rewards/rejected": -22.515430450439453, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.24639587104320526, + "rewards/weighted_margins": 0.23908081650733948, + "rewards/weighted_rejected": -0.4853073060512543, + "step": 230 + }, + { + "epoch": 0.12562156503533106, + "grad_norm": 26.882122039794922, + "learning_rate": 9.98156612329838e-07, + "logits/chosen": -0.4748245179653168, + "logits/rejected": -0.5250595211982727, + "logps/chosen": -278.16717529296875, + "logps/rejected": -306.29376220703125, + "logps/weighted_chosen": -2.348803758621216, + "logps/weighted_rejected": -2.9455933570861816, + "loss": 0.6674, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -13.405566215515137, + "rewards/margins": 10.753710746765137, + "rewards/rejected": -24.158985137939453, + "rewards/weighted_accuracies": 0.625, + "rewards/weighted_chosen": -0.14908751845359802, + "rewards/weighted_margins": 0.33162689208984375, + "rewards/weighted_rejected": -0.48021697998046875, + "step": 240 + }, + { + "epoch": 0.13085579691180318, + "grad_norm": 86.49760437011719, + "learning_rate": 9.97289527905053e-07, + "logits/chosen": -0.478302001953125, + "logits/rejected": -0.48861923813819885, + "logps/chosen": -277.0523376464844, + "logps/rejected": -275.80938720703125, + "logps/weighted_chosen": -2.61376953125, + "logps/weighted_rejected": -2.787853956222534, + "loss": 0.7022, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -12.673730850219727, + "rewards/margins": 7.1806640625, + "rewards/rejected": -19.852344512939453, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.12388916313648224, + "rewards/weighted_margins": 0.19627074897289276, + 
"rewards/weighted_rejected": -0.3203796446323395, + "step": 250 + }, + { + "epoch": 0.1360900287882753, + "grad_norm": 19.698871612548828, + "learning_rate": 9.962563528175875e-07, + "logits/chosen": -0.4065658450126648, + "logits/rejected": -0.4432968199253082, + "logps/chosen": -310.62890625, + "logps/rejected": -281.46405029296875, + "logps/weighted_chosen": -2.184094190597534, + "logps/weighted_rejected": -3.0492796897888184, + "loss": 0.6507, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -11.896581649780273, + "rewards/margins": 8.622265815734863, + "rewards/rejected": -20.520313262939453, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": -0.1260833740234375, + "rewards/weighted_margins": 0.25025635957717896, + "rewards/weighted_rejected": -0.3761749267578125, + "step": 260 + }, + { + "epoch": 0.14132426066474746, + "grad_norm": 16.363121032714844, + "learning_rate": 9.950574321395277e-07, + "logits/chosen": -0.42208632826805115, + "logits/rejected": -0.4458427429199219, + "logps/chosen": -305.9046936035156, + "logps/rejected": -286.06561279296875, + "logps/weighted_chosen": -2.40838623046875, + "logps/weighted_rejected": -2.7938475608825684, + "loss": 0.6573, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -16.622364044189453, + "rewards/margins": 6.233202934265137, + "rewards/rejected": -22.855077743530273, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.189697265625, + "rewards/weighted_margins": 0.27490538358688354, + "rewards/weighted_rejected": -0.4645233154296875, + "step": 270 + }, + { + "epoch": 0.14655849254121958, + "grad_norm": 54.42692947387695, + "learning_rate": 9.936931663006413e-07, + "logits/chosen": -0.45263671875, + "logits/rejected": -0.44363707304000854, + "logps/chosen": -316.171875, + "logps/rejected": -303.3656311035156, + "logps/weighted_chosen": -2.4659423828125, + "logps/weighted_rejected": -3.0541749000549316, + "loss": 0.6068, + 
"rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -13.402734756469727, + "rewards/margins": 10.619824409484863, + "rewards/rejected": -24.025390625, + "rewards/weighted_accuracies": 0.6875, + "rewards/weighted_chosen": -0.05214080959558487, + "rewards/weighted_margins": 0.40336912870407104, + "rewards/weighted_rejected": -0.455657958984375, + "step": 280 + }, + { + "epoch": 0.1517927244176917, + "grad_norm": 39.20017623901367, + "learning_rate": 9.921640109546357e-07, + "logits/chosen": -0.42310255765914917, + "logits/rejected": -0.48920440673828125, + "logps/chosen": -283.7171936035156, + "logps/rejected": -278.1859436035156, + "logps/weighted_chosen": -2.396167039871216, + "logps/weighted_rejected": -3.5881590843200684, + "loss": 0.6649, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -16.9111328125, + "rewards/margins": 9.704492568969727, + "rewards/rejected": -26.62109375, + "rewards/weighted_accuracies": 0.6031249761581421, + "rewards/weighted_chosen": -0.130279541015625, + "rewards/weighted_margins": 0.3882461488246918, + "rewards/weighted_rejected": -0.5187179446220398, + "step": 290 + }, + { + "epoch": 0.15702695629416383, + "grad_norm": 28.03601837158203, + "learning_rate": 9.90470476826975e-07, + "logits/chosen": -0.485189825296402, + "logits/rejected": -0.48862916231155396, + "logps/chosen": -289.09765625, + "logps/rejected": -297.625, + "logps/weighted_chosen": -2.2784485816955566, + "logps/weighted_rejected": -2.771862745285034, + "loss": 0.6608, + "rewards/accuracies": 0.640625, + "rewards/chosen": -19.676952362060547, + "rewards/margins": 10.679491996765137, + "rewards/rejected": -30.360937118530273, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.1669921875, + "rewards/weighted_margins": 0.28967589139938354, + "rewards/weighted_rejected": -0.4566032290458679, + "step": 300 + }, + { + "epoch": 0.16226118817063595, + "grad_norm": 538.50927734375, + "learning_rate": 9.886131295443002e-07, + 
"logits/chosen": -0.654278576374054, + "logits/rejected": -0.7076683044433594, + "logps/chosen": -341.85467529296875, + "logps/rejected": -309.89764404296875, + "logps/weighted_chosen": -2.629150390625, + "logps/weighted_rejected": -2.8698973655700684, + "loss": 0.6788, + "rewards/accuracies": 0.515625, + "rewards/chosen": -60.568748474121094, + "rewards/margins": -0.7822265625, + "rewards/rejected": -59.785743713378906, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.24791869521141052, + "rewards/weighted_margins": 0.28089600801467896, + "rewards/weighted_rejected": -0.528765857219696, + "step": 310 + }, + { + "epoch": 0.16749542004710807, + "grad_norm": 34.656883239746094, + "learning_rate": 9.865925894455166e-07, + "logits/chosen": -0.7003936767578125, + "logits/rejected": -0.719250500202179, + "logps/chosen": -326.3960876464844, + "logps/rejected": -290.3453063964844, + "logps/weighted_chosen": -2.553356885910034, + "logps/weighted_rejected": -3.10992431640625, + "loss": 0.7054, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -26.532812118530273, + "rewards/margins": 6.3564453125, + "rewards/rejected": -32.88788986206055, + "rewards/weighted_accuracies": 0.609375, + "rewards/weighted_chosen": -0.18020018935203552, + "rewards/weighted_margins": 0.3489990234375, + "rewards/weighted_rejected": -0.5293639898300171, + "step": 320 + }, + { + "epoch": 0.17272965192358022, + "grad_norm": 61.1888542175293, + "learning_rate": 9.84409531374603e-07, + "logits/chosen": -0.6631911993026733, + "logits/rejected": -0.6448425054550171, + "logps/chosen": -324.87579345703125, + "logps/rejected": -291.71875, + "logps/weighted_chosen": -2.5611815452575684, + "logps/weighted_rejected": -3.060229539871216, + "loss": 0.6449, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -20.070018768310547, + "rewards/margins": 8.8251953125, + "rewards/rejected": -28.8876953125, + "rewards/weighted_accuracies": 0.6625000238418579, + 
"rewards/weighted_chosen": -0.140888974070549, + "rewards/weighted_margins": 0.3719635009765625, + "rewards/weighted_rejected": -0.5127013921737671, + "step": 330 + }, + { + "epoch": 0.17796388380005235, + "grad_norm": 27.2315673828125, + "learning_rate": 9.820646844552219e-07, + "logits/chosen": -0.6496349573135376, + "logits/rejected": -0.7006805539131165, + "logps/chosen": -295.5882873535156, + "logps/rejected": -297.4906311035156, + "logps/weighted_chosen": -2.6988892555236816, + "logps/weighted_rejected": -2.898681640625, + "loss": 0.6788, + "rewards/accuracies": 0.6875, + "rewards/chosen": -19.176855087280273, + "rewards/margins": 12.649316787719727, + "rewards/rejected": -31.822460174560547, + "rewards/weighted_accuracies": 0.659375011920929, + "rewards/weighted_chosen": -0.23918533325195312, + "rewards/weighted_margins": 0.2961669862270355, + "rewards/weighted_rejected": -0.5351837277412415, + "step": 340 + }, + { + "epoch": 0.18319811567652447, + "grad_norm": 24.073888778686523, + "learning_rate": 9.795588318471964e-07, + "logits/chosen": -0.7137314081192017, + "logits/rejected": -0.7225399017333984, + "logps/chosen": -277.8890686035156, + "logps/rejected": -304.59063720703125, + "logps/weighted_chosen": -2.4300780296325684, + "logps/weighted_rejected": -2.771411180496216, + "loss": 0.6675, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -18.445703506469727, + "rewards/margins": 9.8720703125, + "rewards/rejected": -28.31640625, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -0.20159301161766052, + "rewards/weighted_margins": 0.2779785096645355, + "rewards/weighted_rejected": -0.47947996854782104, + "step": 350 + }, + { + "epoch": 0.1884323475529966, + "grad_norm": 12.750471115112305, + "learning_rate": 9.768928104849415e-07, + "logits/chosen": -0.7212737798690796, + "logits/rejected": -0.7225433588027954, + "logps/chosen": -299.53594970703125, + "logps/rejected": -275.5718688964844, + 
"logps/weighted_chosen": -2.667529344558716, + "logps/weighted_rejected": -2.800830125808716, + "loss": 0.6916, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -16.816015243530273, + "rewards/margins": 9.876562118530273, + "rewards/rejected": -26.690235137939453, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.15018615126609802, + "rewards/weighted_margins": 0.301962286233902, + "rewards/weighted_rejected": -0.45206451416015625, + "step": 360 + }, + { + "epoch": 0.19366657942946872, + "grad_norm": 29.441747665405273, + "learning_rate": 9.740675107979355e-07, + "logits/chosen": -0.6865798830986023, + "logits/rejected": -0.7117553949356079, + "logps/chosen": -331.06561279296875, + "logps/rejected": -300.31719970703125, + "logps/weighted_chosen": -1.9907715320587158, + "logps/weighted_rejected": -2.932177782058716, + "loss": 0.6819, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -16.022266387939453, + "rewards/margins": 9.630078315734863, + "rewards/rejected": -25.654491424560547, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -0.18877258896827698, + "rewards/weighted_margins": 0.2808380126953125, + "rewards/weighted_rejected": -0.4699081480503082, + "step": 370 + }, + { + "epoch": 0.19890081130594087, + "grad_norm": 29.483524322509766, + "learning_rate": 9.71083876413323e-07, + "logits/chosen": -0.6637862920761108, + "logits/rejected": -0.669873058795929, + "logps/chosen": -322.3882751464844, + "logps/rejected": -300.85858154296875, + "logps/weighted_chosen": -2.189379930496216, + "logps/weighted_rejected": -2.9217162132263184, + "loss": 0.6846, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -20.004688262939453, + "rewards/margins": 11.246289253234863, + "rewards/rejected": -31.24609375, + "rewards/weighted_accuracies": 0.6000000238418579, + "rewards/weighted_chosen": -0.22467346489429474, + "rewards/weighted_margins": 0.2720580995082855, + 
"rewards/weighted_rejected": -0.4967102110385895, + "step": 380 + }, + { + "epoch": 0.204135043182413, + "grad_norm": 20.563907623291016, + "learning_rate": 9.67942903840751e-07, + "logits/chosen": -0.7051689028739929, + "logits/rejected": -0.7537201046943665, + "logps/chosen": -324.1015625, + "logps/rejected": -310.375, + "logps/weighted_chosen": -2.397631883621216, + "logps/weighted_rejected": -2.950610399246216, + "loss": 0.6478, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -19.788671493530273, + "rewards/margins": 16.317577362060547, + "rewards/rejected": -36.111328125, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.22498169541358948, + "rewards/weighted_margins": 0.3520751893520355, + "rewards/weighted_rejected": -0.5770629644393921, + "step": 390 + }, + { + "epoch": 0.2093692750588851, + "grad_norm": 23.1771183013916, + "learning_rate": 9.646456421395447e-07, + "logits/chosen": -0.7504974603652954, + "logits/rejected": -0.7628723382949829, + "logps/chosen": -341.2171936035156, + "logps/rejected": -343.9375, + "logps/weighted_chosen": -2.2680420875549316, + "logps/weighted_rejected": -3.0065674781799316, + "loss": 0.6746, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -22.944530487060547, + "rewards/margins": 17.315624237060547, + "rewards/rejected": -40.2587890625, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -0.20337525010108948, + "rewards/weighted_margins": 0.216084286570549, + "rewards/weighted_rejected": -0.4196624755859375, + "step": 400 + }, + { + "epoch": 0.21460350693535724, + "grad_norm": 14.420520782470703, + "learning_rate": 9.611931925683266e-07, + "logits/chosen": -0.7154334783554077, + "logits/rejected": -0.7491073608398438, + "logps/chosen": -331.38983154296875, + "logps/rejected": -303.3890686035156, + "logps/weighted_chosen": -2.190844774246216, + "logps/weighted_rejected": -2.7472167015075684, + "loss": 0.6135, + 
"rewards/accuracies": 0.659375011920929, + "rewards/chosen": -25.111621856689453, + "rewards/margins": 14.543554306030273, + "rewards/rejected": -39.658592224121094, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -0.21762695908546448, + "rewards/weighted_margins": 0.36674195528030396, + "rewards/weighted_rejected": -0.584503173828125, + "step": 410 + }, + { + "epoch": 0.21983773881182936, + "grad_norm": 27.429603576660156, + "learning_rate": 9.575867082172085e-07, + "logits/chosen": -0.7379547357559204, + "logits/rejected": -0.7826202511787415, + "logps/chosen": -337.46405029296875, + "logps/rejected": -320.23907470703125, + "logps/weighted_chosen": -2.6387085914611816, + "logps/weighted_rejected": -2.712329149246216, + "loss": 0.6716, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -32.554298400878906, + "rewards/margins": 17.513866424560547, + "rewards/rejected": -50.060157775878906, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -0.35613709688186646, + "rewards/weighted_margins": 0.38392335176467896, + "rewards/weighted_rejected": -0.7401062250137329, + "step": 420 + }, + { + "epoch": 0.22507197068830148, + "grad_norm": 18.97144889831543, + "learning_rate": 9.538273936226673e-07, + "logits/chosen": -0.778491199016571, + "logits/rejected": -0.811004638671875, + "logps/chosen": -292.83984375, + "logps/rejected": -304.05352783203125, + "logps/weighted_chosen": -2.796630859375, + "logps/weighted_rejected": -3.2444825172424316, + "loss": 0.6544, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -27.327733993530273, + "rewards/margins": 12.26318359375, + "rewards/rejected": -39.58984375, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.21069030463695526, + "rewards/weighted_margins": 0.3678832948207855, + "rewards/weighted_rejected": -0.57806396484375, + "step": 430 + }, + { + "epoch": 0.23030620256477363, + "grad_norm": 
25.43462371826172, + "learning_rate": 9.499165043652391e-07, + "logits/chosen": -0.7674010992050171, + "logits/rejected": -0.7686828374862671, + "logps/chosen": -319.55859375, + "logps/rejected": -309.03436279296875, + "logps/weighted_chosen": -2.82305908203125, + "logps/weighted_rejected": -2.993237257003784, + "loss": 0.631, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -28.386133193969727, + "rewards/margins": 13.9384765625, + "rewards/rejected": -42.326072692871094, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -0.26903897523880005, + "rewards/weighted_margins": 0.353515625, + "rewards/weighted_rejected": -0.622546374797821, + "step": 440 + }, + { + "epoch": 0.23554043444124576, + "grad_norm": 65.78443908691406, + "learning_rate": 9.458553466501665e-07, + "logits/chosen": -0.8066772222518921, + "logits/rejected": -0.8363037109375, + "logps/chosen": -314.7945251464844, + "logps/rejected": -287.65313720703125, + "logps/weighted_chosen": -2.8233399391174316, + "logps/weighted_rejected": -3.013622999191284, + "loss": 0.6831, + "rewards/accuracies": 0.640625, + "rewards/chosen": -28.642578125, + "rewards/margins": 15.162694931030273, + "rewards/rejected": -43.80976486206055, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -0.3802246153354645, + "rewards/weighted_margins": 0.3342132568359375, + "rewards/weighted_rejected": -0.7139984369277954, + "step": 450 + }, + { + "epoch": 0.24077466631771788, + "grad_norm": 13.290085792541504, + "learning_rate": 9.416452768711366e-07, + "logits/chosen": -0.7957550287246704, + "logits/rejected": -0.8287414312362671, + "logps/chosen": -323.0093688964844, + "logps/rejected": -306.98907470703125, + "logps/weighted_chosen": -2.544872999191284, + "logps/weighted_rejected": -3.100903272628784, + "loss": 0.6759, + "rewards/accuracies": 0.640625, + "rewards/chosen": -30.906835556030273, + "rewards/margins": 16.355859756469727, + "rewards/rejected": 
-47.24980545043945, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.343759149312973, + "rewards/weighted_margins": 0.40337830781936646, + "rewards/weighted_rejected": -0.747100830078125, + "step": 460 + }, + { + "epoch": 0.24600889819419, + "grad_norm": 21.766939163208008, + "learning_rate": 9.372877011572557e-07, + "logits/chosen": -0.7200164794921875, + "logits/rejected": -0.742279052734375, + "logps/chosen": -342.75079345703125, + "logps/rejected": -318.60626220703125, + "logps/weighted_chosen": -2.5311522483825684, + "logps/weighted_rejected": -2.990124464035034, + "loss": 0.63, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -32.077247619628906, + "rewards/margins": 11.883398056030273, + "rewards/rejected": -43.959373474121094, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -0.30525511503219604, + "rewards/weighted_margins": 0.45678406953811646, + "rewards/weighted_rejected": -0.7624969482421875, + "step": 470 + }, + { + "epoch": 0.2512431300706621, + "grad_norm": 28.76239776611328, + "learning_rate": 9.327840749034141e-07, + "logits/chosen": -0.7930053472518921, + "logits/rejected": -0.8311401605606079, + "logps/chosen": -316.79998779296875, + "logps/rejected": -326.5062561035156, + "logps/weighted_chosen": -2.4120116233825684, + "logps/weighted_rejected": -3.591870069503784, + "loss": 0.6639, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -29.640039443969727, + "rewards/margins": 20.350976943969727, + "rewards/rejected": -49.9853515625, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -0.27521055936813354, + "rewards/weighted_margins": 0.4522705078125, + "rewards/weighted_rejected": -0.7274719476699829, + "step": 480 + }, + { + "epoch": 0.2564773619471343, + "grad_norm": 36.629127502441406, + "learning_rate": 9.281359022841965e-07, + "logits/chosen": -0.72747802734375, + "logits/rejected": -0.7426910400390625, + 
"logps/chosen": -308.5406188964844, + "logps/rejected": -300.71484375, + "logps/weighted_chosen": -2.6044554710388184, + "logps/weighted_rejected": -3.862866163253784, + "loss": 0.6178, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -33.54804611206055, + "rewards/margins": 22.3515625, + "rewards/rejected": -55.88496017456055, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -0.38198548555374146, + "rewards/weighted_margins": 0.525561511516571, + "rewards/weighted_rejected": -0.9073349237442017, + "step": 490 + }, + { + "epoch": 0.26171159382360637, + "grad_norm": 20.296154022216797, + "learning_rate": 9.233447357514989e-07, + "logits/chosen": -0.7092193365097046, + "logits/rejected": -0.751629650592804, + "logps/chosen": -337.10467529296875, + "logps/rejected": -328.71875, + "logps/weighted_chosen": -3.054370164871216, + "logps/weighted_rejected": -3.5334715843200684, + "loss": 0.6534, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -39.15234375, + "rewards/margins": 19.770116806030273, + "rewards/rejected": -58.90898513793945, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.470510870218277, + "rewards/weighted_margins": 0.566607654094696, + "rewards/weighted_rejected": -1.0376465320587158, + "step": 500 + }, + { + "epoch": 0.26171159382360637, + "eval_logits/chosen": -0.8052441477775574, + "eval_logits/rejected": -0.8225547075271606, + "eval_logps/chosen": -333.44000244140625, + "eval_logps/rejected": -331.98199462890625, + "eval_logps/weighted_chosen": -2.756896734237671, + "eval_logps/weighted_rejected": -3.441680908203125, + "eval_loss": 0.6561886668205261, + "eval_rewards/accuracies": 0.6370000243186951, + "eval_rewards/chosen": -44.67877960205078, + "eval_rewards/margins": 19.602703094482422, + "eval_rewards/rejected": -64.27362823486328, + "eval_rewards/weighted_accuracies": 0.6445000171661377, + "eval_rewards/weighted_chosen": -0.485819548368454, + 
"eval_rewards/weighted_margins": 0.4620407819747925, + "eval_rewards/weighted_rejected": -0.9478604793548584, + "eval_runtime": 1263.3333, + "eval_samples_per_second": 1.583, + "eval_steps_per_second": 0.396, + "step": 500 + } + ], + "logging_steps": 10, + "max_steps": 1911, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000..6696854 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9edb204807d1db856d22da0875320cd19b9233ec97000900538cab6c38a661e1 +size 8721 diff --git a/checkpoint-500/zero_to_fp32.py b/checkpoint-500/zero_to_fp32.py new file mode 100644 index 0000000..0e75914 --- /dev/null +++ b/checkpoint-500/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
# output_dir/ --safe_serialization

import argparse
import torch
import glob
import math
import os
import re
import gc
import json
import numpy as np
from tqdm import tqdm
from collections import OrderedDict
from dataclasses import dataclass

# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
# DeepSpeed data structures it has to be available in the current python environment.
from deepspeed.utils import logger
from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
                                            FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
                                            FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)


@dataclass
class zero_model_state:
    """Metadata pulled out of one ``*_model_states.pt`` file by ``parse_model_states``."""
    # buffers found in state_dict["module"], cast to fp32
    buffers: dict
    # value stored under PARAM_SHAPES (iterated as a sequence of {name: shape} mappings)
    param_shapes: dict
    # [key, value] pairs taken from state_dict["shared_params"]
    shared_params: list
    # value stored under DS_VERSION, or None when absent (annotation kept from the original)
    ds_version: int
    # value stored under FROZEN_PARAM_SHAPES, or None when absent
    frozen_param_shapes: dict
    # value stored under FROZEN_PARAM_FRAGMENTS, or None when absent
    frozen_param_fragments: dict


debug = 0

# load to cpu
device = torch.device('cpu')


def atoi(text):
    """Return ``int(text)`` when ``text`` consists only of digits, otherwise ``text`` unchanged."""
    return int(text) if text.isdigit() else text


def natural_keys(text):
    '''
    Sort key for human ("natural") ordering: alist.sort(key=natural_keys).

    The string is split on runs of digits and every digit run is converted to an int, so
    numeric parts are compared numerically rather than character by character.
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    '''
    return [atoi(c) for c in re.split(r'(\d+)', text)]


def get_model_state_file(checkpoint_dir, zero_stage):
    """Return the path of the rank-0 model-states file inside ``checkpoint_dir``.

    Args:
        - ``checkpoint_dir``: directory expected to contain the model-states file
        - ``zero_stage``: ZeRO stage; selects which file name is looked up

    Raises:
        - ``FileNotFoundError``: the directory or the expected file does not exist
        - ``ValueError``: ``zero_stage`` is neither <= 2 nor 3
    """
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
    else:
        # previously `file` stayed unbound here and the next statement failed with UnboundLocalError
        raise ValueError(f"unknown zero stage {zero_stage}")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file


def get_checkpoint_files(checkpoint_dir, glob_pattern):
    """Return the files in ``checkpoint_dir`` matching ``glob_pattern``, in natural sort order.

    Raises ``FileNotFoundError`` when nothing matches.
    """
    # XXX: need to test that this simple glob rule works for multi-node setup too
    ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)

    if len(ckpt_files) == 0:
        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")

    return ckpt_files


def get_optim_files(checkpoint_dir):
    """Return all ``*_optim_states.pt`` files found in ``checkpoint_dir``."""
    return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")


def get_model_state_files(checkpoint_dir):
    """Return all ``*_model_states.pt`` files found in ``checkpoint_dir``."""
    return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")


def parse_model_states(files):
    """Load every model-states file and wrap its metadata in a ``zero_model_state``.

    Args:
        - ``files``: paths of ``*_model_states.pt`` files

    Returns:
        list with one ``zero_model_state`` per input file, in input order

    Raises:
        - ``ValueError``: a file does not contain the BUFFER_NAMES entry
    """
    zero_model_states = []
    for file in files:
        # NOTE: weights_only=False unpickles arbitrary Python objects - only load checkpoints
        # that come from a trusted source.
        state_dict = torch.load(file, map_location=device, weights_only=False)

        if BUFFER_NAMES not in state_dict:
            raise ValueError(f"{file} is not a model state checkpoint")
        buffer_names = state_dict[BUFFER_NAMES]
        if debug:
            print("Found buffers:", buffer_names)

        # recover just the buffers while restoring them to fp32 if they were saved in fp16
        buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
        param_shapes = state_dict[PARAM_SHAPES]

        # frozen parameters are optional
        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
        if frozen_param_shapes is not None:
            if debug:
                print(f"Found frozen_param_shapes: {frozen_param_shapes}")

        # handle shared params
        shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]

        ds_version = state_dict.get(DS_VERSION, None)

        frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)

        z_model_state = zero_model_state(buffers=buffers,
                                         param_shapes=param_shapes,
                                         shared_params=shared_params,
                                         ds_version=ds_version,
                                         frozen_param_shapes=frozen_param_shapes,
                                         frozen_param_fragments=frozen_param_fragments)
        zero_model_states.append(z_model_state)

    return zero_model_states


def parse_optim_states(files, ds_checkpoint_dir):
    """Load the optimizer-state shards and extract the flat fp32 groups.

    Args:
        - ``files``: paths of the ``*_optim_states.pt`` shards
        - ``ds_checkpoint_dir``: checkpoint folder, used only in the error message

    Returns:
        ``(zero_stage, world_size, fp32_flat_groups)`` where ``fp32_flat_groups`` holds one
        entry per loaded shard

    Raises:
        - ``ValueError``: the first shard has no ZERO_STAGE entry, the number of shards differs
          from the recorded partition count, or the stage is neither <= 2 nor 3
    """
    total_files = len(files)
    state_dicts = []
    for f in tqdm(files, desc='Loading checkpoint shards'):
        # NOTE: weights_only=False unpickles arbitrary Python objects - only load checkpoints
        # that come from a trusted source.
        state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False)
        # immediately discard the potentially huge optimizer states as we only care for fp32 master
        # weights; the default of None also handles the case where the entry was already removed
        # by another helper script
        state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None)
        state_dicts.append(state_dict)

    if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]:
        raise ValueError(f"{files[0]} is not a zero checkpoint")
    zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE]
    world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT]

    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
    # parameters can be different from data parallelism for non-expert parameters. So we can just
    # use the max of the partition_count to get the dp world_size.

    if isinstance(world_size, list):
        world_size = max(world_size)

    if world_size != total_files:
        raise ValueError(
            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
        )

    # the groups are named differently in each stage
    if zero_stage <= 2:
        fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
    elif zero_stage == 3:
        fp32_groups_key = FP32_FLAT_GROUPS
    else:
        raise ValueError(f"unknown zero stage {zero_stage}")

    fp32_flat_groups = [sd[OPTIMIZER_STATE_DICT][fp32_groups_key] for sd in state_dicts]
    return zero_stage, world_size, fp32_flat_groups


def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters):
    """
    Returns fp32 state_dict reconstructed from ds checkpoint

    Args:
        - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)
        - ``exclude_frozen_parameters``: passed through unchanged to the stage-specific reconstruction

    Raises:
        - ``ValueError``: the detected stage is handled by neither the zero2 nor the zero3 path

    """
    print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")

    optim_files = get_optim_files(ds_checkpoint_dir)
    zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir)
    print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")

    model_files = get_model_state_files(ds_checkpoint_dir)

    zero_model_states = parse_model_states(model_files)
    print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')

    if zero_stage <= 2:
        return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                                          exclude_frozen_parameters)
    elif zero_stage == 3:
        return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                                          exclude_frozen_parameters)

    # fail loudly instead of implicitly returning None
    raise ValueError(f"unknown zero stage {zero_stage}")


def _has_callable(obj, fn):
    """Return True when ``obj`` has an attribute named ``fn`` and that attribute is callable."""
    attr = getattr(obj, fn, None)
    return callable(attr)


def _shape_numel(shape):
    """Element count of ``shape``: its own ``numel()`` when available, else the product of its entries."""
    return shape.numel() if _has_callable(shape, 'numel') else math.prod(shape)


def _zero2_merge_frozen_params(state_dict, zero_model_states):
    """Copy rank 0's frozen parameter fragments into ``state_dict`` (modified in place).

    Does nothing when rank 0 recorded no frozen parameter shapes.
    """
    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
        return

    frozen_param_shapes = zero_model_states[0].frozen_param_shapes
    frozen_param_fragments = zero_model_states[0].frozen_param_fragments

    if debug:
        num_elem = sum(_shape_numel(s) for s in frozen_param_shapes.values())
        print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

        wanted_params = len(frozen_param_shapes)
        wanted_numel = sum(_shape_numel(s) for s in frozen_param_shapes.values())
        avail_numel = sum([p.numel() for p in frozen_param_fragments.values()])
        print(f'Frozen params: Have {avail_numel} numels to process.')
        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for name, shape in frozen_param_shapes.items():
        total_params += 1
        unpartitioned_numel = _shape_numel(shape)
        total_numel += unpartitioned_numel

        state_dict[name] = frozen_param_fragments[name]

        if debug:
            print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")


def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """Rebuild the trainable parameters from the per-rank flat fp32 groups into ``state_dict``.

    For each param group the partitions of all ranks are concatenated into one flat vector, which
    is then sliced into named tensors following rank 0's ``param_shapes``.

    Raises:
        - ``ValueError``: after alignment, the consumed element count of a group differs from
          the size of its merged vector
    """
    param_shapes = zero_model_states[0].param_shapes

    # Reconstruction protocol:
    #
    # XXX: document this

    if debug:
        for i in range(world_size):
            for j in range(len(fp32_flat_groups[0])):
                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")

    # XXX: memory usage doubles here (zero2)
    num_param_groups = len(fp32_flat_groups[0])
    merged_single_partition_of_fp32_groups = []
    for i in range(num_param_groups):
        merged_partitions = [sd[i] for sd in fp32_flat_groups]
        full_single_fp32_vector = torch.cat(merged_partitions, 0)
        merged_single_partition_of_fp32_groups.append(full_single_fp32_vector)
    avail_numel = sum(
        [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups])

    if debug:
        wanted_params = sum([len(shapes) for shapes in param_shapes])
        wanted_numel = sum([sum(_shape_numel(shape) for shape in shapes.values()) for shapes in param_shapes])
        # not asserting if there is a mismatch due to possible padding
        print(f"Have {avail_numel} numels to process.")
        print(f"Need {wanted_numel} numels in {wanted_params} params.")

    # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
    # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
    # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
    # live optimizer object, so we are checking that the numbers are within the right range
    align_to = 2 * world_size

    def zero2_align(x):
        return align_to * math.ceil(x / align_to)

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    total_numel = 0
    total_params = 0
    for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
        offset = 0
        avail_numel = full_single_fp32_vector.numel()
        for name, shape in shapes.items():

            unpartitioned_numel = _shape_numel(shape)
            total_numel += unpartitioned_numel
            total_params += 1

            if debug:
                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
            state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
            offset += unpartitioned_numel

        if debug:
            print(f"original offset={offset}, avail_numel={avail_numel}")

        offset = zero2_align(offset)
        avail_numel = zero2_align(avail_numel)

        if debug:
            print(f"aligned offset={offset}, avail_numel={avail_numel}")

        # Sanity check
        if offset != avail_numel:
            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")


def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                               exclude_frozen_parameters):
    state_dict = OrderedDict()

    # buffers
    buffers = zero_model_states[0].buffers
    state_dict.update(buffers)
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensor instead of torch tensor, which is more memory efficient. + Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model``: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/config.json b/config.json new file mode 100644 index 0000000..3f8f5c0 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": 
"model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": 
"model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..03aa64f --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:0968dcc0ee8e56c7dccd34a7f51f8065ea0cb9e2cc529e3243d1e5c0a4bdaa0c +size 17208754 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..877a9a9 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": 
"<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": 
"<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": 
"<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": 
"<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 32768, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..55fbd6a --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:ce12db195466115f95dcb5cdef20e4bd0d806824726d6c0b3824d1ebf482ce4d +size 8721