commit c6a30a8d26d55e81cf890873c6351114b4d6674c Author: ModelHub XC Date: Fri May 22 13:24:16 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: HCY123902/llama-3-8b-inst-dpo-on-p-tw31-beta-2.5e-0-ift Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..c9fdcdb --- /dev/null +++ b/README.md @@ -0,0 +1,69 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: transformers +model_name: llama-3-8b-inst-dpo-on-p-tw31-beta-2.5e-0-ift +tags: +- generated_from_trainer +- trl +- dpo +licence: license +--- + +# Model Card for llama-3-8b-inst-dpo-on-p-tw31-beta-2.5e-0-ift + +This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="HCY123902/llama-3-8b-inst-dpo-on-p-tw31-beta-2.5e-0-ift", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/2320032466hchy/attention_dpo/runs/ak1x2kht) + + +This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290). + +### Framework versions + +- TRL: 0.20.0 +- Transformers: 4.54.1 +- Pytorch: 2.7.1+cu128 +- Datasets: 3.6.0 +- Tokenizers: 0.21.1 + +## Citations + +Cite DPO as: + +```bibtex +@inproceedings{rafailov2023direct, + title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}}, + author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn}, + year = 2023, + booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023}, + url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html}, + editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine}, +} +``` + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-1000/chat_template.jinja b/checkpoint-1000/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/checkpoint-1000/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000..ceed8ae --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128009, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/checkpoint-1000/generation_config.json b/checkpoint-1000/generation_config.json new file mode 100644 index 0000000..38e1b41 --- /dev/null +++ b/checkpoint-1000/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128009 + ], + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.54.1" +} diff --git a/checkpoint-1000/model-00001-of-00004.safetensors b/checkpoint-1000/model-00001-of-00004.safetensors new file mode 100644 index 0000000..9555d3d --- /dev/null +++ b/checkpoint-1000/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f22018cb2cb2316189a378f2b508376f80678c5533603153e61c5aa940c559b6 +size 4976698672 diff --git a/checkpoint-1000/model-00002-of-00004.safetensors b/checkpoint-1000/model-00002-of-00004.safetensors new file mode 100644 index 0000000..a3adc70 --- /dev/null +++ b/checkpoint-1000/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d045b297b68a0f6e6eb4431f77f6b026ac948d5c60501dcd12ce7fa73927a2e0 +size 4999802720 diff --git a/checkpoint-1000/model-00003-of-00004.safetensors b/checkpoint-1000/model-00003-of-00004.safetensors new file mode 100644 index 0000000..a6c8120 --- /dev/null +++ b/checkpoint-1000/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e88ad5193ce461d51768091ebe6ebc2513e25c5b395cd4bbd58ad222fbff47f +size 4915916176 diff --git a/checkpoint-1000/model-00004-of-00004.safetensors b/checkpoint-1000/model-00004-of-00004.safetensors new file mode 100644 index 0000000..6a8d384 --- /dev/null +++ b/checkpoint-1000/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:686e55f735c246fbeab39a925934fee3072b0bbfd2f5c8d92aadb5672c80694e +size 1168138808 diff --git a/checkpoint-1000/model.safetensors.index.json b/checkpoint-1000/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/checkpoint-1000/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000..b43be96 --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-1000/tokenizer.json b/checkpoint-1000/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/checkpoint-1000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000..34d134f --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000..e78cb55 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,2199 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5344378381989445, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005344378381989445, + "grad_norm": 31.37949562072754, + "learning_rate": 0.0, + "logits/chosen": -0.2252655029296875, + "logits/rejected": -0.192626953125, + "logps/chosen": -110.828125, + "logps/rejected": -115.515625, + "logps/weighted_chosen": -0.31903076171875, + "logps/weighted_rejected": -0.333709716796875, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/weighted_accuracies": 0.0, + "rewards/weighted_chosen": 0.0, + "rewards/weighted_margins": 0.0, + "rewards/weighted_rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.005344378381989445, + "grad_norm": 75.76042938232422, + "learning_rate": 4.787234042553191e-08, + "logits/chosen": -0.3284708559513092, + "logits/rejected": -0.3214448392391205, + "logps/chosen": -134.46006774902344, + "logps/rejected": -138.06597900390625, + "logps/weighted_chosen": -0.3890923261642456, + "logps/weighted_rejected": -0.4067721962928772, + "loss": 0.6953, + "rewards/accuracies": 0.2951388955116272, + "rewards/chosen": -0.2072482705116272, + "rewards/margins": -0.1790364533662796, + "rewards/rejected": -0.0282118059694767, + "rewards/weighted_accuracies": 0.3472222089767456, + "rewards/weighted_chosen": -0.0032717387657612562, + "rewards/weighted_margins": -0.0047516291961073875, + "rewards/weighted_rejected": 0.0014813741436228156, + "step": 10 + }, + { + "epoch": 0.01068875676397889, + "grad_norm": 24.94420623779297, + "learning_rate": 1.0106382978723404e-07, + "logits/chosen": -0.2780090272426605, + "logits/rejected": -0.2689048647880554, + "logps/chosen": -115.3070297241211, + "logps/rejected": -114.8101577758789, + "logps/weighted_chosen": -0.354583740234375, + "logps/weighted_rejected": -0.36929017305374146, + "loss": 0.6925, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": 0.0087890625, + "rewards/margins": -0.02734375, + "rewards/rejected": 0.0361328125, + "rewards/weighted_accuracies": 0.4312500059604645, + "rewards/weighted_chosen": 0.0016719817649573088, + "rewards/weighted_margins": 0.0011638641590252519, + "rewards/weighted_rejected": 0.0005052566411904991, + "step": 20 + }, + { + "epoch": 0.016033135145968335, + "grad_norm": 26.90618133544922, + "learning_rate": 1.5425531914893615e-07, + "logits/chosen": -0.26707762479782104, + "logits/rejected": -0.2697288393974304, + "logps/chosen": -122.49687194824219, + "logps/rejected": -128.2218780517578, + "logps/weighted_chosen": -0.364663690328598, + "logps/weighted_rejected": -0.40430909395217896, + "loss": 0.6919, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": 0.099609375, + "rewards/margins": 0.2177734375, + "rewards/rejected": -0.1181640625, + "rewards/weighted_accuracies": 0.4468750059604645, + "rewards/weighted_chosen": 0.0011037830263376236, + "rewards/weighted_margins": 0.0029600143898278475, + "rewards/weighted_rejected": -0.0018524170154705644, + "step": 30 + }, + { + "epoch": 0.02137751352795778, + "grad_norm": 19.056455612182617, + "learning_rate": 2.074468085106383e-07, + "logits/chosen": -0.31552428007125854, + "logits/rejected": -0.309671014547348, + "logps/chosen": -126.5132827758789, + "logps/rejected": -127.7515640258789, + "logps/weighted_chosen": -0.3717803955078125, + "logps/weighted_rejected": -0.36720579862594604, + "loss": 0.6927, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.041015625, + "rewards/margins": 0.150390625, + "rewards/rejected": -0.109375, + "rewards/weighted_accuracies": 0.44062501192092896, + "rewards/weighted_chosen": 0.0009314537164755166, + "rewards/weighted_margins": 0.0009433746454305947, + "rewards/weighted_rejected": -1.831054760259576e-05, + "step": 40 + }, + { + "epoch": 0.026721891909947223, + "grad_norm": 94.1146469116211, + "learning_rate": 2.6063829787234044e-07, + "logits/chosen": -0.2799697816371918, + "logits/rejected": -0.2664199769496918, + "logps/chosen": -120.34375, + "logps/rejected": -120.19062805175781, + "logps/weighted_chosen": -0.367788702249527, + "logps/weighted_rejected": -0.37299805879592896, + "loss": 0.6934, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.01806640625, + "rewards/margins": -0.11865234375, + "rewards/rejected": 0.1005859375, + "rewards/weighted_accuracies": 0.421875, + "rewards/weighted_chosen": 0.0011091709602624178, + "rewards/weighted_margins": -0.0005058288807049394, + "rewards/weighted_rejected": 0.00161571498028934, + "step": 50 + }, + { + "epoch": 0.03206627029193667, + "grad_norm": 47.161922454833984, + "learning_rate": 3.1382978723404253e-07, + "logits/chosen": -0.22172394394874573, + "logits/rejected": -0.2157600373029709, + "logps/chosen": -115.8382797241211, + "logps/rejected": -118.75859069824219, + "logps/weighted_chosen": -0.36602783203125, + "logps/weighted_rejected": -0.369253545999527, + "loss": 0.6908, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.10546875, + "rewards/margins": -0.1259765625, + "rewards/rejected": 0.0205078125, + "rewards/weighted_accuracies": 0.4000000059604645, + "rewards/weighted_chosen": 0.008862781338393688, + "rewards/weighted_margins": 0.00811080913990736, + "rewards/weighted_rejected": 0.0007405281066894531, + "step": 60 + }, + { + "epoch": 0.037410648673926114, + "grad_norm": 42.38877868652344, + "learning_rate": 3.6702127659574467e-07, + "logits/chosen": -0.299722284078598, + "logits/rejected": -0.29665374755859375, + "logps/chosen": -114.1656265258789, + "logps/rejected": -118.0765609741211, + "logps/weighted_chosen": -0.35313719511032104, + "logps/weighted_rejected": -0.3739013671875, + "loss": 0.6949, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.0693359375, + "rewards/margins": -0.0458984375, + "rewards/rejected": -0.0234375, + "rewards/weighted_accuracies": 0.390625, + "rewards/weighted_chosen": 0.0011390686267986894, + "rewards/weighted_margins": -0.0028884888160973787, + "rewards/weighted_rejected": 0.0040260315872728825, + "step": 70 + }, + { + "epoch": 0.04275502705591556, + "grad_norm": 35.451927185058594, + "learning_rate": 4.202127659574468e-07, + "logits/chosen": -0.30262452363967896, + "logits/rejected": -0.24024733901023865, + "logps/chosen": -112.3812484741211, + "logps/rejected": -111.8375015258789, + "logps/weighted_chosen": -0.37481385469436646, + "logps/weighted_rejected": -0.38435667753219604, + "loss": 0.694, + "rewards/accuracies": 0.390625, + "rewards/chosen": -0.1552734375, + "rewards/margins": -0.0341796875, + "rewards/rejected": -0.12109375, + "rewards/weighted_accuracies": 0.48750001192092896, + "rewards/weighted_chosen": 0.01006317138671875, + "rewards/weighted_margins": 0.0036018372047692537, + "rewards/weighted_rejected": 0.006462156772613525, + "step": 80 + }, + { + "epoch": 0.048099405437905, + "grad_norm": 42.264678955078125, + "learning_rate": 4.734042553191489e-07, + "logits/chosen": -0.2891853451728821, + "logits/rejected": -0.23835448920726776, + "logps/chosen": -118.0296859741211, + "logps/rejected": -116.484375, + "logps/weighted_chosen": -0.376077264547348, + "logps/weighted_rejected": -0.3848114013671875, + "loss": 0.6935, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": 0.0458984375, + "rewards/margins": 0.267578125, + "rewards/rejected": -0.2216796875, + "rewards/weighted_accuracies": 0.46562498807907104, + "rewards/weighted_chosen": 0.016840171068906784, + "rewards/weighted_margins": 0.014923477545380592, + "rewards/weighted_rejected": 0.00187511439435184, + "step": 90 + }, + { + "epoch": 0.053443783819894446, + "grad_norm": 36.87267303466797, + "learning_rate": 5.26595744680851e-07, + "logits/chosen": -0.3333755433559418, + "logits/rejected": -0.28821104764938354, + "logps/chosen": -124.0367202758789, + "logps/rejected": -124.33906555175781, + "logps/weighted_chosen": -0.39268797636032104, + "logps/weighted_rejected": -0.4093261659145355, + "loss": 0.6875, + "rewards/accuracies": 0.44062501192092896, + "rewards/chosen": -0.0302734375, + "rewards/margins": 0.36835938692092896, + "rewards/rejected": -0.39863282442092896, + "rewards/weighted_accuracies": 0.484375, + "rewards/weighted_chosen": 0.012582575902342796, + "rewards/weighted_margins": 0.026942063122987747, + "rewards/weighted_rejected": -0.014329910278320312, + "step": 100 + }, + { + "epoch": 0.058788162201883896, + "grad_norm": 17.8848876953125, + "learning_rate": 5.797872340425531e-07, + "logits/chosen": -0.3335327208042145, + "logits/rejected": -0.32384032011032104, + "logps/chosen": -117.6968765258789, + "logps/rejected": -119.85859680175781, + "logps/weighted_chosen": -0.35866087675094604, + "logps/weighted_rejected": -0.37585145235061646, + "loss": 0.7015, + "rewards/accuracies": 0.3968749940395355, + "rewards/chosen": -0.524609386920929, + "rewards/margins": -0.01328125037252903, + "rewards/rejected": -0.511523425579071, + "rewards/weighted_accuracies": 0.4281249940395355, + "rewards/weighted_chosen": -0.0057319640181958675, + "rewards/weighted_margins": 0.0012493133544921875, + "rewards/weighted_rejected": -0.0070056915283203125, + "step": 110 + }, + { + "epoch": 0.06413254058387334, + "grad_norm": 37.190059661865234, + "learning_rate": 6.329787234042553e-07, + "logits/chosen": -0.29607391357421875, + "logits/rejected": -0.2735137939453125, + "logps/chosen": -120.81718444824219, + "logps/rejected": -127.04219055175781, + "logps/weighted_chosen": -0.41831666231155396, + "logps/weighted_rejected": -0.42036741971969604, + "loss": 0.7443, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.43964844942092896, + "rewards/margins": 0.690234363079071, + "rewards/rejected": -1.1298828125, + "rewards/weighted_accuracies": 0.45625001192092896, + "rewards/weighted_chosen": -0.07387389987707138, + "rewards/weighted_margins": -0.04258232191205025, + "rewards/weighted_rejected": -0.031409453600645065, + "step": 120 + }, + { + "epoch": 0.06947691896586278, + "grad_norm": 22.76742172241211, + "learning_rate": 6.861702127659574e-07, + "logits/chosen": -0.30709609389305115, + "logits/rejected": -0.29381561279296875, + "logps/chosen": -120.3140640258789, + "logps/rejected": -121.73515319824219, + "logps/weighted_chosen": -0.38916015625, + "logps/weighted_rejected": -0.38067322969436646, + "loss": 0.6906, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -0.8837890625, + "rewards/margins": 0.4273437559604645, + "rewards/rejected": -1.310937523841858, + "rewards/weighted_accuracies": 0.46875, + "rewards/weighted_chosen": -0.0019147873390465975, + "rewards/weighted_margins": 0.025261688977479935, + "rewards/weighted_rejected": -0.027143806219100952, + "step": 130 + }, + { + "epoch": 0.07482129734785223, + "grad_norm": 25.72498321533203, + "learning_rate": 7.393617021276596e-07, + "logits/chosen": -0.2590804994106293, + "logits/rejected": -0.25146180391311646, + "logps/chosen": -114.2992172241211, + "logps/rejected": -118.26094055175781, + "logps/weighted_chosen": -0.376434326171875, + "logps/weighted_rejected": -0.39961546659469604, + "loss": 0.6968, + "rewards/accuracies": 0.4906249940395355, + "rewards/chosen": -1.7428710460662842, + "rewards/margins": 1.043554663658142, + "rewards/rejected": -2.7867188453674316, + "rewards/weighted_accuracies": 0.46562498807907104, + "rewards/weighted_chosen": -0.01796722412109375, + "rewards/weighted_margins": 0.044054411351680756, + "rewards/weighted_rejected": -0.062059782445430756, + "step": 140 + }, + { + "epoch": 0.08016567572984168, + "grad_norm": 27.819217681884766, + "learning_rate": 7.925531914893616e-07, + "logits/chosen": -0.260824590921402, + "logits/rejected": -0.2456924468278885, + "logps/chosen": -118.3109359741211, + "logps/rejected": -116.1898422241211, + "logps/weighted_chosen": -0.3584175109863281, + "logps/weighted_rejected": -0.389230340719223, + "loss": 0.693, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -2.796679735183716, + "rewards/margins": 1.470312476158142, + "rewards/rejected": -4.267773628234863, + "rewards/weighted_accuracies": 0.5406249761581421, + "rewards/weighted_chosen": 0.02875671349465847, + "rewards/weighted_margins": 0.08138389885425568, + "rewards/weighted_rejected": -0.05276889726519585, + "step": 150 + }, + { + "epoch": 0.08551005411183112, + "grad_norm": 20.792280197143555, + "learning_rate": 8.457446808510637e-07, + "logits/chosen": -0.27181702852249146, + "logits/rejected": -0.26198044419288635, + "logps/chosen": -119.7906265258789, + "logps/rejected": -120.09687805175781, + "logps/weighted_chosen": -0.3836608827114105, + "logps/weighted_rejected": -0.40611571073532104, + "loss": 0.6795, + "rewards/accuracies": 0.503125011920929, + "rewards/chosen": -3.6748046875, + "rewards/margins": 1.8689453601837158, + "rewards/rejected": -5.542382717132568, + "rewards/weighted_accuracies": 0.543749988079071, + "rewards/weighted_chosen": -0.013747024349868298, + "rewards/weighted_margins": 0.10787200927734375, + "rewards/weighted_rejected": -0.12159118801355362, + "step": 160 + }, + { + "epoch": 0.09085443249382057, + "grad_norm": 28.162086486816406, + "learning_rate": 8.989361702127659e-07, + "logits/chosen": -0.30354803800582886, + "logits/rejected": -0.28291016817092896, + "logps/chosen": -118.81172180175781, + "logps/rejected": -123.3851547241211, + "logps/weighted_chosen": -0.364785760641098, + "logps/weighted_rejected": -0.40638428926467896, + "loss": 0.7104, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -4.450781345367432, + "rewards/margins": 1.540624976158142, + "rewards/rejected": -5.989843845367432, + "rewards/weighted_accuracies": 0.49687498807907104, + "rewards/weighted_chosen": -0.05272483825683594, + "rewards/weighted_margins": 0.03959999233484268, + "rewards/weighted_rejected": -0.0922950729727745, + "step": 170 + }, + { + "epoch": 0.09619881087581, + "grad_norm": 62.7450065612793, + "learning_rate": 9.52127659574468e-07, + "logits/chosen": -0.3086685240268707, + "logits/rejected": -0.29756468534469604, + "logps/chosen": -120.9000015258789, + "logps/rejected": -121.6031265258789, + "logps/weighted_chosen": -0.4021057188510895, + "logps/weighted_rejected": -0.43016356229782104, + "loss": 0.6902, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -5.364843845367432, + "rewards/margins": 0.95654296875, + "rewards/rejected": -6.323437690734863, + "rewards/weighted_accuracies": 0.515625, + "rewards/weighted_chosen": -0.07762374728918076, + "rewards/weighted_margins": 0.042090605944395065, + "rewards/weighted_rejected": -0.11983337253332138, + "step": 180 + }, + { + "epoch": 0.10154318925779945, + "grad_norm": 107.93684387207031, + "learning_rate": 9.99999129927477e-07, + "logits/chosen": -0.2998809814453125, + "logits/rejected": -0.2729034423828125, + "logps/chosen": -121.68046569824219, + "logps/rejected": -122.90547180175781, + "logps/weighted_chosen": -0.4000488221645355, + "logps/weighted_rejected": -0.49153441190719604, + "loss": 0.6846, + "rewards/accuracies": 0.4906249940395355, + "rewards/chosen": -7.084570407867432, + "rewards/margins": 1.763671875, + "rewards/rejected": -8.850390434265137, + "rewards/weighted_accuracies": 0.5625, + "rewards/weighted_chosen": -0.06708984076976776, + "rewards/weighted_margins": 0.19685058295726776, + "rewards/weighted_rejected": -0.2636512815952301, + "step": 190 + }, + { + "epoch": 0.10688756763978889, + "grad_norm": 42.62810516357422, + "learning_rate": 9.99894724888679e-07, + "logits/chosen": -0.2994216978549957, + "logits/rejected": -0.2775813937187195, + "logps/chosen": -129.10311889648438, + "logps/rejected": -127.71015930175781, + "logps/weighted_chosen": -0.40317994356155396, + "logps/weighted_rejected": -0.4718689024448395, + "loss": 0.7052, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -8.422460556030273, + "rewards/margins": 1.215234398841858, + "rewards/rejected": -9.638280868530273, + "rewards/weighted_accuracies": 0.518750011920929, + "rewards/weighted_chosen": -0.10227356106042862, + "rewards/weighted_margins": 0.08837012946605682, + "rewards/weighted_rejected": -0.19074249267578125, + "step": 200 + }, + { + "epoch": 0.11223194602177834, + "grad_norm": 56.36786651611328, + "learning_rate": 9.996163469793475e-07, + "logits/chosen": -0.3200393617153168, + "logits/rejected": -0.28080445528030396, + "logps/chosen": -127.65625, + "logps/rejected": -122.5406265258789, + "logps/weighted_chosen": -0.402487188577652, + "logps/weighted_rejected": -0.4784179627895355, + "loss": 0.6855, + "rewards/accuracies": 0.559374988079071, + "rewards/chosen": -9.696874618530273, + "rewards/margins": 3.138671875, + "rewards/rejected": -12.8359375, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.07228164374828339, + "rewards/weighted_margins": 0.1660926789045334, + "rewards/weighted_rejected": -0.23847046494483948, + "step": 210 + }, + { + "epoch": 0.11757632440376779, + "grad_norm": 24.45851707458496, + "learning_rate": 9.991640930802883e-07, + "logits/chosen": -0.30699461698532104, + "logits/rejected": -0.3066558837890625, + "logps/chosen": -125.8734359741211, + "logps/rejected": -129.6999969482422, + "logps/weighted_chosen": -0.41710203886032104, + "logps/weighted_rejected": -0.4827117919921875, + "loss": 0.6884, + "rewards/accuracies": 0.546875, + "rewards/chosen": -12.277539253234863, + "rewards/margins": 2.571484327316284, + "rewards/rejected": -14.851171493530273, + "rewards/weighted_accuracies": 0.5218750238418579, + "rewards/weighted_chosen": -0.12371826171875, + "rewards/weighted_margins": 0.1390731781721115, + "rewards/weighted_rejected": -0.26273268461227417, + "step": 220 + }, + { + "epoch": 0.12292070278575723, + "grad_norm": 27.215944290161133, + "learning_rate": 9.98538120584459e-07, + "logits/chosen": -0.3107505738735199, + "logits/rejected": -0.283193975687027, + "logps/chosen": -134.8015594482422, + "logps/rejected": -138.5890655517578, + "logps/weighted_chosen": -0.4462524354457855, + "logps/weighted_rejected": -0.511853039264679, + "loss": 0.6939, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -15.346875190734863, + "rewards/margins": 2.830273389816284, + "rewards/rejected": -18.179492950439453, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": -0.1530204713344574, + "rewards/weighted_margins": 0.1789344847202301, + "rewards/weighted_rejected": -0.3320491909980774, + "step": 230 + }, + { + "epoch": 0.12826508116774668, + "grad_norm": 17.173702239990234, + "learning_rate": 9.977386473421917e-07, + "logits/chosen": -0.27986279129981995, + "logits/rejected": -0.2772073745727539, + "logps/chosen": -121.0453109741211, + "logps/rejected": -124.65312194824219, + "logps/weighted_chosen": -0.4528869688510895, + "logps/weighted_rejected": -0.501416027545929, + "loss": 0.7222, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -18.59375, + "rewards/margins": 2.5132813453674316, + "rewards/rejected": -21.106054306030273, + "rewards/weighted_accuracies": 0.578125, + "rewards/weighted_chosen": -0.2343955934047699, + "rewards/weighted_margins": 0.07334060966968536, + "rewards/weighted_rejected": -0.30777662992477417, + "step": 240 + }, + { + "epoch": 0.13360945954973613, + "grad_norm": 24.67556381225586, + "learning_rate": 9.96765951585378e-07, + "logits/chosen": -0.262664794921875, + "logits/rejected": -0.24544373154640198, + "logps/chosen": -121.7421875, + "logps/rejected": -126.42500305175781, + "logps/weighted_chosen": -0.46705931425094604, + "logps/weighted_rejected": -0.547576904296875, + "loss": 0.6683, + "rewards/accuracies": 0.5625, + "rewards/chosen": -21.9228515625, + "rewards/margins": 2.5238280296325684, + "rewards/rejected": -24.447071075439453, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.2455238401889801, + "rewards/weighted_margins": 0.1756332367658615, + "rewards/weighted_rejected": -0.4210983216762543, + "step": 250 + }, + { + "epoch": 0.13895383793172555, + "grad_norm": 33.55352783203125, + "learning_rate": 9.956203718306388e-07, + "logits/chosen": -0.18781813979148865, + "logits/rejected": -0.156982421875, + "logps/chosen": -127.8578109741211, + "logps/rejected": -132.35546875, + "logps/weighted_chosen": -0.4968322813510895, + "logps/weighted_rejected": -0.5148254632949829, + "loss": 0.7213, + "rewards/accuracies": 0.5531250238418579, + "rewards/chosen": -26.109960556030273, + "rewards/margins": 2.660937547683716, + "rewards/rejected": -28.761327743530273, + "rewards/weighted_accuracies": 0.534375011920929, + "rewards/weighted_chosen": -0.29717254638671875, + "rewards/weighted_margins": 0.04170074313879013, + "rewards/weighted_rejected": -0.3386779725551605, + "step": 260 + }, + { + "epoch": 0.144298216313715, + "grad_norm": 242.56521606445312, + "learning_rate": 9.943023067615136e-07, + "logits/chosen": -0.17297974228858948, + "logits/rejected": -0.1584724485874176, + "logps/chosen": -139.23046875, + "logps/rejected": -142.3390655517578, + "logps/weighted_chosen": -0.48270875215530396, + "logps/weighted_rejected": -0.560772716999054, + "loss": 0.7059, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -33.55195236206055, + "rewards/margins": 3.075976610183716, + "rewards/rejected": -36.6328125, + "rewards/weighted_accuracies": 0.4937500059604645, + "rewards/weighted_chosen": -0.321615606546402, + "rewards/weighted_margins": 0.13364562392234802, + "rewards/weighted_rejected": -0.45517730712890625, + "step": 270 + }, + { + "epoch": 0.14964259469570446, + "grad_norm": 16.8142147064209, + "learning_rate": 9.928122150897112e-07, + "logits/chosen": -0.21183013916015625, + "logits/rejected": -0.168986514210701, + "logps/chosen": -130.66250610351562, + "logps/rejected": -132.0812530517578, + "logps/weighted_chosen": -0.48672789335250854, + "logps/weighted_rejected": -0.578961193561554, + "loss": 0.6614, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -34.4287109375, + "rewards/margins": 3.7144532203674316, + "rewards/rejected": -38.127342224121094, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.28246229887008667, + "rewards/weighted_margins": 0.23281364142894745, + "rewards/weighted_rejected": -0.5149310827255249, + "step": 280 + }, + { + "epoch": 0.1549869730776939, + "grad_norm": 17.110448837280273, + "learning_rate": 9.9115061539547e-07, + "logits/chosen": -0.20588979125022888, + "logits/rejected": -0.18258285522460938, + "logps/chosen": -141.0203094482422, + "logps/rejected": -142.50625610351562, + "logps/weighted_chosen": -0.4897003173828125, + "logps/weighted_rejected": -0.5554351806640625, + "loss": 0.7051, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -38.395896911621094, + "rewards/margins": 3.002734422683716, + "rewards/rejected": -41.394920349121094, + "rewards/weighted_accuracies": 0.5406249761581421, + "rewards/weighted_chosen": -0.3098343014717102, + "rewards/weighted_margins": 0.1397857666015625, + "rewards/weighted_rejected": -0.4498863220214844, + "step": 290 + }, + { + "epoch": 0.16033135145968336, + "grad_norm": 21.960878372192383, + "learning_rate": 9.893180859470818e-07, + "logits/chosen": -0.1905662566423416, + "logits/rejected": -0.16956177353858948, + "logps/chosen": -131.609375, + "logps/rejected": -133.328125, + "logps/weighted_chosen": -0.5007995367050171, + "logps/weighted_rejected": -0.5999816656112671, + "loss": 0.6676, + "rewards/accuracies": 0.559374988079071, + "rewards/chosen": -38.5810546875, + "rewards/margins": 5.082226753234863, + "rewards/rejected": -43.662498474121094, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": -0.27146607637405396, + "rewards/weighted_margins": 0.2570602297782898, + "rewards/weighted_rejected": -0.5287536382675171, + "step": 300 + }, + { + "epoch": 0.16567572984167278, + "grad_norm": 45.954952239990234, + "learning_rate": 9.873152644996424e-07, + "logits/chosen": -0.23566055297851562, + "logits/rejected": -0.23574523627758026, + "logps/chosen": -134.5734405517578, + "logps/rejected": -136.5500030517578, + "logps/weighted_chosen": -0.565338134765625, + "logps/weighted_rejected": -0.620849609375, + "loss": 0.7314, + "rewards/accuracies": 0.59375, + "rewards/chosen": -44.349021911621094, + "rewards/margins": 6.559765815734863, + "rewards/rejected": -50.908592224121094, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": -0.44298553466796875, + "rewards/weighted_margins": 0.1432647705078125, + "rewards/weighted_rejected": -0.5857940912246704, + "step": 310 + }, + { + "epoch": 0.17102010822366223, + "grad_norm": 22.280086517333984, + "learning_rate": 9.85142848073103e-07, + "logits/chosen": -0.2385093718767166, + "logits/rejected": -0.21721191704273224, + "logps/chosen": -138.27499389648438, + "logps/rejected": -138.13516235351562, + "logps/weighted_chosen": -0.5242675542831421, + "logps/weighted_rejected": -0.591705322265625, + "loss": 0.6903, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -48.820899963378906, + "rewards/margins": 1.562109351158142, + "rewards/rejected": -50.394920349121094, + "rewards/weighted_accuracies": 0.5718749761581421, + "rewards/weighted_chosen": -0.3802032470703125, + "rewards/weighted_margins": 0.14343567192554474, + "rewards/weighted_rejected": -0.523608386516571, + "step": 320 + }, + { + "epoch": 0.17636448660565168, + "grad_norm": 19.125673294067383, + "learning_rate": 9.828015927096914e-07, + "logits/chosen": -0.2693939208984375, + "logits/rejected": -0.23780974745750427, + "logps/chosen": -148.56875610351562, + "logps/rejected": -147.24453735351562, + "logps/weighted_chosen": -0.521636962890625, + "logps/weighted_rejected": -0.5547729730606079, + "loss": 0.6703, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -50.613868713378906, + "rewards/margins": 3.5126953125, + "rewards/rejected": -54.113670349121094, + "rewards/weighted_accuracies": 0.550000011920929, + "rewards/weighted_chosen": -0.321145623922348, + "rewards/weighted_margins": 0.18524780869483948, + "rewards/weighted_rejected": -0.5064395666122437, + "step": 330 + }, + { + "epoch": 0.18170886498764113, + "grad_norm": 14.343570709228516, + "learning_rate": 9.802923132107968e-07, + "logits/chosen": -0.25108033418655396, + "logits/rejected": -0.2313240021467209, + "logps/chosen": -149.78671264648438, + "logps/rejected": -153.48046875, + "logps/weighted_chosen": -0.562329113483429, + "logps/weighted_rejected": -0.583233654499054, + "loss": 0.741, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -58.06855392456055, + "rewards/margins": 4.334374904632568, + "rewards/rejected": -62.40312576293945, + "rewards/weighted_accuracies": 0.512499988079071, + "rewards/weighted_chosen": -0.47095948457717896, + "rewards/weighted_margins": 0.02723388746380806, + "rewards/weighted_rejected": -0.498382568359375, + "step": 340 + }, + { + "epoch": 0.18705324336963056, + "grad_norm": 66.20745849609375, + "learning_rate": 9.776158828534024e-07, + "logits/chosen": -0.2837265133857727, + "logits/rejected": -0.2537124752998352, + "logps/chosen": -149.5359344482422, + "logps/rejected": -194.4765625, + "logps/weighted_chosen": -0.540771484375, + "logps/weighted_rejected": -0.589801013469696, + "loss": 0.6779, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -59.75273513793945, + "rewards/margins": 7.256640434265137, + "rewards/rejected": -66.99531555175781, + "rewards/weighted_accuracies": 0.5531250238418579, + "rewards/weighted_chosen": -0.4245468080043793, + "rewards/weighted_margins": 0.146717831492424, + "rewards/weighted_rejected": -0.571148693561554, + "step": 350 + }, + { + "epoch": 0.19239762175162, + "grad_norm": 13.690327644348145, + "learning_rate": 9.747732330861695e-07, + "logits/chosen": -0.18397827446460724, + "logits/rejected": -0.15533828735351562, + "logps/chosen": -143.4562530517578, + "logps/rejected": -145.44686889648438, + "logps/weighted_chosen": -0.5499817132949829, + "logps/weighted_rejected": -0.6820312738418579, + "loss": 0.6644, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -63.58203125, + "rewards/margins": 7.447851657867432, + "rewards/rejected": -71.0308609008789, + "rewards/weighted_accuracies": 0.6031249761581421, + "rewards/weighted_chosen": -0.43936461210250854, + "rewards/weighted_margins": 0.3239502012729645, + "rewards/weighted_rejected": -0.7634918093681335, + "step": 360 + }, + { + "epoch": 0.19774200013360946, + "grad_norm": 20.364688873291016, + "learning_rate": 9.717653532052742e-07, + "logits/chosen": -0.16991272568702698, + "logits/rejected": -0.16076354682445526, + "logps/chosen": -137.11172485351562, + "logps/rejected": -146.09375, + "logps/weighted_chosen": -0.609234631061554, + "logps/weighted_rejected": -0.658831775188446, + "loss": 0.7108, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -63.02363204956055, + "rewards/margins": 9.1494140625, + "rewards/rejected": -72.1488265991211, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": -0.5135604739189148, + "rewards/weighted_margins": 0.1833236664533615, + "rewards/weighted_rejected": -0.6966766119003296, + "step": 370 + }, + { + "epoch": 0.2030863785155989, + "grad_norm": 15.459892272949219, + "learning_rate": 9.685932900101146e-07, + "logits/chosen": -0.17396697402000427, + "logits/rejected": -0.1525276154279709, + "logps/chosen": -143.62655639648438, + "logps/rejected": -147.03515625, + "logps/weighted_chosen": -0.584460437297821, + "logps/weighted_rejected": -0.642120361328125, + "loss": 0.7159, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -68.6537094116211, + "rewards/margins": 5.928124904632568, + "rewards/rejected": -74.57109069824219, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": -0.528277575969696, + "rewards/weighted_margins": 0.11352996528148651, + "rewards/weighted_rejected": -0.6417190432548523, + "step": 380 + }, + { + "epoch": 0.20843075689758836, + "grad_norm": 25.710723876953125, + "learning_rate": 9.652581474390043e-07, + "logits/chosen": -0.17167052626609802, + "logits/rejected": -0.14335784316062927, + "logps/chosen": -147.3156280517578, + "logps/rejected": -151.1484375, + "logps/weighted_chosen": -0.5953735113143921, + "logps/weighted_rejected": -0.649151623249054, + "loss": 0.7014, + "rewards/accuracies": 0.59375, + "rewards/chosen": -70.93769836425781, + "rewards/margins": 9.441015243530273, + "rewards/rejected": -80.35664367675781, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.5777953863143921, + "rewards/weighted_margins": 0.10624237358570099, + "rewards/weighted_rejected": -0.6836212277412415, + "step": 390 + }, + { + "epoch": 0.21377513527957778, + "grad_norm": 42.97126007080078, + "learning_rate": 9.61761086184981e-07, + "logits/chosen": -0.192851260304451, + "logits/rejected": -0.16070251166820526, + "logps/chosen": -148.39688110351562, + "logps/rejected": -149.6046905517578, + "logps/weighted_chosen": -0.6315368413925171, + "logps/weighted_rejected": -0.7087768316268921, + "loss": 0.7113, + "rewards/accuracies": 0.565625011920929, + "rewards/chosen": -73.91288757324219, + "rewards/margins": 6.164453029632568, + "rewards/rejected": -80.05976867675781, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.6400848627090454, + "rewards/weighted_margins": 0.11194305121898651, + "rewards/weighted_rejected": -0.752105712890625, + "step": 400 + }, + { + "epoch": 0.21911951366156723, + "grad_norm": 17.99481773376465, + "learning_rate": 9.581033232918629e-07, + "logits/chosen": -0.14135894179344177, + "logits/rejected": -0.11229457706212997, + "logps/chosen": -145.88827514648438, + "logps/rejected": -149.74063110351562, + "logps/weighted_chosen": -0.6018310785293579, + "logps/weighted_rejected": -0.7620849609375, + "loss": 0.6764, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -74.9345703125, + "rewards/margins": 8.443944931030273, + "rewards/rejected": -83.36601257324219, + "rewards/weighted_accuracies": 0.578125, + "rewards/weighted_chosen": -0.578625500202179, + "rewards/weighted_margins": 0.35536497831344604, + "rewards/weighted_rejected": -0.9342681765556335, + "step": 410 + }, + { + "epoch": 0.22446389204355668, + "grad_norm": 19.236024856567383, + "learning_rate": 9.542861317306952e-07, + "logits/chosen": -0.1445457488298416, + "logits/rejected": -0.1324237883090973, + "logps/chosen": -148.76718139648438, + "logps/rejected": -150.97811889648438, + "logps/weighted_chosen": -0.5991576910018921, + "logps/weighted_rejected": -0.674072265625, + "loss": 0.6735, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -80.28242492675781, + "rewards/margins": 4.427929878234863, + "rewards/rejected": -84.7109375, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.543652355670929, + "rewards/weighted_margins": 0.184315487742424, + "rewards/weighted_rejected": -0.727569580078125, + "step": 420 + }, + { + "epoch": 0.22980827042554614, + "grad_norm": 14.300553321838379, + "learning_rate": 9.503108399567308e-07, + "logits/chosen": -0.14830398559570312, + "logits/rejected": -0.09484557807445526, + "logps/chosen": -162.40625, + "logps/rejected": -167.7195281982422, + "logps/weighted_chosen": -0.5840820074081421, + "logps/weighted_rejected": -0.647021472454071, + "loss": 0.672, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -89.32051086425781, + "rewards/margins": 8.4365234375, + "rewards/rejected": -97.79023742675781, + "rewards/weighted_accuracies": 0.596875011920929, + "rewards/weighted_chosen": -0.545318603515625, + "rewards/weighted_margins": 0.13585510849952698, + "rewards/weighted_rejected": -0.681243896484375, + "step": 430 + }, + { + "epoch": 0.23515264880753559, + "grad_norm": 21.087541580200195, + "learning_rate": 9.461788314471034e-07, + "logits/chosen": -0.10236664116382599, + "logits/rejected": -0.05696678161621094, + "logps/chosen": -155.13827514648438, + "logps/rejected": -159.828125, + "logps/weighted_chosen": -0.6568237543106079, + "logps/weighted_rejected": -0.7309814691543579, + "loss": 0.6898, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -92.36328125, + "rewards/margins": 7.942968845367432, + "rewards/rejected": -100.3109359741211, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.6760101318359375, + "rewards/weighted_margins": 0.16096191108226776, + "rewards/weighted_rejected": -0.836883544921875, + "step": 440 + }, + { + "epoch": 0.240497027189525, + "grad_norm": 19.655607223510742, + "learning_rate": 9.418915442193509e-07, + "logits/chosen": -0.10150299221277237, + "logits/rejected": -0.05734825134277344, + "logps/chosen": -152.95858764648438, + "logps/rejected": -165.30078125, + "logps/weighted_chosen": -0.6426635980606079, + "logps/weighted_rejected": -0.69622802734375, + "loss": 0.7073, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -89.14433288574219, + "rewards/margins": 14.668359756469727, + "rewards/rejected": -103.8412094116211, + "rewards/weighted_accuracies": 0.565625011920929, + "rewards/weighted_chosen": -0.694122314453125, + "rewards/weighted_margins": 0.11862488090991974, + "rewards/weighted_rejected": -0.8125030398368835, + "step": 450 + }, + { + "epoch": 0.24584140557151446, + "grad_norm": 35.64816665649414, + "learning_rate": 9.374504703309579e-07, + "logits/chosen": -0.1730697602033615, + "logits/rejected": -0.1376514434814453, + "logps/chosen": -149.97811889648438, + "logps/rejected": -152.8171844482422, + "logps/weighted_chosen": -0.638531506061554, + "logps/weighted_rejected": -0.77386474609375, + "loss": 0.6563, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -86.3251953125, + "rewards/margins": 8.056055068969727, + "rewards/rejected": -94.4281234741211, + "rewards/weighted_accuracies": 0.5625, + "rewards/weighted_chosen": -0.6749175786972046, + "rewards/weighted_margins": 0.30900877714157104, + "rewards/weighted_rejected": -0.983563244342804, + "step": 460 + }, + { + "epoch": 0.2511857839535039, + "grad_norm": 20.12373161315918, + "learning_rate": 9.328571553600915e-07, + "logits/chosen": -0.14519290626049042, + "logits/rejected": -0.11081619560718536, + "logps/chosen": -154.73828125, + "logps/rejected": -158.703125, + "logps/weighted_chosen": -0.656390368938446, + "logps/weighted_rejected": -0.7307983636856079, + "loss": 0.7214, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -92.1263656616211, + "rewards/margins": 7.731054782867432, + "rewards/rejected": -99.86836242675781, + "rewards/weighted_accuracies": 0.565625011920929, + "rewards/weighted_chosen": -0.6984283328056335, + "rewards/weighted_margins": 0.14166870713233948, + "rewards/weighted_rejected": -0.8403259515762329, + "step": 470 + }, + { + "epoch": 0.25653016233549336, + "grad_norm": 19.16153907775879, + "learning_rate": 9.281131978677106e-07, + "logits/chosen": -0.1819503754377365, + "logits/rejected": -0.14701232314109802, + "logps/chosen": -160.57968139648438, + "logps/rejected": -164.2414093017578, + "logps/weighted_chosen": -0.609344482421875, + "logps/weighted_rejected": -0.750903308391571, + "loss": 0.6525, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -93.19140625, + "rewards/margins": 9.908788681030273, + "rewards/rejected": -103.0589828491211, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": -0.6482604742050171, + "rewards/weighted_margins": 0.2539626955986023, + "rewards/weighted_rejected": -0.9027160406112671, + "step": 480 + }, + { + "epoch": 0.2618745407174828, + "grad_norm": 47.42090606689453, + "learning_rate": 9.232202488412361e-07, + "logits/chosen": -0.18560639023780823, + "logits/rejected": -0.152149960398674, + "logps/chosen": -150.15078735351562, + "logps/rejected": -156.22109985351562, + "logps/weighted_chosen": -0.6249145269393921, + "logps/weighted_rejected": -0.755505383014679, + "loss": 0.6793, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -89.3853530883789, + "rewards/margins": 8.602734565734863, + "rewards/rejected": -97.9830093383789, + "rewards/weighted_accuracies": 0.5531250238418579, + "rewards/weighted_chosen": -0.6580413579940796, + "rewards/weighted_margins": 0.2508483827114105, + "rewards/weighted_rejected": -0.909197986125946, + "step": 490 + }, + { + "epoch": 0.26721891909947226, + "grad_norm": 21.92582893371582, + "learning_rate": 9.181800111199766e-07, + "logits/chosen": -0.2139892578125, + "logits/rejected": -0.185211181640625, + "logps/chosen": -153.05624389648438, + "logps/rejected": -155.6671905517578, + "logps/weighted_chosen": -0.660186767578125, + "logps/weighted_rejected": -0.740765392780304, + "loss": 0.6631, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -91.49101257324219, + "rewards/margins": 12.721094131469727, + "rewards/rejected": -104.1937484741211, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.679455578327179, + "rewards/weighted_margins": 0.2384185791015625, + "rewards/weighted_rejected": -0.9178100824356079, + "step": 500 + }, + { + "epoch": 0.26721891909947226, + "eval_logits/chosen": -0.27163267135620117, + "eval_logits/rejected": -0.24348750710487366, + "eval_logps/chosen": -159.72760009765625, + "eval_logps/rejected": -165.05091857910156, + "eval_logps/weighted_chosen": -0.6352449059486389, + "eval_logps/weighted_rejected": -0.7405111789703369, + "eval_loss": 0.6863088607788086, + "eval_rewards/accuracies": 0.5992871522903442, + "eval_rewards/chosen": -97.6285629272461, + "eval_rewards/margins": 10.883528709411621, + "eval_rewards/rejected": -108.50712585449219, + "eval_rewards/weighted_accuracies": 0.5972505211830139, + "eval_rewards/weighted_chosen": -0.6794247031211853, + "eval_rewards/weighted_margins": 0.20837070047855377, + "eval_rewards/weighted_rejected": -0.8877954483032227, + "eval_runtime": 1137.126, + "eval_samples_per_second": 1.725, + "eval_steps_per_second": 0.432, + "step": 500 + }, + { + "epoch": 0.2725632974814617, + "grad_norm": 13.315505027770996, + "learning_rate": 9.129942388025066e-07, + "logits/chosen": -0.16319426894187927, + "logits/rejected": -0.12026214599609375, + "logps/chosen": -149.140625, + "logps/rejected": -157.3195343017578, + "logps/weighted_chosen": -0.686871349811554, + "logps/weighted_rejected": -0.76031494140625, + "loss": 0.6974, + "rewards/accuracies": 0.609375, + "rewards/chosen": -96.3252944946289, + "rewards/margins": 11.934374809265137, + "rewards/rejected": -108.2183609008789, + "rewards/weighted_accuracies": 0.5531250238418579, + "rewards/weighted_chosen": -0.7806671261787415, + "rewards/weighted_margins": 0.146159365773201, + "rewards/weighted_rejected": -0.926666259765625, + "step": 510 + }, + { + "epoch": 0.2779076758634511, + "grad_norm": 32.33969497680664, + "learning_rate": 9.076647366362082e-07, + "logits/chosen": -0.12646484375, + "logits/rejected": -0.08115959167480469, + "logps/chosen": -154.6750030517578, + "logps/rejected": -163.578125, + "logps/weighted_chosen": -0.655651867389679, + "logps/weighted_rejected": -0.7443176507949829, + "loss": 0.7056, + "rewards/accuracies": 0.625, + "rewards/chosen": -102.30839538574219, + "rewards/margins": 14.767578125, + "rewards/rejected": -117.12773132324219, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": -0.716845691204071, + "rewards/weighted_margins": 0.19047698378562927, + "rewards/weighted_rejected": -0.9073394536972046, + "step": 520 + }, + { + "epoch": 0.2832520542454406, + "grad_norm": 37.720340728759766, + "learning_rate": 9.021933593891841e-07, + "logits/chosen": -0.08947906643152237, + "logits/rejected": -0.060358427464962006, + "logps/chosen": -156.99063110351562, + "logps/rejected": -160.6953125, + "logps/weighted_chosen": -0.643658459186554, + "logps/weighted_rejected": -0.7408691644668579, + "loss": 0.6914, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -111.7953109741211, + "rewards/margins": 9.351758003234863, + "rewards/rejected": -121.13594055175781, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.7329131960868835, + "rewards/weighted_margins": 0.17979125678539276, + "rewards/weighted_rejected": -0.9128783941268921, + "step": 530 + }, + { + "epoch": 0.28859643262743, + "grad_norm": 14.224921226501465, + "learning_rate": 8.965820112047629e-07, + "logits/chosen": -0.09932632744312286, + "logits/rejected": -0.08042526245117188, + "logps/chosen": -167.265625, + "logps/rejected": -178.6164093017578, + "logps/weighted_chosen": -0.663098156452179, + "logps/weighted_rejected": -0.736401379108429, + "loss": 0.6642, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -116.47245788574219, + "rewards/margins": 15.925390243530273, + "rewards/rejected": -132.3351593017578, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.7144225835800171, + "rewards/weighted_margins": 0.17554016411304474, + "rewards/weighted_rejected": -0.889788806438446, + "step": 540 + }, + { + "epoch": 0.2939408110094195, + "grad_norm": 25.44819450378418, + "learning_rate": 8.908326449388213e-07, + "logits/chosen": -0.09075965732336044, + "logits/rejected": -0.0493927001953125, + "logps/chosen": -160.14608764648438, + "logps/rejected": -166.7140655517578, + "logps/weighted_chosen": -0.66522216796875, + "logps/weighted_rejected": -0.734057605266571, + "loss": 0.6806, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -115.48554992675781, + "rewards/margins": 15.63916015625, + "rewards/rejected": -131.1242218017578, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": -0.7069762945175171, + "rewards/weighted_margins": 0.17700500786304474, + "rewards/weighted_rejected": -0.8841186761856079, + "step": 550 + }, + { + "epoch": 0.2992851893914089, + "grad_norm": 16.921142578125, + "learning_rate": 8.849472614801527e-07, + "logits/chosen": -0.09975433349609375, + "logits/rejected": -0.07463760673999786, + "logps/chosen": -165.10311889648438, + "logps/rejected": -171.703125, + "logps/weighted_chosen": -0.670971691608429, + "logps/weighted_rejected": -0.7793823480606079, + "loss": 0.6625, + "rewards/accuracies": 0.609375, + "rewards/chosen": -121.6898422241211, + "rewards/margins": 17.183399200439453, + "rewards/rejected": -138.83261108398438, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": -0.7858215570449829, + "rewards/weighted_margins": 0.20911255478858948, + "rewards/weighted_rejected": -0.994738757610321, + "step": 560 + }, + { + "epoch": 0.30462956777339834, + "grad_norm": 25.59569549560547, + "learning_rate": 8.789279090541208e-07, + "logits/chosen": -0.12498245388269424, + "logits/rejected": -0.07875537872314453, + "logps/chosen": -168.8429718017578, + "logps/rejected": -182.640625, + "logps/weighted_chosen": -0.729663074016571, + "logps/weighted_rejected": -0.8487914800643921, + "loss": 0.7176, + "rewards/accuracies": 0.625, + "rewards/chosen": -128.54061889648438, + "rewards/margins": 19.065624237060547, + "rewards/rejected": -147.580078125, + "rewards/weighted_accuracies": 0.565625011920929, + "rewards/weighted_chosen": -0.860760509967804, + "rewards/weighted_margins": 0.2542465329170227, + "rewards/weighted_rejected": -1.1147247552871704, + "step": 570 + }, + { + "epoch": 0.3099739461553878, + "grad_norm": 29.514760971069336, + "learning_rate": 8.72776682509837e-07, + "logits/chosen": -0.06974849849939346, + "logits/rejected": -0.010189438238739967, + "logps/chosen": -165.45547485351562, + "logps/rejected": -175.70938110351562, + "logps/weighted_chosen": -0.70281982421875, + "logps/weighted_rejected": -0.855236828327179, + "loss": 0.6413, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -129.60879516601562, + "rewards/margins": 22.657421112060547, + "rewards/rejected": -152.33944702148438, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.8432861566543579, + "rewards/weighted_margins": 0.32232969999313354, + "rewards/weighted_rejected": -1.1654784679412842, + "step": 580 + }, + { + "epoch": 0.31531832453737724, + "grad_norm": 30.710643768310547, + "learning_rate": 8.664957225911138e-07, + "logits/chosen": -0.06455497443675995, + "logits/rejected": -0.048407744616270065, + "logps/chosen": -178.4812469482422, + "logps/rejected": -201.52969360351562, + "logps/weighted_chosen": -0.7231200933456421, + "logps/weighted_rejected": -0.8162475824356079, + "loss": 0.6554, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -144.26171875, + "rewards/margins": 14.5029296875, + "rewards/rejected": -158.80703735351562, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.8793884515762329, + "rewards/weighted_margins": 0.18951721489429474, + "rewards/weighted_rejected": -1.0688660144805908, + "step": 590 + }, + { + "epoch": 0.3206627029193667, + "grad_norm": 12.866159439086914, + "learning_rate": 8.600872151914451e-07, + "logits/chosen": -0.011664772406220436, + "logits/rejected": 0.0007385254139080644, + "logps/chosen": -169.0890655517578, + "logps/rejected": -180.8078155517578, + "logps/weighted_chosen": -0.693652331829071, + "logps/weighted_rejected": -0.879650890827179, + "loss": 0.6483, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -140.900390625, + "rewards/margins": 15.367383003234863, + "rewards/rejected": -156.2814483642578, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.8526153564453125, + "rewards/weighted_margins": 0.34896546602249146, + "rewards/weighted_rejected": -1.201562523841858, + "step": 600 + }, + { + "epoch": 0.32600708130135614, + "grad_norm": 19.085651397705078, + "learning_rate": 8.535533905932737e-07, + "logits/chosen": 0.02297821082174778, + "logits/rejected": 0.06475830078125, + "logps/chosen": -190.3406219482422, + "logps/rejected": -203.453125, + "logps/weighted_chosen": -0.7635498046875, + "logps/weighted_rejected": -0.876513659954071, + "loss": 0.6785, + "rewards/accuracies": 0.59375, + "rewards/chosen": -164.71640014648438, + "rewards/margins": 23.290233612060547, + "rewards/rejected": -187.9656219482422, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.9807373285293579, + "rewards/weighted_margins": 0.23782959580421448, + "rewards/weighted_rejected": -1.21783447265625, + "step": 610 + }, + { + "epoch": 0.33135145968334556, + "grad_norm": 32.081443786621094, + "learning_rate": 8.468965226918105e-07, + "logits/chosen": -0.011585617437958717, + "logits/rejected": 0.016324615105986595, + "logps/chosen": -189.3132781982422, + "logps/rejected": -203.7937469482422, + "logps/weighted_chosen": -0.7615417242050171, + "logps/weighted_rejected": -0.82696533203125, + "loss": 0.7265, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -167.82070922851562, + "rewards/margins": 21.9140625, + "rewards/rejected": -189.74276733398438, + "rewards/weighted_accuracies": 0.581250011920929, + "rewards/weighted_chosen": -1.022314429283142, + "rewards/weighted_margins": 0.10553588718175888, + "rewards/weighted_rejected": -1.127905249595642, + "step": 620 + }, + { + "epoch": 0.33669583806533504, + "grad_norm": 21.689685821533203, + "learning_rate": 8.40118928203676e-07, + "logits/chosen": 0.06978149712085724, + "logits/rejected": 0.07966003566980362, + "logps/chosen": -171.18905639648438, + "logps/rejected": -187.87344360351562, + "logps/weighted_chosen": -0.8035522699356079, + "logps/weighted_rejected": -0.938281238079071, + "loss": 0.6889, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -159.8556671142578, + "rewards/margins": 22.441015243530273, + "rewards/rejected": -182.2810516357422, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -1.125823974609375, + "rewards/weighted_margins": 0.22769927978515625, + "rewards/weighted_rejected": -1.353857398033142, + "step": 630 + }, + { + "epoch": 0.34204021644732446, + "grad_norm": 15.847578048706055, + "learning_rate": 8.332229658606382e-07, + "logits/chosen": 0.009455871768295765, + "logits/rejected": 0.03966980054974556, + "logps/chosen": -200.93984985351562, + "logps/rejected": -211.3125, + "logps/weighted_chosen": -0.783465564250946, + "logps/weighted_rejected": -0.895214855670929, + "loss": 0.6535, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -177.21133422851562, + "rewards/margins": 26.989063262939453, + "rewards/rejected": -204.1365203857422, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -1.0060241222381592, + "rewards/weighted_margins": 0.2687225341796875, + "rewards/weighted_rejected": -1.274450659751892, + "step": 640 + }, + { + "epoch": 0.34738459482931394, + "grad_norm": 22.713172912597656, + "learning_rate": 8.262110355887302e-07, + "logits/chosen": 0.028325652703642845, + "logits/rejected": 0.06662559509277344, + "logps/chosen": -189.8406219482422, + "logps/rejected": -196.30859375, + "logps/weighted_chosen": -0.7696533203125, + "logps/weighted_rejected": -0.8599487543106079, + "loss": 0.7008, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -176.921875, + "rewards/margins": 18.607227325439453, + "rewards/rejected": -195.5730438232422, + "rewards/weighted_accuracies": 0.6000000238418579, + "rewards/weighted_chosen": -1.030175805091858, + "rewards/weighted_margins": 0.17038574814796448, + "rewards/weighted_rejected": -1.2006652355194092, + "step": 650 + }, + { + "epoch": 0.35272897321130336, + "grad_norm": 21.770971298217773, + "learning_rate": 8.190855776730293e-07, + "logits/chosen": -0.02209014818072319, + "logits/rejected": 0.02955322340130806, + "logps/chosen": -191.64297485351562, + "logps/rejected": -206.6015625, + "logps/weighted_chosen": -0.783374011516571, + "logps/weighted_rejected": -0.9095824956893921, + "loss": 0.6596, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -177.8122100830078, + "rewards/margins": 24.329492568969727, + "rewards/rejected": -202.0851593017578, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -1.0215880870819092, + "rewards/weighted_margins": 0.25640565156936646, + "rewards/weighted_rejected": -1.2780640125274658, + "step": 660 + }, + { + "epoch": 0.3580733515932928, + "grad_norm": 27.83735466003418, + "learning_rate": 8.118490719083917e-07, + "logits/chosen": -0.04197654873132706, + "logits/rejected": -0.011433410458266735, + "logps/chosen": -217.96249389648438, + "logps/rejected": -221.2265625, + "logps/weighted_chosen": -0.8158324956893921, + "logps/weighted_rejected": -0.9188903570175171, + "loss": 0.6605, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -213.95703125, + "rewards/margins": 13.51513671875, + "rewards/rejected": -227.47109985351562, + "rewards/weighted_accuracies": 0.574999988079071, + "rewards/weighted_chosen": -1.0736420154571533, + "rewards/weighted_margins": 0.30453795194625854, + "rewards/weighted_rejected": -1.37799072265625, + "step": 670 + }, + { + "epoch": 0.36341772997528227, + "grad_norm": 63.59722137451172, + "learning_rate": 8.045040367364368e-07, + "logits/chosen": 0.06599731743335724, + "logits/rejected": 0.10958633571863174, + "logps/chosen": -183.30899047851562, + "logps/rejected": -192.22030639648438, + "logps/weighted_chosen": -0.8281921148300171, + "logps/weighted_rejected": -0.978344738483429, + "loss": 0.6439, + "rewards/accuracies": 0.625, + "rewards/chosen": -185.7732391357422, + "rewards/margins": 25.359766006469727, + "rewards/rejected": -211.1357421875, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -1.1530334949493408, + "rewards/weighted_margins": 0.34730225801467896, + "rewards/weighted_rejected": -1.500341773033142, + "step": 680 + }, + { + "epoch": 0.3687621083572717, + "grad_norm": 11.727298736572266, + "learning_rate": 7.970530283690819e-07, + "logits/chosen": 0.10012397915124893, + "logits/rejected": 0.14632339775562286, + "logps/chosen": -193.36328125, + "logps/rejected": -209.76406860351562, + "logps/weighted_chosen": -0.856884777545929, + "logps/weighted_rejected": -0.98321533203125, + "loss": 0.6659, + "rewards/accuracies": 0.640625, + "rewards/chosen": -201.2302703857422, + "rewards/margins": 32.98652267456055, + "rewards/rejected": -234.34707641601562, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -1.2462646961212158, + "rewards/weighted_margins": 0.27206724882125854, + "rewards/weighted_rejected": -1.5183227062225342, + "step": 690 + }, + { + "epoch": 0.3741064867392611, + "grad_norm": 16.15506362915039, + "learning_rate": 7.89498639898931e-07, + "logits/chosen": 0.17198029160499573, + "logits/rejected": 0.22177276015281677, + "logps/chosen": -201.234375, + "logps/rejected": -211.76718139648438, + "logps/weighted_chosen": -0.888415515422821, + "logps/weighted_rejected": -1.013879418373108, + "loss": 0.6659, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -227.2578125, + "rewards/margins": 29.7451171875, + "rewards/rejected": -256.93048095703125, + "rewards/weighted_accuracies": 0.581250011920929, + "rewards/weighted_chosen": -1.3159301280975342, + "rewards/weighted_margins": 0.291015625, + "rewards/weighted_rejected": -1.6072266101837158, + "step": 700 + }, + { + "epoch": 0.3794508651212506, + "grad_norm": 20.944732666015625, + "learning_rate": 7.818435003968305e-07, + "logits/chosen": 0.11227264255285263, + "logits/rejected": 0.1453804075717926, + "logps/chosen": -220.72891235351562, + "logps/rejected": -239.18594360351562, + "logps/weighted_chosen": -0.891796886920929, + "logps/weighted_rejected": -1.0748779773712158, + "loss": 0.6297, + "rewards/accuracies": 0.625, + "rewards/chosen": -247.6140594482422, + "rewards/margins": 40.565818786621094, + "rewards/rejected": -288.025390625, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -1.3091919422149658, + "rewards/weighted_margins": 0.36735838651657104, + "rewards/weighted_rejected": -1.67669677734375, + "step": 710 + }, + { + "epoch": 0.38479524350324, + "grad_norm": 16.061363220214844, + "learning_rate": 7.740902739969008e-07, + "logits/chosen": 0.08527755737304688, + "logits/rejected": 0.10508499294519424, + "logps/chosen": -219.64688110351562, + "logps/rejected": -228.9499969482422, + "logps/weighted_chosen": -0.8562988042831421, + "logps/weighted_rejected": -0.9676269292831421, + "loss": 0.6551, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -247.7687530517578, + "rewards/margins": 23.225780487060547, + "rewards/rejected": -270.9765625, + "rewards/weighted_accuracies": 0.5874999761581421, + "rewards/weighted_chosen": -1.203149437904358, + "rewards/weighted_margins": 0.285797119140625, + "rewards/weighted_rejected": -1.489160180091858, + "step": 720 + }, + { + "epoch": 0.3901396218852295, + "grad_norm": 31.3476505279541, + "learning_rate": 7.662416589693695e-07, + "logits/chosen": 0.127583310008049, + "logits/rejected": 0.17155151069164276, + "logps/chosen": -201.3367156982422, + "logps/rejected": -212.42422485351562, + "logps/weighted_chosen": -0.8604370355606079, + "logps/weighted_rejected": -1.0564453601837158, + "loss": 0.6553, + "rewards/accuracies": 0.671875, + "rewards/chosen": -219.3408203125, + "rewards/margins": 29.0703125, + "rewards/rejected": -248.494140625, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -1.220678687095642, + "rewards/weighted_margins": 0.39063721895217896, + "rewards/weighted_rejected": -1.611718773841858, + "step": 730 + }, + { + "epoch": 0.3954840002672189, + "grad_norm": 12.7977294921875, + "learning_rate": 7.583003867815192e-07, + "logits/chosen": 0.09622497856616974, + "logits/rejected": 0.12804412841796875, + "logps/chosen": -228.9718780517578, + "logps/rejected": -242.92031860351562, + "logps/weighted_chosen": -0.877368152141571, + "logps/weighted_rejected": -0.99322509765625, + "loss": 0.6549, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -245.8718719482422, + "rewards/margins": 29.608789443969727, + "rewards/rejected": -275.35076904296875, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -1.26458740234375, + "rewards/weighted_margins": 0.26112061738967896, + "rewards/weighted_rejected": -1.525964379310608, + "step": 740 + }, + { + "epoch": 0.40082837864920834, + "grad_norm": 31.35419464111328, + "learning_rate": 7.502692211470869e-07, + "logits/chosen": 0.10113067924976349, + "logits/rejected": 0.14377517998218536, + "logps/chosen": -222.765625, + "logps/rejected": -238.73983764648438, + "logps/weighted_chosen": -0.8651977777481079, + "logps/weighted_rejected": -0.9940551519393921, + "loss": 0.6788, + "rewards/accuracies": 0.609375, + "rewards/chosen": -250.9093780517578, + "rewards/margins": 38.716407775878906, + "rewards/rejected": -289.56915283203125, + "rewards/weighted_accuracies": 0.565625011920929, + "rewards/weighted_chosen": -1.2682373523712158, + "rewards/weighted_margins": 0.230926513671875, + "rewards/weighted_rejected": -1.499151587486267, + "step": 750 + }, + { + "epoch": 0.4061727570311978, + "grad_norm": 17.88278579711914, + "learning_rate": 7.421509570644387e-07, + "logits/chosen": 0.13302917778491974, + "logits/rejected": 0.17120666801929474, + "logps/chosen": -220.7859344482422, + "logps/rejected": -228.9296875, + "logps/weighted_chosen": -0.8490234613418579, + "logps/weighted_rejected": -1.0133788585662842, + "loss": 0.6282, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -258.48907470703125, + "rewards/margins": 20.644336700439453, + "rewards/rejected": -279.12579345703125, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -1.235162377357483, + "rewards/weighted_margins": 0.39671021699905396, + "rewards/weighted_rejected": -1.631689429283142, + "step": 760 + }, + { + "epoch": 0.41151713541318724, + "grad_norm": 19.770469665527344, + "learning_rate": 7.339484198438566e-07, + "logits/chosen": 0.2122901976108551, + "logits/rejected": 0.268341064453125, + "logps/chosen": -206.8859405517578, + "logps/rejected": -226.79452514648438, + "logps/weighted_chosen": -0.9818481206893921, + "logps/weighted_rejected": -1.1372802257537842, + "loss": 0.6596, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -249.8464813232422, + "rewards/margins": 49.36640548706055, + "rewards/rejected": -299.32110595703125, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -1.4886353015899658, + "rewards/weighted_margins": 0.38934630155563354, + "rewards/weighted_rejected": -1.8777344226837158, + "step": 770 + }, + { + "epoch": 0.4168615137951767, + "grad_norm": 17.136322021484375, + "learning_rate": 7.256644641242773e-07, + "logits/chosen": 0.2901626527309418, + "logits/rejected": 0.3380989134311676, + "logps/chosen": -231.08633422851562, + "logps/rejected": -247.6374969482422, + "logps/weighted_chosen": -1.022985816001892, + "logps/weighted_rejected": -1.143164038658142, + "loss": 0.7062, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -293.5406188964844, + "rewards/margins": 38.568748474121094, + "rewards/rejected": -331.9984436035156, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": -1.6519286632537842, + "rewards/weighted_margins": 0.23250122368335724, + "rewards/weighted_rejected": -1.8852417469024658, + "step": 780 + }, + { + "epoch": 0.42220589217716614, + "grad_norm": 16.78958511352539, + "learning_rate": 7.173019728798234e-07, + "logits/chosen": 0.22947922348976135, + "logits/rejected": 0.2762344479560852, + "logps/chosen": -225.95468139648438, + "logps/rejected": -246.8957061767578, + "logps/weighted_chosen": -0.9952758550643921, + "logps/weighted_rejected": -1.151769995689392, + "loss": 0.6317, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -281.17462158203125, + "rewards/margins": 49.26288986206055, + "rewards/rejected": -330.38671875, + "rewards/weighted_accuracies": 0.628125011920929, + "rewards/weighted_chosen": -1.5531127452850342, + "rewards/weighted_margins": 0.3658691346645355, + "rewards/weighted_rejected": -1.918725609779358, + "step": 790 + }, + { + "epoch": 0.42755027055915557, + "grad_norm": 26.674007415771484, + "learning_rate": 7.088638564164738e-07, + "logits/chosen": 0.2732749879360199, + "logits/rejected": 0.32427138090133667, + "logps/chosen": -263.5464782714844, + "logps/rejected": -295.19061279296875, + "logps/weighted_chosen": -1.066674828529358, + "logps/weighted_rejected": -1.1934082508087158, + "loss": 0.6437, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -355.3199157714844, + "rewards/margins": 68.63945007324219, + "rewards/rejected": -424.07501220703125, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -1.735803246498108, + "rewards/weighted_margins": 0.3184448182582855, + "rewards/weighted_rejected": -2.0541014671325684, + "step": 800 + }, + { + "epoch": 0.43289464894114504, + "grad_norm": 53.29134750366211, + "learning_rate": 7.003530513592215e-07, + "logits/chosen": 0.30975571274757385, + "logits/rejected": 0.36716994643211365, + "logps/chosen": -259.92498779296875, + "logps/rejected": -295.8374938964844, + "logps/weighted_chosen": -1.0916259288787842, + "logps/weighted_rejected": -1.302832007408142, + "loss": 0.6407, + "rewards/accuracies": 0.65625, + "rewards/chosen": -362.6820373535156, + "rewards/margins": 78.9546890258789, + "rewards/rejected": -441.49298095703125, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -1.836395263671875, + "rewards/weighted_margins": 0.42828065156936646, + "rewards/weighted_rejected": -2.264721632003784, + "step": 810 + }, + { + "epoch": 0.43823902732313447, + "grad_norm": 21.21747589111328, + "learning_rate": 6.917725196300726e-07, + "logits/chosen": 0.24695205688476562, + "logits/rejected": 0.290322482585907, + "logps/chosen": -282.28790283203125, + "logps/rejected": -317.53045654296875, + "logps/weighted_chosen": -1.034826636314392, + "logps/weighted_rejected": -1.1845214366912842, + "loss": 0.6366, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -389.09100341796875, + "rewards/margins": 74.9951171875, + "rewards/rejected": -464.04376220703125, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -1.68133544921875, + "rewards/weighted_margins": 0.3324523866176605, + "rewards/weighted_rejected": -2.014007568359375, + "step": 820 + }, + { + "epoch": 0.44358340570512395, + "grad_norm": 25.014272689819336, + "learning_rate": 6.831252474172411e-07, + "logits/chosen": 0.2618546485900879, + "logits/rejected": 0.29917725920677185, + "logps/chosen": -255.4765625, + "logps/rejected": -285.0953063964844, + "logps/weighted_chosen": -1.060217261314392, + "logps/weighted_rejected": -1.23773193359375, + "loss": 0.6233, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -353.51092529296875, + "rewards/margins": 61.749610900878906, + "rewards/rejected": -415.51873779296875, + "rewards/weighted_accuracies": 0.628125011920929, + "rewards/weighted_chosen": -1.6349976062774658, + "rewards/weighted_margins": 0.495330810546875, + "rewards/weighted_rejected": -2.130786180496216, + "step": 830 + }, + { + "epoch": 0.44892778408711337, + "grad_norm": 16.74439239501953, + "learning_rate": 6.74414244135898e-07, + "logits/chosen": 0.240010067820549, + "logits/rejected": 0.31660157442092896, + "logps/chosen": -265.046875, + "logps/rejected": -296.85626220703125, + "logps/weighted_chosen": -1.003625512123108, + "logps/weighted_rejected": -1.165197730064392, + "loss": 0.6336, + "rewards/accuracies": 0.625, + "rewards/chosen": -371.21112060546875, + "rewards/margins": 74.8984375, + "rewards/rejected": -446.146484375, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -1.5787353515625, + "rewards/weighted_margins": 0.36824339628219604, + "rewards/weighted_rejected": -1.946313500404358, + "step": 840 + }, + { + "epoch": 0.4542721624691028, + "grad_norm": 20.773204803466797, + "learning_rate": 6.656425413808388e-07, + "logits/chosen": 0.2946815490722656, + "logits/rejected": 0.31943321228027344, + "logps/chosen": -284.6898498535156, + "logps/rejected": -300.85858154296875, + "logps/weighted_chosen": -1.0427734851837158, + "logps/weighted_rejected": -1.2700927257537842, + "loss": 0.6239, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -416.8919982910156, + "rewards/margins": 38.0087890625, + "rewards/rejected": -454.7835998535156, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -1.7086913585662842, + "rewards/weighted_margins": 0.4794067442417145, + "rewards/weighted_rejected": -2.187939405441284, + "step": 850 + }, + { + "epoch": 0.45961654085109227, + "grad_norm": 14.845000267028809, + "learning_rate": 6.568131918714294e-07, + "logits/chosen": 0.3231925964355469, + "logits/rejected": 0.36410826444625854, + "logps/chosen": -282.16876220703125, + "logps/rejected": -317.7523498535156, + "logps/weighted_chosen": -1.077661156654358, + "logps/weighted_rejected": -1.253027319908142, + "loss": 0.6359, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -413.7085876464844, + "rewards/margins": 79.2416000366211, + "rewards/rejected": -492.9193420410156, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -1.803625464439392, + "rewards/weighted_margins": 0.37888795137405396, + "rewards/weighted_rejected": -2.1824707984924316, + "step": 860 + }, + { + "epoch": 0.4649609192330817, + "grad_norm": 23.40928077697754, + "learning_rate": 6.47929268389203e-07, + "logits/chosen": 0.48909300565719604, + "logits/rejected": 0.5615142583847046, + "logps/chosen": -364.40155029296875, + "logps/rejected": -441.33123779296875, + "logps/weighted_chosen": -1.275048851966858, + "logps/weighted_rejected": -1.454126000404358, + "loss": 0.6207, + "rewards/accuracies": 0.640625, + "rewards/chosen": -599.5953369140625, + "rewards/margins": 173.45858764648438, + "rewards/rejected": -773.1968994140625, + "rewards/weighted_accuracies": 0.65625, + "rewards/weighted_chosen": -2.249072313308716, + "rewards/weighted_margins": 0.4446777403354645, + "rewards/weighted_rejected": -2.6942381858825684, + "step": 870 + }, + { + "epoch": 0.47030529761507117, + "grad_norm": 18.79306411743164, + "learning_rate": 6.389938627084732e-07, + "logits/chosen": 0.577728271484375, + "logits/rejected": 0.6453384160995483, + "logps/chosen": -411.82342529296875, + "logps/rejected": -475.7250061035156, + "logps/weighted_chosen": -1.4719970226287842, + "logps/weighted_rejected": -1.6781737804412842, + "loss": 0.6194, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -739.5625, + "rewards/margins": 146.572265625, + "rewards/rejected": -886.0562744140625, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -2.7164063453674316, + "rewards/weighted_margins": 0.531750500202179, + "rewards/weighted_rejected": -3.248828172683716, + "step": 880 + }, + { + "epoch": 0.4756496759970606, + "grad_norm": 22.009668350219727, + "learning_rate": 6.300100845203373e-07, + "logits/chosen": 0.5401207208633423, + "logits/rejected": 0.5778656005859375, + "logps/chosen": -414.01953125, + "logps/rejected": -455.6171875, + "logps/weighted_chosen": -1.2978026866912842, + "logps/weighted_rejected": -1.476660132408142, + "loss": 0.6357, + "rewards/accuracies": 0.559374988079071, + "rewards/chosen": -709.7568359375, + "rewards/margins": 100.93086242675781, + "rewards/rejected": -810.8453369140625, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -2.32794189453125, + "rewards/weighted_margins": 0.41016846895217896, + "rewards/weighted_rejected": -2.738818407058716, + "step": 890 + }, + { + "epoch": 0.48099405437905, + "grad_norm": 21.886869430541992, + "learning_rate": 6.20981060350445e-07, + "logits/chosen": 0.39915162324905396, + "logits/rejected": 0.4594573974609375, + "logps/chosen": -342.47186279296875, + "logps/rejected": -374.34063720703125, + "logps/weighted_chosen": -1.194909691810608, + "logps/weighted_rejected": -1.406274437904358, + "loss": 0.5957, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -555.0218505859375, + "rewards/margins": 85.990234375, + "rewards/rejected": -640.8515625, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -2.103381395339966, + "rewards/weighted_margins": 0.46428221464157104, + "rewards/weighted_rejected": -2.5683836936950684, + "step": 900 + }, + { + "epoch": 0.4863384327610395, + "grad_norm": 16.282630920410156, + "learning_rate": 6.119099324709087e-07, + "logits/chosen": 0.3816207945346832, + "logits/rejected": 0.4475570619106293, + "logps/chosen": -316.72186279296875, + "logps/rejected": -365.28045654296875, + "logps/weighted_chosen": -1.1351439952850342, + "logps/weighted_rejected": -1.284033179283142, + "loss": 0.6351, + "rewards/accuracies": 0.625, + "rewards/chosen": -497.158203125, + "rewards/margins": 119.31523132324219, + "rewards/rejected": -616.5943603515625, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -1.967187523841858, + "rewards/weighted_margins": 0.3160339295864105, + "rewards/weighted_rejected": -2.284106492996216, + "step": 910 + }, + { + "epoch": 0.4916828111430289, + "grad_norm": 18.44144630432129, + "learning_rate": 6.027998578067316e-07, + "logits/chosen": 0.4347885251045227, + "logits/rejected": 0.48958054184913635, + "logps/chosen": -366.1976623535156, + "logps/rejected": -429.29998779296875, + "logps/weighted_chosen": -1.150183081626892, + "logps/weighted_rejected": -1.3400390148162842, + "loss": 0.6143, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -597.3343505859375, + "rewards/margins": 151.3175811767578, + "rewards/rejected": -748.48046875, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -1.9908447265625, + "rewards/weighted_margins": 0.4314208924770355, + "rewards/weighted_rejected": -2.4218993186950684, + "step": 920 + }, + { + "epoch": 0.4970271895250184, + "grad_norm": 25.489429473876953, + "learning_rate": 5.936540068371394e-07, + "logits/chosen": 0.45708006620407104, + "logits/rejected": 0.5000015497207642, + "logps/chosen": -349.76251220703125, + "logps/rejected": -390.9984436035156, + "logps/weighted_chosen": -1.220849633216858, + "logps/weighted_rejected": -1.4126708507537842, + "loss": 0.6245, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -577.7796630859375, + "rewards/margins": 97.06953430175781, + "rewards/rejected": -675.0695190429688, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -2.1529297828674316, + "rewards/weighted_margins": 0.42708128690719604, + "rewards/weighted_rejected": -2.580029249191284, + "step": 930 + }, + { + "epoch": 0.5023715679070078, + "grad_norm": 19.472640991210938, + "learning_rate": 5.844755624921918e-07, + "logits/chosen": 0.5162017941474915, + "logits/rejected": 0.609728991985321, + "logps/chosen": -363.6421813964844, + "logps/rejected": -415.4140625, + "logps/weighted_chosen": -1.33294677734375, + "logps/weighted_rejected": -1.4788939952850342, + "loss": 0.6754, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -618.7273559570312, + "rewards/margins": 130.2453155517578, + "rewards/rejected": -749.0703125, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -2.408947706222534, + "rewards/weighted_margins": 0.34906005859375, + "rewards/weighted_rejected": -2.757458448410034, + "step": 940 + }, + { + "epoch": 0.5077159462889973, + "grad_norm": 32.006065368652344, + "learning_rate": 5.752677190450634e-07, + "logits/chosen": 0.688891589641571, + "logits/rejected": 0.7397063970565796, + "logps/chosen": -482.10235595703125, + "logps/rejected": -568.703125, + "logps/weighted_chosen": -1.4970214366912842, + "logps/weighted_rejected": -1.689294457435608, + "loss": 0.6354, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -910.1663818359375, + "rewards/margins": 212.23184204101562, + "rewards/rejected": -1122.731201171875, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -2.8426513671875, + "rewards/weighted_margins": 0.47370606660842896, + "rewards/weighted_rejected": -3.3175048828125, + "step": 950 + }, + { + "epoch": 0.5130603246709867, + "grad_norm": 24.5925235748291, + "learning_rate": 5.66033681000375e-07, + "logits/chosen": 0.743194580078125, + "logits/rejected": 0.8281310796737671, + "logps/chosen": -493.4140625, + "logps/rejected": -624.3343505859375, + "logps/weighted_chosen": -1.50146484375, + "logps/weighted_rejected": -1.747949242591858, + "loss": 0.5978, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -935.1187744140625, + "rewards/margins": 319.94921875, + "rewards/rejected": -1255.2484130859375, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -2.852978467941284, + "rewards/weighted_margins": 0.568621814250946, + "rewards/weighted_rejected": -3.422070264816284, + "step": 960 + }, + { + "epoch": 0.5184047030529761, + "grad_norm": 18.53244400024414, + "learning_rate": 5.567766619789658e-07, + "logits/chosen": 0.5608322024345398, + "logits/rejected": 0.6179229617118835, + "logps/chosen": -431.1343688964844, + "logps/rejected": -476.00701904296875, + "logps/weighted_chosen": -1.348486304283142, + "logps/weighted_rejected": -1.502197265625, + "loss": 0.6584, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -772.9921875, + "rewards/margins": 109.23124694824219, + "rewards/rejected": -882.5031127929688, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -2.4359130859375, + "rewards/weighted_margins": 0.3783813416957855, + "rewards/weighted_rejected": -2.814013719558716, + "step": 970 + }, + { + "epoch": 0.5237490814349656, + "grad_norm": 32.86454772949219, + "learning_rate": 5.474998835994916e-07, + "logits/chosen": 0.539044201374054, + "logits/rejected": 0.6025703549385071, + "logps/chosen": -401.83905029296875, + "logps/rejected": -486.6234436035156, + "logps/weighted_chosen": -1.262475609779358, + "logps/weighted_rejected": -1.458227515220642, + "loss": 0.6286, + "rewards/accuracies": 0.609375, + "rewards/chosen": -694.048828125, + "rewards/margins": 198.5105438232422, + "rewards/rejected": -892.8984375, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -2.239306688308716, + "rewards/weighted_margins": 0.41253662109375, + "rewards/weighted_rejected": -2.6525635719299316, + "step": 980 + }, + { + "epoch": 0.529093459816955, + "grad_norm": 29.745466232299805, + "learning_rate": 5.382065743572411e-07, + "logits/chosen": 0.5060653686523438, + "logits/rejected": 0.6303970217704773, + "logps/chosen": -385.96405029296875, + "logps/rejected": -484.296875, + "logps/weighted_chosen": -1.272680640220642, + "logps/weighted_rejected": -1.508935570716858, + "loss": 0.5645, + "rewards/accuracies": 0.671875, + "rewards/chosen": -660.4656372070312, + "rewards/margins": 236.1494140625, + "rewards/rejected": -896.5562744140625, + "rewards/weighted_accuracies": 0.7250000238418579, + "rewards/weighted_chosen": -2.2663817405700684, + "rewards/weighted_margins": 0.583996593952179, + "rewards/weighted_rejected": -2.849902391433716, + "step": 990 + }, + { + "epoch": 0.5344378381989445, + "grad_norm": 23.71160125732422, + "learning_rate": 5.288999685005571e-07, + "logits/chosen": 0.6983550786972046, + "logits/rejected": 0.7909576296806335, + "logps/chosen": -492.20623779296875, + "logps/rejected": -586.3453369140625, + "logps/weighted_chosen": -1.4745604991912842, + "logps/weighted_rejected": -1.7019531726837158, + "loss": 0.5934, + "rewards/accuracies": 0.625, + "rewards/chosen": -929.5031127929688, + "rewards/margins": 230.67343139648438, + "rewards/rejected": -1159.8773193359375, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -2.7806153297424316, + "rewards/weighted_margins": 0.5472351312637329, + "rewards/weighted_rejected": -3.3272705078125, + "step": 1000 + }, + { + "epoch": 0.5344378381989445, + "eval_logits/chosen": 0.8178273439407349, + "eval_logits/rejected": 0.9030020833015442, + "eval_logps/chosen": -545.8350219726562, + "eval_logps/rejected": -631.2993774414062, + "eval_logps/weighted_chosen": -1.671688437461853, + "eval_logps/weighted_rejected": -1.9300999641418457, + "eval_loss": 0.6100515723228455, + "eval_rewards/accuracies": 0.612525463104248, + "eval_rewards/chosen": -1062.7760009765625, + "eval_rewards/margins": 211.23606872558594, + "eval_rewards/rejected": -1274.2708740234375, + "eval_rewards/weighted_accuracies": 0.6924643516540527, + "eval_rewards/weighted_chosen": -3.270533561706543, + "eval_rewards/weighted_margins": 0.5912336111068726, + "eval_rewards/weighted_rejected": -3.861767292022705, + "eval_runtime": 1069.2656, + "eval_samples_per_second": 1.834, + "eval_steps_per_second": 0.459, + "step": 1000 + } + ], + "logging_steps": 10, + "max_steps": 1872, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000..5ca5510 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101724c70f22fcebab803e2165d08660fa3875b4ac47496a35436a812fde407f +size 8785 diff --git a/checkpoint-1500/chat_template.jinja b/checkpoint-1500/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/checkpoint-1500/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-1500/config.json b/checkpoint-1500/config.json new file mode 100644 index 0000000..ceed8ae --- /dev/null +++ b/checkpoint-1500/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128009, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/checkpoint-1500/generation_config.json b/checkpoint-1500/generation_config.json new file mode 100644 index 0000000..38e1b41 --- /dev/null +++ b/checkpoint-1500/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128009 + ], + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.54.1" +} diff --git a/checkpoint-1500/model-00001-of-00004.safetensors b/checkpoint-1500/model-00001-of-00004.safetensors new file mode 100644 index 0000000..c979d69 --- /dev/null +++ b/checkpoint-1500/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:979651e84d387b127aaac8d3d6de0236f213113b1fc30b7e69b594628862814d +size 4976698672 diff --git a/checkpoint-1500/model-00002-of-00004.safetensors b/checkpoint-1500/model-00002-of-00004.safetensors new file mode 100644 index 0000000..9ba7240 --- /dev/null +++ b/checkpoint-1500/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:484227fd6bd9b830cf0f05a2b06357eeadf17947dcc8790a996733ade3452e23 +size 4999802720 diff --git a/checkpoint-1500/model-00003-of-00004.safetensors b/checkpoint-1500/model-00003-of-00004.safetensors new file mode 100644 index 0000000..a32f104 --- /dev/null +++ b/checkpoint-1500/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c34b95d45e13679fe9991d579491110cb9991c0878a1cdd9d5ce06c9a50c927f +size 4915916176 diff --git a/checkpoint-1500/model-00004-of-00004.safetensors b/checkpoint-1500/model-00004-of-00004.safetensors new file mode 100644 index 0000000..205d37f --- /dev/null +++ b/checkpoint-1500/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e463167fe1c8b6423c2efda6efe92fc52496d6ffa06fa2f708e5ffdf845f1959 +size 1168138808 diff --git a/checkpoint-1500/model.safetensors.index.json b/checkpoint-1500/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/checkpoint-1500/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/checkpoint-1500/special_tokens_map.json b/checkpoint-1500/special_tokens_map.json new file mode 100644 index 0000000..b43be96 --- /dev/null +++ b/checkpoint-1500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-1500/tokenizer.json b/checkpoint-1500/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/checkpoint-1500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/checkpoint-1500/tokenizer_config.json b/checkpoint-1500/tokenizer_config.json new file mode 100644 index 0000000..34d134f --- /dev/null +++ b/checkpoint-1500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1500/trainer_state.json b/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000..7e25c55 --- /dev/null +++ b/checkpoint-1500/trainer_state.json @@ -0,0 +1,3271 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8016567572984167, + "eval_steps": 500, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005344378381989445, + "grad_norm": 31.37949562072754, + "learning_rate": 0.0, + "logits/chosen": -0.2252655029296875, + "logits/rejected": -0.192626953125, + "logps/chosen": -110.828125, + "logps/rejected": -115.515625, + "logps/weighted_chosen": -0.31903076171875, + "logps/weighted_rejected": -0.333709716796875, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/weighted_accuracies": 0.0, + "rewards/weighted_chosen": 0.0, + "rewards/weighted_margins": 0.0, + "rewards/weighted_rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.005344378381989445, + "grad_norm": 75.76042938232422, + "learning_rate": 4.787234042553191e-08, + "logits/chosen": -0.3284708559513092, + "logits/rejected": -0.3214448392391205, + "logps/chosen": -134.46006774902344, + "logps/rejected": -138.06597900390625, + "logps/weighted_chosen": -0.3890923261642456, + "logps/weighted_rejected": -0.4067721962928772, + "loss": 0.6953, + "rewards/accuracies": 0.2951388955116272, + "rewards/chosen": -0.2072482705116272, + "rewards/margins": -0.1790364533662796, + "rewards/rejected": -0.0282118059694767, + "rewards/weighted_accuracies": 0.3472222089767456, + "rewards/weighted_chosen": -0.0032717387657612562, + "rewards/weighted_margins": -0.0047516291961073875, + "rewards/weighted_rejected": 0.0014813741436228156, + "step": 10 + }, + { + "epoch": 0.01068875676397889, + "grad_norm": 24.94420623779297, + "learning_rate": 1.0106382978723404e-07, + "logits/chosen": -0.2780090272426605, + "logits/rejected": -0.2689048647880554, + "logps/chosen": -115.3070297241211, + "logps/rejected": -114.8101577758789, + "logps/weighted_chosen": -0.354583740234375, + "logps/weighted_rejected": -0.36929017305374146, + "loss": 0.6925, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": 0.0087890625, + "rewards/margins": -0.02734375, + "rewards/rejected": 0.0361328125, + "rewards/weighted_accuracies": 0.4312500059604645, + "rewards/weighted_chosen": 0.0016719817649573088, + "rewards/weighted_margins": 0.0011638641590252519, + "rewards/weighted_rejected": 0.0005052566411904991, + "step": 20 + }, + { + "epoch": 0.016033135145968335, + "grad_norm": 26.90618133544922, + "learning_rate": 1.5425531914893615e-07, + "logits/chosen": -0.26707762479782104, + "logits/rejected": -0.2697288393974304, + "logps/chosen": -122.49687194824219, + "logps/rejected": -128.2218780517578, + "logps/weighted_chosen": -0.364663690328598, + "logps/weighted_rejected": -0.40430909395217896, + "loss": 0.6919, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": 0.099609375, + "rewards/margins": 0.2177734375, + "rewards/rejected": -0.1181640625, + "rewards/weighted_accuracies": 0.4468750059604645, + "rewards/weighted_chosen": 0.0011037830263376236, + "rewards/weighted_margins": 0.0029600143898278475, + "rewards/weighted_rejected": -0.0018524170154705644, + "step": 30 + }, + { + "epoch": 0.02137751352795778, + "grad_norm": 19.056455612182617, + "learning_rate": 2.074468085106383e-07, + "logits/chosen": -0.31552428007125854, + "logits/rejected": -0.309671014547348, + "logps/chosen": -126.5132827758789, + "logps/rejected": -127.7515640258789, + "logps/weighted_chosen": -0.3717803955078125, + "logps/weighted_rejected": -0.36720579862594604, + "loss": 0.6927, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.041015625, + "rewards/margins": 0.150390625, + "rewards/rejected": -0.109375, + "rewards/weighted_accuracies": 0.44062501192092896, + "rewards/weighted_chosen": 0.0009314537164755166, + "rewards/weighted_margins": 0.0009433746454305947, + "rewards/weighted_rejected": -1.831054760259576e-05, + "step": 40 + }, + { + "epoch": 0.026721891909947223, + "grad_norm": 94.1146469116211, + "learning_rate": 2.6063829787234044e-07, + "logits/chosen": -0.2799697816371918, + "logits/rejected": -0.2664199769496918, + "logps/chosen": -120.34375, + "logps/rejected": -120.19062805175781, + "logps/weighted_chosen": -0.367788702249527, + "logps/weighted_rejected": -0.37299805879592896, + "loss": 0.6934, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.01806640625, + "rewards/margins": -0.11865234375, + "rewards/rejected": 0.1005859375, + "rewards/weighted_accuracies": 0.421875, + "rewards/weighted_chosen": 0.0011091709602624178, + "rewards/weighted_margins": -0.0005058288807049394, + "rewards/weighted_rejected": 0.00161571498028934, + "step": 50 + }, + { + "epoch": 0.03206627029193667, + "grad_norm": 47.161922454833984, + "learning_rate": 3.1382978723404253e-07, + "logits/chosen": -0.22172394394874573, + "logits/rejected": -0.2157600373029709, + "logps/chosen": -115.8382797241211, + "logps/rejected": -118.75859069824219, + "logps/weighted_chosen": -0.36602783203125, + "logps/weighted_rejected": -0.369253545999527, + "loss": 0.6908, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.10546875, + "rewards/margins": -0.1259765625, + "rewards/rejected": 0.0205078125, + "rewards/weighted_accuracies": 0.4000000059604645, + "rewards/weighted_chosen": 0.008862781338393688, + "rewards/weighted_margins": 0.00811080913990736, + "rewards/weighted_rejected": 0.0007405281066894531, + "step": 60 + }, + { + "epoch": 0.037410648673926114, + "grad_norm": 42.38877868652344, + "learning_rate": 3.6702127659574467e-07, + "logits/chosen": -0.299722284078598, + "logits/rejected": -0.29665374755859375, + "logps/chosen": -114.1656265258789, + "logps/rejected": -118.0765609741211, + "logps/weighted_chosen": -0.35313719511032104, + "logps/weighted_rejected": -0.3739013671875, + "loss": 0.6949, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.0693359375, + "rewards/margins": -0.0458984375, + "rewards/rejected": -0.0234375, + "rewards/weighted_accuracies": 0.390625, + "rewards/weighted_chosen": 0.0011390686267986894, + "rewards/weighted_margins": -0.0028884888160973787, + "rewards/weighted_rejected": 0.0040260315872728825, + "step": 70 + }, + { + "epoch": 0.04275502705591556, + "grad_norm": 35.451927185058594, + "learning_rate": 4.202127659574468e-07, + "logits/chosen": -0.30262452363967896, + "logits/rejected": -0.24024733901023865, + "logps/chosen": -112.3812484741211, + "logps/rejected": -111.8375015258789, + "logps/weighted_chosen": -0.37481385469436646, + "logps/weighted_rejected": -0.38435667753219604, + "loss": 0.694, + "rewards/accuracies": 0.390625, + "rewards/chosen": -0.1552734375, + "rewards/margins": -0.0341796875, + "rewards/rejected": -0.12109375, + "rewards/weighted_accuracies": 0.48750001192092896, + "rewards/weighted_chosen": 0.01006317138671875, + "rewards/weighted_margins": 0.0036018372047692537, + "rewards/weighted_rejected": 0.006462156772613525, + "step": 80 + }, + { + "epoch": 0.048099405437905, + "grad_norm": 42.264678955078125, + "learning_rate": 4.734042553191489e-07, + "logits/chosen": -0.2891853451728821, + "logits/rejected": -0.23835448920726776, + "logps/chosen": -118.0296859741211, + "logps/rejected": -116.484375, + "logps/weighted_chosen": -0.376077264547348, + "logps/weighted_rejected": -0.3848114013671875, + "loss": 0.6935, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": 0.0458984375, + "rewards/margins": 0.267578125, + "rewards/rejected": -0.2216796875, + "rewards/weighted_accuracies": 0.46562498807907104, + "rewards/weighted_chosen": 0.016840171068906784, + "rewards/weighted_margins": 0.014923477545380592, + "rewards/weighted_rejected": 0.00187511439435184, + "step": 90 + }, + { + "epoch": 0.053443783819894446, + "grad_norm": 36.87267303466797, + "learning_rate": 5.26595744680851e-07, + "logits/chosen": -0.3333755433559418, + "logits/rejected": -0.28821104764938354, + "logps/chosen": -124.0367202758789, + "logps/rejected": -124.33906555175781, + "logps/weighted_chosen": -0.39268797636032104, + "logps/weighted_rejected": -0.4093261659145355, + "loss": 0.6875, + "rewards/accuracies": 0.44062501192092896, + "rewards/chosen": -0.0302734375, + "rewards/margins": 0.36835938692092896, + "rewards/rejected": -0.39863282442092896, + "rewards/weighted_accuracies": 0.484375, + "rewards/weighted_chosen": 0.012582575902342796, + "rewards/weighted_margins": 0.026942063122987747, + "rewards/weighted_rejected": -0.014329910278320312, + "step": 100 + }, + { + "epoch": 0.058788162201883896, + "grad_norm": 17.8848876953125, + "learning_rate": 5.797872340425531e-07, + "logits/chosen": -0.3335327208042145, + "logits/rejected": -0.32384032011032104, + "logps/chosen": -117.6968765258789, + "logps/rejected": -119.85859680175781, + "logps/weighted_chosen": -0.35866087675094604, + "logps/weighted_rejected": -0.37585145235061646, + "loss": 0.7015, + "rewards/accuracies": 0.3968749940395355, + "rewards/chosen": -0.524609386920929, + "rewards/margins": -0.01328125037252903, + "rewards/rejected": -0.511523425579071, + "rewards/weighted_accuracies": 0.4281249940395355, + "rewards/weighted_chosen": -0.0057319640181958675, + "rewards/weighted_margins": 0.0012493133544921875, + "rewards/weighted_rejected": -0.0070056915283203125, + "step": 110 + }, + { + "epoch": 0.06413254058387334, + "grad_norm": 37.190059661865234, + "learning_rate": 6.329787234042553e-07, + "logits/chosen": -0.29607391357421875, + "logits/rejected": -0.2735137939453125, + "logps/chosen": -120.81718444824219, + "logps/rejected": -127.04219055175781, + "logps/weighted_chosen": -0.41831666231155396, + "logps/weighted_rejected": -0.42036741971969604, + "loss": 0.7443, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.43964844942092896, + "rewards/margins": 0.690234363079071, + "rewards/rejected": -1.1298828125, + "rewards/weighted_accuracies": 0.45625001192092896, + "rewards/weighted_chosen": -0.07387389987707138, + "rewards/weighted_margins": -0.04258232191205025, + "rewards/weighted_rejected": -0.031409453600645065, + "step": 120 + }, + { + "epoch": 0.06947691896586278, + "grad_norm": 22.76742172241211, + "learning_rate": 6.861702127659574e-07, + "logits/chosen": -0.30709609389305115, + "logits/rejected": -0.29381561279296875, + "logps/chosen": -120.3140640258789, + "logps/rejected": -121.73515319824219, + "logps/weighted_chosen": -0.38916015625, + "logps/weighted_rejected": -0.38067322969436646, + "loss": 0.6906, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -0.8837890625, + "rewards/margins": 0.4273437559604645, + "rewards/rejected": -1.310937523841858, + "rewards/weighted_accuracies": 0.46875, + "rewards/weighted_chosen": -0.0019147873390465975, + "rewards/weighted_margins": 0.025261688977479935, + "rewards/weighted_rejected": -0.027143806219100952, + "step": 130 + }, + { + "epoch": 0.07482129734785223, + "grad_norm": 25.72498321533203, + "learning_rate": 7.393617021276596e-07, + "logits/chosen": -0.2590804994106293, + "logits/rejected": -0.25146180391311646, + "logps/chosen": -114.2992172241211, + "logps/rejected": -118.26094055175781, + "logps/weighted_chosen": -0.376434326171875, + "logps/weighted_rejected": -0.39961546659469604, + "loss": 0.6968, + "rewards/accuracies": 0.4906249940395355, + "rewards/chosen": -1.7428710460662842, + "rewards/margins": 1.043554663658142, + "rewards/rejected": -2.7867188453674316, + "rewards/weighted_accuracies": 0.46562498807907104, + "rewards/weighted_chosen": -0.01796722412109375, + "rewards/weighted_margins": 0.044054411351680756, + "rewards/weighted_rejected": -0.062059782445430756, + "step": 140 + }, + { + "epoch": 0.08016567572984168, + "grad_norm": 27.819217681884766, + "learning_rate": 7.925531914893616e-07, + "logits/chosen": -0.260824590921402, + "logits/rejected": -0.2456924468278885, + "logps/chosen": -118.3109359741211, + "logps/rejected": -116.1898422241211, + "logps/weighted_chosen": -0.3584175109863281, + "logps/weighted_rejected": -0.389230340719223, + "loss": 0.693, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -2.796679735183716, + "rewards/margins": 1.470312476158142, + "rewards/rejected": -4.267773628234863, + "rewards/weighted_accuracies": 0.5406249761581421, + "rewards/weighted_chosen": 0.02875671349465847, + "rewards/weighted_margins": 0.08138389885425568, + "rewards/weighted_rejected": -0.05276889726519585, + "step": 150 + }, + { + "epoch": 0.08551005411183112, + "grad_norm": 20.792280197143555, + "learning_rate": 8.457446808510637e-07, + "logits/chosen": -0.27181702852249146, + "logits/rejected": -0.26198044419288635, + "logps/chosen": -119.7906265258789, + "logps/rejected": -120.09687805175781, + "logps/weighted_chosen": -0.3836608827114105, + "logps/weighted_rejected": -0.40611571073532104, + "loss": 0.6795, + "rewards/accuracies": 0.503125011920929, + "rewards/chosen": -3.6748046875, + "rewards/margins": 1.8689453601837158, + "rewards/rejected": -5.542382717132568, + "rewards/weighted_accuracies": 0.543749988079071, + "rewards/weighted_chosen": -0.013747024349868298, + "rewards/weighted_margins": 0.10787200927734375, + "rewards/weighted_rejected": -0.12159118801355362, + "step": 160 + }, + { + "epoch": 0.09085443249382057, + "grad_norm": 28.162086486816406, + "learning_rate": 8.989361702127659e-07, + "logits/chosen": -0.30354803800582886, + "logits/rejected": -0.28291016817092896, + "logps/chosen": -118.81172180175781, + "logps/rejected": -123.3851547241211, + "logps/weighted_chosen": -0.364785760641098, + "logps/weighted_rejected": -0.40638428926467896, + "loss": 0.7104, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -4.450781345367432, + "rewards/margins": 1.540624976158142, + "rewards/rejected": -5.989843845367432, + "rewards/weighted_accuracies": 0.49687498807907104, + "rewards/weighted_chosen": -0.05272483825683594, + "rewards/weighted_margins": 0.03959999233484268, + "rewards/weighted_rejected": -0.0922950729727745, + "step": 170 + }, + { + "epoch": 0.09619881087581, + "grad_norm": 62.7450065612793, + "learning_rate": 9.52127659574468e-07, + "logits/chosen": -0.3086685240268707, + "logits/rejected": -0.29756468534469604, + "logps/chosen": -120.9000015258789, + "logps/rejected": -121.6031265258789, + "logps/weighted_chosen": -0.4021057188510895, + "logps/weighted_rejected": -0.43016356229782104, + "loss": 0.6902, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -5.364843845367432, + "rewards/margins": 0.95654296875, + "rewards/rejected": -6.323437690734863, + "rewards/weighted_accuracies": 0.515625, + "rewards/weighted_chosen": -0.07762374728918076, + "rewards/weighted_margins": 0.042090605944395065, + "rewards/weighted_rejected": -0.11983337253332138, + "step": 180 + }, + { + "epoch": 0.10154318925779945, + "grad_norm": 107.93684387207031, + "learning_rate": 9.99999129927477e-07, + "logits/chosen": -0.2998809814453125, + "logits/rejected": -0.2729034423828125, + "logps/chosen": -121.68046569824219, + "logps/rejected": -122.90547180175781, + "logps/weighted_chosen": -0.4000488221645355, + "logps/weighted_rejected": -0.49153441190719604, + "loss": 0.6846, + "rewards/accuracies": 0.4906249940395355, + "rewards/chosen": -7.084570407867432, + "rewards/margins": 1.763671875, + "rewards/rejected": -8.850390434265137, + "rewards/weighted_accuracies": 0.5625, + "rewards/weighted_chosen": -0.06708984076976776, + "rewards/weighted_margins": 0.19685058295726776, + "rewards/weighted_rejected": -0.2636512815952301, + "step": 190 + }, + { + "epoch": 0.10688756763978889, + "grad_norm": 42.62810516357422, + "learning_rate": 9.99894724888679e-07, + "logits/chosen": -0.2994216978549957, + "logits/rejected": -0.2775813937187195, + "logps/chosen": -129.10311889648438, + "logps/rejected": -127.71015930175781, + "logps/weighted_chosen": -0.40317994356155396, + "logps/weighted_rejected": -0.4718689024448395, + "loss": 0.7052, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -8.422460556030273, + "rewards/margins": 1.215234398841858, + "rewards/rejected": -9.638280868530273, + "rewards/weighted_accuracies": 0.518750011920929, + "rewards/weighted_chosen": -0.10227356106042862, + "rewards/weighted_margins": 0.08837012946605682, + "rewards/weighted_rejected": -0.19074249267578125, + "step": 200 + }, + { + "epoch": 0.11223194602177834, + "grad_norm": 56.36786651611328, + "learning_rate": 9.996163469793475e-07, + "logits/chosen": -0.3200393617153168, + "logits/rejected": -0.28080445528030396, + "logps/chosen": -127.65625, + "logps/rejected": -122.5406265258789, + "logps/weighted_chosen": -0.402487188577652, + "logps/weighted_rejected": -0.4784179627895355, + "loss": 0.6855, + "rewards/accuracies": 0.559374988079071, + "rewards/chosen": -9.696874618530273, + "rewards/margins": 3.138671875, + "rewards/rejected": -12.8359375, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.07228164374828339, + "rewards/weighted_margins": 0.1660926789045334, + "rewards/weighted_rejected": -0.23847046494483948, + "step": 210 + }, + { + "epoch": 0.11757632440376779, + "grad_norm": 24.45851707458496, + "learning_rate": 9.991640930802883e-07, + "logits/chosen": -0.30699461698532104, + "logits/rejected": -0.3066558837890625, + "logps/chosen": -125.8734359741211, + "logps/rejected": -129.6999969482422, + "logps/weighted_chosen": -0.41710203886032104, + "logps/weighted_rejected": -0.4827117919921875, + "loss": 0.6884, + "rewards/accuracies": 0.546875, + "rewards/chosen": -12.277539253234863, + "rewards/margins": 2.571484327316284, + "rewards/rejected": -14.851171493530273, + "rewards/weighted_accuracies": 0.5218750238418579, + "rewards/weighted_chosen": -0.12371826171875, + "rewards/weighted_margins": 0.1390731781721115, + "rewards/weighted_rejected": -0.26273268461227417, + "step": 220 + }, + { + "epoch": 0.12292070278575723, + "grad_norm": 27.215944290161133, + "learning_rate": 9.98538120584459e-07, + "logits/chosen": -0.3107505738735199, + "logits/rejected": -0.283193975687027, + "logps/chosen": -134.8015594482422, + "logps/rejected": -138.5890655517578, + "logps/weighted_chosen": -0.4462524354457855, + "logps/weighted_rejected": -0.511853039264679, + "loss": 0.6939, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -15.346875190734863, + "rewards/margins": 2.830273389816284, + "rewards/rejected": -18.179492950439453, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": -0.1530204713344574, + "rewards/weighted_margins": 0.1789344847202301, + "rewards/weighted_rejected": -0.3320491909980774, + "step": 230 + }, + { + "epoch": 0.12826508116774668, + "grad_norm": 17.173702239990234, + "learning_rate": 9.977386473421917e-07, + "logits/chosen": -0.27986279129981995, + "logits/rejected": -0.2772073745727539, + "logps/chosen": -121.0453109741211, + "logps/rejected": -124.65312194824219, + "logps/weighted_chosen": -0.4528869688510895, + "logps/weighted_rejected": -0.501416027545929, + "loss": 0.7222, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -18.59375, + "rewards/margins": 2.5132813453674316, + "rewards/rejected": -21.106054306030273, + "rewards/weighted_accuracies": 0.578125, + "rewards/weighted_chosen": -0.2343955934047699, + "rewards/weighted_margins": 0.07334060966968536, + "rewards/weighted_rejected": -0.30777662992477417, + "step": 240 + }, + { + "epoch": 0.13360945954973613, + "grad_norm": 24.67556381225586, + "learning_rate": 9.96765951585378e-07, + "logits/chosen": -0.262664794921875, + "logits/rejected": -0.24544373154640198, + "logps/chosen": -121.7421875, + "logps/rejected": -126.42500305175781, + "logps/weighted_chosen": -0.46705931425094604, + "logps/weighted_rejected": -0.547576904296875, + "loss": 0.6683, + "rewards/accuracies": 0.5625, + "rewards/chosen": -21.9228515625, + "rewards/margins": 2.5238280296325684, + "rewards/rejected": -24.447071075439453, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.2455238401889801, + "rewards/weighted_margins": 0.1756332367658615, + "rewards/weighted_rejected": -0.4210983216762543, + "step": 250 + }, + { + "epoch": 0.13895383793172555, + "grad_norm": 33.55352783203125, + "learning_rate": 9.956203718306388e-07, + "logits/chosen": -0.18781813979148865, + "logits/rejected": -0.156982421875, + "logps/chosen": -127.8578109741211, + "logps/rejected": -132.35546875, + "logps/weighted_chosen": -0.4968322813510895, + "logps/weighted_rejected": -0.5148254632949829, + "loss": 0.7213, + "rewards/accuracies": 0.5531250238418579, + "rewards/chosen": -26.109960556030273, + "rewards/margins": 2.660937547683716, + "rewards/rejected": -28.761327743530273, + "rewards/weighted_accuracies": 0.534375011920929, + "rewards/weighted_chosen": -0.29717254638671875, + "rewards/weighted_margins": 0.04170074313879013, + "rewards/weighted_rejected": -0.3386779725551605, + "step": 260 + }, + { + "epoch": 0.144298216313715, + "grad_norm": 242.56521606445312, + "learning_rate": 9.943023067615136e-07, + "logits/chosen": -0.17297974228858948, + "logits/rejected": -0.1584724485874176, + "logps/chosen": -139.23046875, + "logps/rejected": -142.3390655517578, + "logps/weighted_chosen": -0.48270875215530396, + "logps/weighted_rejected": -0.560772716999054, + "loss": 0.7059, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -33.55195236206055, + "rewards/margins": 3.075976610183716, + "rewards/rejected": -36.6328125, + "rewards/weighted_accuracies": 0.4937500059604645, + "rewards/weighted_chosen": -0.321615606546402, + "rewards/weighted_margins": 0.13364562392234802, + "rewards/weighted_rejected": -0.45517730712890625, + "step": 270 + }, + { + "epoch": 0.14964259469570446, + "grad_norm": 16.8142147064209, + "learning_rate": 9.928122150897112e-07, + "logits/chosen": -0.21183013916015625, + "logits/rejected": -0.168986514210701, + "logps/chosen": -130.66250610351562, + "logps/rejected": -132.0812530517578, + "logps/weighted_chosen": -0.48672789335250854, + "logps/weighted_rejected": -0.578961193561554, + "loss": 0.6614, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -34.4287109375, + "rewards/margins": 3.7144532203674316, + "rewards/rejected": -38.127342224121094, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.28246229887008667, + "rewards/weighted_margins": 0.23281364142894745, + "rewards/weighted_rejected": -0.5149310827255249, + "step": 280 + }, + { + "epoch": 0.1549869730776939, + "grad_norm": 17.110448837280273, + "learning_rate": 9.9115061539547e-07, + "logits/chosen": -0.20588979125022888, + "logits/rejected": -0.18258285522460938, + "logps/chosen": -141.0203094482422, + "logps/rejected": -142.50625610351562, + "logps/weighted_chosen": -0.4897003173828125, + "logps/weighted_rejected": -0.5554351806640625, + "loss": 0.7051, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -38.395896911621094, + "rewards/margins": 3.002734422683716, + "rewards/rejected": -41.394920349121094, + "rewards/weighted_accuracies": 0.5406249761581421, + "rewards/weighted_chosen": -0.3098343014717102, + "rewards/weighted_margins": 0.1397857666015625, + "rewards/weighted_rejected": -0.4498863220214844, + "step": 290 + }, + { + "epoch": 0.16033135145968336, + "grad_norm": 21.960878372192383, + "learning_rate": 9.893180859470818e-07, + "logits/chosen": -0.1905662566423416, + "logits/rejected": -0.16956177353858948, + "logps/chosen": -131.609375, + "logps/rejected": -133.328125, + "logps/weighted_chosen": -0.5007995367050171, + "logps/weighted_rejected": -0.5999816656112671, + "loss": 0.6676, + "rewards/accuracies": 0.559374988079071, + "rewards/chosen": -38.5810546875, + "rewards/margins": 5.082226753234863, + "rewards/rejected": -43.662498474121094, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": -0.27146607637405396, + "rewards/weighted_margins": 0.2570602297782898, + "rewards/weighted_rejected": -0.5287536382675171, + "step": 300 + }, + { + "epoch": 0.16567572984167278, + "grad_norm": 45.954952239990234, + "learning_rate": 9.873152644996424e-07, + "logits/chosen": -0.23566055297851562, + "logits/rejected": -0.23574523627758026, + "logps/chosen": -134.5734405517578, + "logps/rejected": -136.5500030517578, + "logps/weighted_chosen": -0.565338134765625, + "logps/weighted_rejected": -0.620849609375, + "loss": 0.7314, + "rewards/accuracies": 0.59375, + "rewards/chosen": -44.349021911621094, + "rewards/margins": 6.559765815734863, + "rewards/rejected": -50.908592224121094, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": -0.44298553466796875, + "rewards/weighted_margins": 0.1432647705078125, + "rewards/weighted_rejected": -0.5857940912246704, + "step": 310 + }, + { + "epoch": 0.17102010822366223, + "grad_norm": 22.280086517333984, + "learning_rate": 9.85142848073103e-07, + "logits/chosen": -0.2385093718767166, + "logits/rejected": -0.21721191704273224, + "logps/chosen": -138.27499389648438, + "logps/rejected": -138.13516235351562, + "logps/weighted_chosen": -0.5242675542831421, + "logps/weighted_rejected": -0.591705322265625, + "loss": 0.6903, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -48.820899963378906, + "rewards/margins": 1.562109351158142, + "rewards/rejected": -50.394920349121094, + "rewards/weighted_accuracies": 0.5718749761581421, + "rewards/weighted_chosen": -0.3802032470703125, + "rewards/weighted_margins": 0.14343567192554474, + "rewards/weighted_rejected": -0.523608386516571, + "step": 320 + }, + { + "epoch": 0.17636448660565168, + "grad_norm": 19.125673294067383, + "learning_rate": 9.828015927096914e-07, + "logits/chosen": -0.2693939208984375, + "logits/rejected": -0.23780974745750427, + "logps/chosen": -148.56875610351562, + "logps/rejected": -147.24453735351562, + "logps/weighted_chosen": -0.521636962890625, + "logps/weighted_rejected": -0.5547729730606079, + "loss": 0.6703, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -50.613868713378906, + "rewards/margins": 3.5126953125, + "rewards/rejected": -54.113670349121094, + "rewards/weighted_accuracies": 0.550000011920929, + "rewards/weighted_chosen": -0.321145623922348, + "rewards/weighted_margins": 0.18524780869483948, + "rewards/weighted_rejected": -0.5064395666122437, + "step": 330 + }, + { + "epoch": 0.18170886498764113, + "grad_norm": 14.343570709228516, + "learning_rate": 9.802923132107968e-07, + "logits/chosen": -0.25108033418655396, + "logits/rejected": -0.2313240021467209, + "logps/chosen": -149.78671264648438, + "logps/rejected": -153.48046875, + "logps/weighted_chosen": -0.562329113483429, + "logps/weighted_rejected": -0.583233654499054, + "loss": 0.741, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -58.06855392456055, + "rewards/margins": 4.334374904632568, + "rewards/rejected": -62.40312576293945, + "rewards/weighted_accuracies": 0.512499988079071, + "rewards/weighted_chosen": -0.47095948457717896, + "rewards/weighted_margins": 0.02723388746380806, + "rewards/weighted_rejected": -0.498382568359375, + "step": 340 + }, + { + "epoch": 0.18705324336963056, + "grad_norm": 66.20745849609375, + "learning_rate": 9.776158828534024e-07, + "logits/chosen": -0.2837265133857727, + "logits/rejected": -0.2537124752998352, + "logps/chosen": -149.5359344482422, + "logps/rejected": -194.4765625, + "logps/weighted_chosen": -0.540771484375, + "logps/weighted_rejected": -0.589801013469696, + "loss": 0.6779, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -59.75273513793945, + "rewards/margins": 7.256640434265137, + "rewards/rejected": -66.99531555175781, + "rewards/weighted_accuracies": 0.5531250238418579, + "rewards/weighted_chosen": -0.4245468080043793, + "rewards/weighted_margins": 0.146717831492424, + "rewards/weighted_rejected": -0.571148693561554, + "step": 350 + }, + { + "epoch": 0.19239762175162, + "grad_norm": 13.690327644348145, + "learning_rate": 9.747732330861695e-07, + "logits/chosen": -0.18397827446460724, + "logits/rejected": -0.15533828735351562, + "logps/chosen": -143.4562530517578, + "logps/rejected": -145.44686889648438, + "logps/weighted_chosen": -0.5499817132949829, + "logps/weighted_rejected": -0.6820312738418579, + "loss": 0.6644, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -63.58203125, + "rewards/margins": 7.447851657867432, + "rewards/rejected": -71.0308609008789, + "rewards/weighted_accuracies": 0.6031249761581421, + "rewards/weighted_chosen": -0.43936461210250854, + "rewards/weighted_margins": 0.3239502012729645, + "rewards/weighted_rejected": -0.7634918093681335, + "step": 360 + }, + { + "epoch": 0.19774200013360946, + "grad_norm": 20.364688873291016, + "learning_rate": 9.717653532052742e-07, + "logits/chosen": -0.16991272568702698, + "logits/rejected": -0.16076354682445526, + "logps/chosen": -137.11172485351562, + "logps/rejected": -146.09375, + "logps/weighted_chosen": -0.609234631061554, + "logps/weighted_rejected": -0.658831775188446, + "loss": 0.7108, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -63.02363204956055, + "rewards/margins": 9.1494140625, + "rewards/rejected": -72.1488265991211, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": -0.5135604739189148, + "rewards/weighted_margins": 0.1833236664533615, + "rewards/weighted_rejected": -0.6966766119003296, + "step": 370 + }, + { + "epoch": 0.2030863785155989, + "grad_norm": 15.459892272949219, + "learning_rate": 9.685932900101146e-07, + "logits/chosen": -0.17396697402000427, + "logits/rejected": -0.1525276154279709, + "logps/chosen": -143.62655639648438, + "logps/rejected": -147.03515625, + "logps/weighted_chosen": -0.584460437297821, + "logps/weighted_rejected": -0.642120361328125, + "loss": 0.7159, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -68.6537094116211, + "rewards/margins": 5.928124904632568, + "rewards/rejected": -74.57109069824219, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": -0.528277575969696, + "rewards/weighted_margins": 0.11352996528148651, + "rewards/weighted_rejected": -0.6417190432548523, + "step": 380 + }, + { + "epoch": 0.20843075689758836, + "grad_norm": 25.710723876953125, + "learning_rate": 9.652581474390043e-07, + "logits/chosen": -0.17167052626609802, + "logits/rejected": -0.14335784316062927, + "logps/chosen": -147.3156280517578, + "logps/rejected": -151.1484375, + "logps/weighted_chosen": -0.5953735113143921, + "logps/weighted_rejected": -0.649151623249054, + "loss": 0.7014, + "rewards/accuracies": 0.59375, + "rewards/chosen": -70.93769836425781, + "rewards/margins": 9.441015243530273, + "rewards/rejected": -80.35664367675781, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.5777953863143921, + "rewards/weighted_margins": 0.10624237358570099, + "rewards/weighted_rejected": -0.6836212277412415, + "step": 390 + }, + { + "epoch": 0.21377513527957778, + "grad_norm": 42.97126007080078, + "learning_rate": 9.61761086184981e-07, + "logits/chosen": -0.192851260304451, + "logits/rejected": -0.16070251166820526, + "logps/chosen": -148.39688110351562, + "logps/rejected": -149.6046905517578, + "logps/weighted_chosen": -0.6315368413925171, + "logps/weighted_rejected": -0.7087768316268921, + "loss": 0.7113, + "rewards/accuracies": 0.565625011920929, + "rewards/chosen": -73.91288757324219, + "rewards/margins": 6.164453029632568, + "rewards/rejected": -80.05976867675781, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.6400848627090454, + "rewards/weighted_margins": 0.11194305121898651, + "rewards/weighted_rejected": -0.752105712890625, + "step": 400 + }, + { + "epoch": 0.21911951366156723, + "grad_norm": 17.99481773376465, + "learning_rate": 9.581033232918629e-07, + "logits/chosen": -0.14135894179344177, + "logits/rejected": -0.11229457706212997, + "logps/chosen": -145.88827514648438, + "logps/rejected": -149.74063110351562, + "logps/weighted_chosen": -0.6018310785293579, + "logps/weighted_rejected": -0.7620849609375, + "loss": 0.6764, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -74.9345703125, + "rewards/margins": 8.443944931030273, + "rewards/rejected": -83.36601257324219, + "rewards/weighted_accuracies": 0.578125, + "rewards/weighted_chosen": -0.578625500202179, + "rewards/weighted_margins": 0.35536497831344604, + "rewards/weighted_rejected": -0.9342681765556335, + "step": 410 + }, + { + "epoch": 0.22446389204355668, + "grad_norm": 19.236024856567383, + "learning_rate": 9.542861317306952e-07, + "logits/chosen": -0.1445457488298416, + "logits/rejected": -0.1324237883090973, + "logps/chosen": -148.76718139648438, + "logps/rejected": -150.97811889648438, + "logps/weighted_chosen": -0.5991576910018921, + "logps/weighted_rejected": -0.674072265625, + "loss": 0.6735, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -80.28242492675781, + "rewards/margins": 4.427929878234863, + "rewards/rejected": -84.7109375, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.543652355670929, + "rewards/weighted_margins": 0.184315487742424, + "rewards/weighted_rejected": -0.727569580078125, + "step": 420 + }, + { + "epoch": 0.22980827042554614, + "grad_norm": 14.300553321838379, + "learning_rate": 9.503108399567308e-07, + "logits/chosen": -0.14830398559570312, + "logits/rejected": -0.09484557807445526, + "logps/chosen": -162.40625, + "logps/rejected": -167.7195281982422, + "logps/weighted_chosen": -0.5840820074081421, + "logps/weighted_rejected": -0.647021472454071, + "loss": 0.672, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -89.32051086425781, + "rewards/margins": 8.4365234375, + "rewards/rejected": -97.79023742675781, + "rewards/weighted_accuracies": 0.596875011920929, + "rewards/weighted_chosen": -0.545318603515625, + "rewards/weighted_margins": 0.13585510849952698, + "rewards/weighted_rejected": -0.681243896484375, + "step": 430 + }, + { + "epoch": 0.23515264880753559, + "grad_norm": 21.087541580200195, + "learning_rate": 9.461788314471034e-07, + "logits/chosen": -0.10236664116382599, + "logits/rejected": -0.05696678161621094, + "logps/chosen": -155.13827514648438, + "logps/rejected": -159.828125, + "logps/weighted_chosen": -0.6568237543106079, + "logps/weighted_rejected": -0.7309814691543579, + "loss": 0.6898, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -92.36328125, + "rewards/margins": 7.942968845367432, + "rewards/rejected": -100.3109359741211, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.6760101318359375, + "rewards/weighted_margins": 0.16096191108226776, + "rewards/weighted_rejected": -0.836883544921875, + "step": 440 + }, + { + "epoch": 0.240497027189525, + "grad_norm": 19.655607223510742, + "learning_rate": 9.418915442193509e-07, + "logits/chosen": -0.10150299221277237, + "logits/rejected": -0.05734825134277344, + "logps/chosen": -152.95858764648438, + "logps/rejected": -165.30078125, + "logps/weighted_chosen": -0.6426635980606079, + "logps/weighted_rejected": -0.69622802734375, + "loss": 0.7073, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -89.14433288574219, + "rewards/margins": 14.668359756469727, + "rewards/rejected": -103.8412094116211, + "rewards/weighted_accuracies": 0.565625011920929, + "rewards/weighted_chosen": -0.694122314453125, + "rewards/weighted_margins": 0.11862488090991974, + "rewards/weighted_rejected": -0.8125030398368835, + "step": 450 + }, + { + "epoch": 0.24584140557151446, + "grad_norm": 35.64816665649414, + "learning_rate": 9.374504703309579e-07, + "logits/chosen": -0.1730697602033615, + "logits/rejected": -0.1376514434814453, + "logps/chosen": -149.97811889648438, + "logps/rejected": -152.8171844482422, + "logps/weighted_chosen": -0.638531506061554, + "logps/weighted_rejected": -0.77386474609375, + "loss": 0.6563, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -86.3251953125, + "rewards/margins": 8.056055068969727, + "rewards/rejected": -94.4281234741211, + "rewards/weighted_accuracies": 0.5625, + "rewards/weighted_chosen": -0.6749175786972046, + "rewards/weighted_margins": 0.30900877714157104, + "rewards/weighted_rejected": -0.983563244342804, + "step": 460 + }, + { + "epoch": 0.2511857839535039, + "grad_norm": 20.12373161315918, + "learning_rate": 9.328571553600915e-07, + "logits/chosen": -0.14519290626049042, + "logits/rejected": -0.11081619560718536, + "logps/chosen": -154.73828125, + "logps/rejected": -158.703125, + "logps/weighted_chosen": -0.656390368938446, + "logps/weighted_rejected": -0.7307983636856079, + "loss": 0.7214, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -92.1263656616211, + "rewards/margins": 7.731054782867432, + "rewards/rejected": -99.86836242675781, + "rewards/weighted_accuracies": 0.565625011920929, + "rewards/weighted_chosen": -0.6984283328056335, + "rewards/weighted_margins": 0.14166870713233948, + "rewards/weighted_rejected": -0.8403259515762329, + "step": 470 + }, + { + "epoch": 0.25653016233549336, + "grad_norm": 19.16153907775879, + "learning_rate": 9.281131978677106e-07, + "logits/chosen": -0.1819503754377365, + "logits/rejected": -0.14701232314109802, + "logps/chosen": -160.57968139648438, + "logps/rejected": -164.2414093017578, + "logps/weighted_chosen": -0.609344482421875, + "logps/weighted_rejected": -0.750903308391571, + "loss": 0.6525, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -93.19140625, + "rewards/margins": 9.908788681030273, + "rewards/rejected": -103.0589828491211, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": -0.6482604742050171, + "rewards/weighted_margins": 0.2539626955986023, + "rewards/weighted_rejected": -0.9027160406112671, + "step": 480 + }, + { + "epoch": 0.2618745407174828, + "grad_norm": 47.42090606689453, + "learning_rate": 9.232202488412361e-07, + "logits/chosen": -0.18560639023780823, + "logits/rejected": -0.152149960398674, + "logps/chosen": -150.15078735351562, + "logps/rejected": -156.22109985351562, + "logps/weighted_chosen": -0.6249145269393921, + "logps/weighted_rejected": -0.755505383014679, + "loss": 0.6793, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -89.3853530883789, + "rewards/margins": 8.602734565734863, + "rewards/rejected": -97.9830093383789, + "rewards/weighted_accuracies": 0.5531250238418579, + "rewards/weighted_chosen": -0.6580413579940796, + "rewards/weighted_margins": 0.2508483827114105, + "rewards/weighted_rejected": -0.909197986125946, + "step": 490 + }, + { + "epoch": 0.26721891909947226, + "grad_norm": 21.92582893371582, + "learning_rate": 9.181800111199766e-07, + "logits/chosen": -0.2139892578125, + "logits/rejected": -0.185211181640625, + "logps/chosen": -153.05624389648438, + "logps/rejected": -155.6671905517578, + "logps/weighted_chosen": -0.660186767578125, + "logps/weighted_rejected": -0.740765392780304, + "loss": 0.6631, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -91.49101257324219, + "rewards/margins": 12.721094131469727, + "rewards/rejected": -104.1937484741211, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.679455578327179, + "rewards/weighted_margins": 0.2384185791015625, + "rewards/weighted_rejected": -0.9178100824356079, + "step": 500 + }, + { + "epoch": 0.26721891909947226, + "eval_logits/chosen": -0.27163267135620117, + "eval_logits/rejected": -0.24348750710487366, + "eval_logps/chosen": -159.72760009765625, + "eval_logps/rejected": -165.05091857910156, + "eval_logps/weighted_chosen": -0.6352449059486389, + "eval_logps/weighted_rejected": -0.7405111789703369, + "eval_loss": 0.6863088607788086, + "eval_rewards/accuracies": 0.5992871522903442, + "eval_rewards/chosen": -97.6285629272461, + "eval_rewards/margins": 10.883528709411621, + "eval_rewards/rejected": -108.50712585449219, + "eval_rewards/weighted_accuracies": 0.5972505211830139, + "eval_rewards/weighted_chosen": -0.6794247031211853, + "eval_rewards/weighted_margins": 0.20837070047855377, + "eval_rewards/weighted_rejected": -0.8877954483032227, + "eval_runtime": 1137.126, + "eval_samples_per_second": 1.725, + "eval_steps_per_second": 0.432, + "step": 500 + }, + { + "epoch": 0.2725632974814617, + "grad_norm": 13.315505027770996, + "learning_rate": 9.129942388025066e-07, + "logits/chosen": -0.16319426894187927, + "logits/rejected": -0.12026214599609375, + "logps/chosen": -149.140625, + "logps/rejected": -157.3195343017578, + "logps/weighted_chosen": -0.686871349811554, + "logps/weighted_rejected": -0.76031494140625, + "loss": 0.6974, + "rewards/accuracies": 0.609375, + "rewards/chosen": -96.3252944946289, + "rewards/margins": 11.934374809265137, + "rewards/rejected": -108.2183609008789, + "rewards/weighted_accuracies": 0.5531250238418579, + "rewards/weighted_chosen": -0.7806671261787415, + "rewards/weighted_margins": 0.146159365773201, + "rewards/weighted_rejected": -0.926666259765625, + "step": 510 + }, + { + "epoch": 0.2779076758634511, + "grad_norm": 32.33969497680664, + "learning_rate": 9.076647366362082e-07, + "logits/chosen": -0.12646484375, + "logits/rejected": -0.08115959167480469, + "logps/chosen": -154.6750030517578, + "logps/rejected": -163.578125, + "logps/weighted_chosen": -0.655651867389679, + "logps/weighted_rejected": -0.7443176507949829, + "loss": 0.7056, + "rewards/accuracies": 0.625, + "rewards/chosen": -102.30839538574219, + "rewards/margins": 14.767578125, + "rewards/rejected": -117.12773132324219, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": -0.716845691204071, + "rewards/weighted_margins": 0.19047698378562927, + "rewards/weighted_rejected": -0.9073394536972046, + "step": 520 + }, + { + "epoch": 0.2832520542454406, + "grad_norm": 37.720340728759766, + "learning_rate": 9.021933593891841e-07, + "logits/chosen": -0.08947906643152237, + "logits/rejected": -0.060358427464962006, + "logps/chosen": -156.99063110351562, + "logps/rejected": -160.6953125, + "logps/weighted_chosen": -0.643658459186554, + "logps/weighted_rejected": -0.7408691644668579, + "loss": 0.6914, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -111.7953109741211, + "rewards/margins": 9.351758003234863, + "rewards/rejected": -121.13594055175781, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.7329131960868835, + "rewards/weighted_margins": 0.17979125678539276, + "rewards/weighted_rejected": -0.9128783941268921, + "step": 530 + }, + { + "epoch": 0.28859643262743, + "grad_norm": 14.224921226501465, + "learning_rate": 8.965820112047629e-07, + "logits/chosen": -0.09932632744312286, + "logits/rejected": -0.08042526245117188, + "logps/chosen": -167.265625, + "logps/rejected": -178.6164093017578, + "logps/weighted_chosen": -0.663098156452179, + "logps/weighted_rejected": -0.736401379108429, + "loss": 0.6642, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -116.47245788574219, + "rewards/margins": 15.925390243530273, + "rewards/rejected": -132.3351593017578, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -0.7144225835800171, + "rewards/weighted_margins": 0.17554016411304474, + "rewards/weighted_rejected": -0.889788806438446, + "step": 540 + }, + { + "epoch": 0.2939408110094195, + "grad_norm": 25.44819450378418, + "learning_rate": 8.908326449388213e-07, + "logits/chosen": -0.09075965732336044, + "logits/rejected": -0.0493927001953125, + "logps/chosen": -160.14608764648438, + "logps/rejected": -166.7140655517578, + "logps/weighted_chosen": -0.66522216796875, + "logps/weighted_rejected": -0.734057605266571, + "loss": 0.6806, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -115.48554992675781, + "rewards/margins": 15.63916015625, + "rewards/rejected": -131.1242218017578, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": -0.7069762945175171, + "rewards/weighted_margins": 0.17700500786304474, + "rewards/weighted_rejected": -0.8841186761856079, + "step": 550 + }, + { + "epoch": 0.2992851893914089, + "grad_norm": 16.921142578125, + "learning_rate": 8.849472614801527e-07, + "logits/chosen": -0.09975433349609375, + "logits/rejected": -0.07463760673999786, + "logps/chosen": -165.10311889648438, + "logps/rejected": -171.703125, + "logps/weighted_chosen": -0.670971691608429, + "logps/weighted_rejected": -0.7793823480606079, + "loss": 0.6625, + "rewards/accuracies": 0.609375, + "rewards/chosen": -121.6898422241211, + "rewards/margins": 17.183399200439453, + "rewards/rejected": -138.83261108398438, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": -0.7858215570449829, + "rewards/weighted_margins": 0.20911255478858948, + "rewards/weighted_rejected": -0.994738757610321, + "step": 560 + }, + { + "epoch": 0.30462956777339834, + "grad_norm": 25.59569549560547, + "learning_rate": 8.789279090541208e-07, + "logits/chosen": -0.12498245388269424, + "logits/rejected": -0.07875537872314453, + "logps/chosen": -168.8429718017578, + "logps/rejected": -182.640625, + "logps/weighted_chosen": -0.729663074016571, + "logps/weighted_rejected": -0.8487914800643921, + "loss": 0.7176, + "rewards/accuracies": 0.625, + "rewards/chosen": -128.54061889648438, + "rewards/margins": 19.065624237060547, + "rewards/rejected": -147.580078125, + "rewards/weighted_accuracies": 0.565625011920929, + "rewards/weighted_chosen": -0.860760509967804, + "rewards/weighted_margins": 0.2542465329170227, + "rewards/weighted_rejected": -1.1147247552871704, + "step": 570 + }, + { + "epoch": 0.3099739461553878, + "grad_norm": 29.514760971069336, + "learning_rate": 8.72776682509837e-07, + "logits/chosen": -0.06974849849939346, + "logits/rejected": -0.010189438238739967, + "logps/chosen": -165.45547485351562, + "logps/rejected": -175.70938110351562, + "logps/weighted_chosen": -0.70281982421875, + "logps/weighted_rejected": -0.855236828327179, + "loss": 0.6413, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -129.60879516601562, + "rewards/margins": 22.657421112060547, + "rewards/rejected": -152.33944702148438, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.8432861566543579, + "rewards/weighted_margins": 0.32232969999313354, + "rewards/weighted_rejected": -1.1654784679412842, + "step": 580 + }, + { + "epoch": 0.31531832453737724, + "grad_norm": 30.710643768310547, + "learning_rate": 8.664957225911138e-07, + "logits/chosen": -0.06455497443675995, + "logits/rejected": -0.048407744616270065, + "logps/chosen": -178.4812469482422, + "logps/rejected": -201.52969360351562, + "logps/weighted_chosen": -0.7231200933456421, + "logps/weighted_rejected": -0.8162475824356079, + "loss": 0.6554, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -144.26171875, + "rewards/margins": 14.5029296875, + "rewards/rejected": -158.80703735351562, + "rewards/weighted_accuracies": 0.6187499761581421, + "rewards/weighted_chosen": -0.8793884515762329, + "rewards/weighted_margins": 0.18951721489429474, + "rewards/weighted_rejected": -1.0688660144805908, + "step": 590 + }, + { + "epoch": 0.3206627029193667, + "grad_norm": 12.866159439086914, + "learning_rate": 8.600872151914451e-07, + "logits/chosen": -0.011664772406220436, + "logits/rejected": 0.0007385254139080644, + "logps/chosen": -169.0890655517578, + "logps/rejected": -180.8078155517578, + "logps/weighted_chosen": -0.693652331829071, + "logps/weighted_rejected": -0.879650890827179, + "loss": 0.6483, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -140.900390625, + "rewards/margins": 15.367383003234863, + "rewards/rejected": -156.2814483642578, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.8526153564453125, + "rewards/weighted_margins": 0.34896546602249146, + "rewards/weighted_rejected": -1.201562523841858, + "step": 600 + }, + { + "epoch": 0.32600708130135614, + "grad_norm": 19.085651397705078, + "learning_rate": 8.535533905932737e-07, + "logits/chosen": 0.02297821082174778, + "logits/rejected": 0.06475830078125, + "logps/chosen": -190.3406219482422, + "logps/rejected": -203.453125, + "logps/weighted_chosen": -0.7635498046875, + "logps/weighted_rejected": -0.876513659954071, + "loss": 0.6785, + "rewards/accuracies": 0.59375, + "rewards/chosen": -164.71640014648438, + "rewards/margins": 23.290233612060547, + "rewards/rejected": -187.9656219482422, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.9807373285293579, + "rewards/weighted_margins": 0.23782959580421448, + "rewards/weighted_rejected": -1.21783447265625, + "step": 610 + }, + { + "epoch": 0.33135145968334556, + "grad_norm": 32.081443786621094, + "learning_rate": 8.468965226918105e-07, + "logits/chosen": -0.011585617437958717, + "logits/rejected": 0.016324615105986595, + "logps/chosen": -189.3132781982422, + "logps/rejected": -203.7937469482422, + "logps/weighted_chosen": -0.7615417242050171, + "logps/weighted_rejected": -0.82696533203125, + "loss": 0.7265, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -167.82070922851562, + "rewards/margins": 21.9140625, + "rewards/rejected": -189.74276733398438, + "rewards/weighted_accuracies": 0.581250011920929, + "rewards/weighted_chosen": -1.022314429283142, + "rewards/weighted_margins": 0.10553588718175888, + "rewards/weighted_rejected": -1.127905249595642, + "step": 620 + }, + { + "epoch": 0.33669583806533504, + "grad_norm": 21.689685821533203, + "learning_rate": 8.40118928203676e-07, + "logits/chosen": 0.06978149712085724, + "logits/rejected": 0.07966003566980362, + "logps/chosen": -171.18905639648438, + "logps/rejected": -187.87344360351562, + "logps/weighted_chosen": -0.8035522699356079, + "logps/weighted_rejected": -0.938281238079071, + "loss": 0.6889, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -159.8556671142578, + "rewards/margins": 22.441015243530273, + "rewards/rejected": -182.2810516357422, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -1.125823974609375, + "rewards/weighted_margins": 0.22769927978515625, + "rewards/weighted_rejected": -1.353857398033142, + "step": 630 + }, + { + "epoch": 0.34204021644732446, + "grad_norm": 15.847578048706055, + "learning_rate": 8.332229658606382e-07, + "logits/chosen": 0.009455871768295765, + "logits/rejected": 0.03966980054974556, + "logps/chosen": -200.93984985351562, + "logps/rejected": -211.3125, + "logps/weighted_chosen": -0.783465564250946, + "logps/weighted_rejected": -0.895214855670929, + "loss": 0.6535, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -177.21133422851562, + "rewards/margins": 26.989063262939453, + "rewards/rejected": -204.1365203857422, + "rewards/weighted_accuracies": 0.637499988079071, + "rewards/weighted_chosen": -1.0060241222381592, + "rewards/weighted_margins": 0.2687225341796875, + "rewards/weighted_rejected": -1.274450659751892, + "step": 640 + }, + { + "epoch": 0.34738459482931394, + "grad_norm": 22.713172912597656, + "learning_rate": 8.262110355887302e-07, + "logits/chosen": 0.028325652703642845, + "logits/rejected": 0.06662559509277344, + "logps/chosen": -189.8406219482422, + "logps/rejected": -196.30859375, + "logps/weighted_chosen": -0.7696533203125, + "logps/weighted_rejected": -0.8599487543106079, + "loss": 0.7008, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -176.921875, + "rewards/margins": 18.607227325439453, + "rewards/rejected": -195.5730438232422, + "rewards/weighted_accuracies": 0.6000000238418579, + "rewards/weighted_chosen": -1.030175805091858, + "rewards/weighted_margins": 0.17038574814796448, + "rewards/weighted_rejected": -1.2006652355194092, + "step": 650 + }, + { + "epoch": 0.35272897321130336, + "grad_norm": 21.770971298217773, + "learning_rate": 8.190855776730293e-07, + "logits/chosen": -0.02209014818072319, + "logits/rejected": 0.02955322340130806, + "logps/chosen": -191.64297485351562, + "logps/rejected": -206.6015625, + "logps/weighted_chosen": -0.783374011516571, + "logps/weighted_rejected": -0.9095824956893921, + "loss": 0.6596, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -177.8122100830078, + "rewards/margins": 24.329492568969727, + "rewards/rejected": -202.0851593017578, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -1.0215880870819092, + "rewards/weighted_margins": 0.25640565156936646, + "rewards/weighted_rejected": -1.2780640125274658, + "step": 660 + }, + { + "epoch": 0.3580733515932928, + "grad_norm": 27.83735466003418, + "learning_rate": 8.118490719083917e-07, + "logits/chosen": -0.04197654873132706, + "logits/rejected": -0.011433410458266735, + "logps/chosen": -217.96249389648438, + "logps/rejected": -221.2265625, + "logps/weighted_chosen": -0.8158324956893921, + "logps/weighted_rejected": -0.9188903570175171, + "loss": 0.6605, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -213.95703125, + "rewards/margins": 13.51513671875, + "rewards/rejected": -227.47109985351562, + "rewards/weighted_accuracies": 0.574999988079071, + "rewards/weighted_chosen": -1.0736420154571533, + "rewards/weighted_margins": 0.30453795194625854, + "rewards/weighted_rejected": -1.37799072265625, + "step": 670 + }, + { + "epoch": 0.36341772997528227, + "grad_norm": 63.59722137451172, + "learning_rate": 8.045040367364368e-07, + "logits/chosen": 0.06599731743335724, + "logits/rejected": 0.10958633571863174, + "logps/chosen": -183.30899047851562, + "logps/rejected": -192.22030639648438, + "logps/weighted_chosen": -0.8281921148300171, + "logps/weighted_rejected": -0.978344738483429, + "loss": 0.6439, + "rewards/accuracies": 0.625, + "rewards/chosen": -185.7732391357422, + "rewards/margins": 25.359766006469727, + "rewards/rejected": -211.1357421875, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -1.1530334949493408, + "rewards/weighted_margins": 0.34730225801467896, + "rewards/weighted_rejected": -1.500341773033142, + "step": 680 + }, + { + "epoch": 0.3687621083572717, + "grad_norm": 11.727298736572266, + "learning_rate": 7.970530283690819e-07, + "logits/chosen": 0.10012397915124893, + "logits/rejected": 0.14632339775562286, + "logps/chosen": -193.36328125, + "logps/rejected": -209.76406860351562, + "logps/weighted_chosen": -0.856884777545929, + "logps/weighted_rejected": -0.98321533203125, + "loss": 0.6659, + "rewards/accuracies": 0.640625, + "rewards/chosen": -201.2302703857422, + "rewards/margins": 32.98652267456055, + "rewards/rejected": -234.34707641601562, + "rewards/weighted_accuracies": 0.6343749761581421, + "rewards/weighted_chosen": -1.2462646961212158, + "rewards/weighted_margins": 0.27206724882125854, + "rewards/weighted_rejected": -1.5183227062225342, + "step": 690 + }, + { + "epoch": 0.3741064867392611, + "grad_norm": 16.15506362915039, + "learning_rate": 7.89498639898931e-07, + "logits/chosen": 0.17198029160499573, + "logits/rejected": 0.22177276015281677, + "logps/chosen": -201.234375, + "logps/rejected": -211.76718139648438, + "logps/weighted_chosen": -0.888415515422821, + "logps/weighted_rejected": -1.013879418373108, + "loss": 0.6659, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -227.2578125, + "rewards/margins": 29.7451171875, + "rewards/rejected": -256.93048095703125, + "rewards/weighted_accuracies": 0.581250011920929, + "rewards/weighted_chosen": -1.3159301280975342, + "rewards/weighted_margins": 0.291015625, + "rewards/weighted_rejected": -1.6072266101837158, + "step": 700 + }, + { + "epoch": 0.3794508651212506, + "grad_norm": 20.944732666015625, + "learning_rate": 7.818435003968305e-07, + "logits/chosen": 0.11227264255285263, + "logits/rejected": 0.1453804075717926, + "logps/chosen": -220.72891235351562, + "logps/rejected": -239.18594360351562, + "logps/weighted_chosen": -0.891796886920929, + "logps/weighted_rejected": -1.0748779773712158, + "loss": 0.6297, + "rewards/accuracies": 0.625, + "rewards/chosen": -247.6140594482422, + "rewards/margins": 40.565818786621094, + "rewards/rejected": -288.025390625, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -1.3091919422149658, + "rewards/weighted_margins": 0.36735838651657104, + "rewards/weighted_rejected": -1.67669677734375, + "step": 710 + }, + { + "epoch": 0.38479524350324, + "grad_norm": 16.061363220214844, + "learning_rate": 7.740902739969008e-07, + "logits/chosen": 0.08527755737304688, + "logits/rejected": 0.10508499294519424, + "logps/chosen": -219.64688110351562, + "logps/rejected": -228.9499969482422, + "logps/weighted_chosen": -0.8562988042831421, + "logps/weighted_rejected": -0.9676269292831421, + "loss": 0.6551, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -247.7687530517578, + "rewards/margins": 23.225780487060547, + "rewards/rejected": -270.9765625, + "rewards/weighted_accuracies": 0.5874999761581421, + "rewards/weighted_chosen": -1.203149437904358, + "rewards/weighted_margins": 0.285797119140625, + "rewards/weighted_rejected": -1.489160180091858, + "step": 720 + }, + { + "epoch": 0.3901396218852295, + "grad_norm": 31.3476505279541, + "learning_rate": 7.662416589693695e-07, + "logits/chosen": 0.127583310008049, + "logits/rejected": 0.17155151069164276, + "logps/chosen": -201.3367156982422, + "logps/rejected": -212.42422485351562, + "logps/weighted_chosen": -0.8604370355606079, + "logps/weighted_rejected": -1.0564453601837158, + "loss": 0.6553, + "rewards/accuracies": 0.671875, + "rewards/chosen": -219.3408203125, + "rewards/margins": 29.0703125, + "rewards/rejected": -248.494140625, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -1.220678687095642, + "rewards/weighted_margins": 0.39063721895217896, + "rewards/weighted_rejected": -1.611718773841858, + "step": 730 + }, + { + "epoch": 0.3954840002672189, + "grad_norm": 12.7977294921875, + "learning_rate": 7.583003867815192e-07, + "logits/chosen": 0.09622497856616974, + "logits/rejected": 0.12804412841796875, + "logps/chosen": -228.9718780517578, + "logps/rejected": -242.92031860351562, + "logps/weighted_chosen": -0.877368152141571, + "logps/weighted_rejected": -0.99322509765625, + "loss": 0.6549, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -245.8718719482422, + "rewards/margins": 29.608789443969727, + "rewards/rejected": -275.35076904296875, + "rewards/weighted_accuracies": 0.621874988079071, + "rewards/weighted_chosen": -1.26458740234375, + "rewards/weighted_margins": 0.26112061738967896, + "rewards/weighted_rejected": -1.525964379310608, + "step": 740 + }, + { + "epoch": 0.40082837864920834, + "grad_norm": 31.35419464111328, + "learning_rate": 7.502692211470869e-07, + "logits/chosen": 0.10113067924976349, + "logits/rejected": 0.14377517998218536, + "logps/chosen": -222.765625, + "logps/rejected": -238.73983764648438, + "logps/weighted_chosen": -0.8651977777481079, + "logps/weighted_rejected": -0.9940551519393921, + "loss": 0.6788, + "rewards/accuracies": 0.609375, + "rewards/chosen": -250.9093780517578, + "rewards/margins": 38.716407775878906, + "rewards/rejected": -289.56915283203125, + "rewards/weighted_accuracies": 0.565625011920929, + "rewards/weighted_chosen": -1.2682373523712158, + "rewards/weighted_margins": 0.230926513671875, + "rewards/weighted_rejected": -1.499151587486267, + "step": 750 + }, + { + "epoch": 0.4061727570311978, + "grad_norm": 17.88278579711914, + "learning_rate": 7.421509570644387e-07, + "logits/chosen": 0.13302917778491974, + "logits/rejected": 0.17120666801929474, + "logps/chosen": -220.7859344482422, + "logps/rejected": -228.9296875, + "logps/weighted_chosen": -0.8490234613418579, + "logps/weighted_rejected": -1.0133788585662842, + "loss": 0.6282, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -258.48907470703125, + "rewards/margins": 20.644336700439453, + "rewards/rejected": -279.12579345703125, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -1.235162377357483, + "rewards/weighted_margins": 0.39671021699905396, + "rewards/weighted_rejected": -1.631689429283142, + "step": 760 + }, + { + "epoch": 0.41151713541318724, + "grad_norm": 19.770469665527344, + "learning_rate": 7.339484198438566e-07, + "logits/chosen": 0.2122901976108551, + "logits/rejected": 0.268341064453125, + "logps/chosen": -206.8859405517578, + "logps/rejected": -226.79452514648438, + "logps/weighted_chosen": -0.9818481206893921, + "logps/weighted_rejected": -1.1372802257537842, + "loss": 0.6596, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -249.8464813232422, + "rewards/margins": 49.36640548706055, + "rewards/rejected": -299.32110595703125, + "rewards/weighted_accuracies": 0.640625, + "rewards/weighted_chosen": -1.4886353015899658, + "rewards/weighted_margins": 0.38934630155563354, + "rewards/weighted_rejected": -1.8777344226837158, + "step": 770 + }, + { + "epoch": 0.4168615137951767, + "grad_norm": 17.136322021484375, + "learning_rate": 7.256644641242773e-07, + "logits/chosen": 0.2901626527309418, + "logits/rejected": 0.3380989134311676, + "logps/chosen": -231.08633422851562, + "logps/rejected": -247.6374969482422, + "logps/weighted_chosen": -1.022985816001892, + "logps/weighted_rejected": -1.143164038658142, + "loss": 0.7062, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -293.5406188964844, + "rewards/margins": 38.568748474121094, + "rewards/rejected": -331.9984436035156, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": -1.6519286632537842, + "rewards/weighted_margins": 0.23250122368335724, + "rewards/weighted_rejected": -1.8852417469024658, + "step": 780 + }, + { + "epoch": 0.42220589217716614, + "grad_norm": 16.78958511352539, + "learning_rate": 7.173019728798234e-07, + "logits/chosen": 0.22947922348976135, + "logits/rejected": 0.2762344479560852, + "logps/chosen": -225.95468139648438, + "logps/rejected": -246.8957061767578, + "logps/weighted_chosen": -0.9952758550643921, + "logps/weighted_rejected": -1.151769995689392, + "loss": 0.6317, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -281.17462158203125, + "rewards/margins": 49.26288986206055, + "rewards/rejected": -330.38671875, + "rewards/weighted_accuracies": 0.628125011920929, + "rewards/weighted_chosen": -1.5531127452850342, + "rewards/weighted_margins": 0.3658691346645355, + "rewards/weighted_rejected": -1.918725609779358, + "step": 790 + }, + { + "epoch": 0.42755027055915557, + "grad_norm": 26.674007415771484, + "learning_rate": 7.088638564164738e-07, + "logits/chosen": 0.2732749879360199, + "logits/rejected": 0.32427138090133667, + "logps/chosen": -263.5464782714844, + "logps/rejected": -295.19061279296875, + "logps/weighted_chosen": -1.066674828529358, + "logps/weighted_rejected": -1.1934082508087158, + "loss": 0.6437, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -355.3199157714844, + "rewards/margins": 68.63945007324219, + "rewards/rejected": -424.07501220703125, + "rewards/weighted_accuracies": 0.6499999761581421, + "rewards/weighted_chosen": -1.735803246498108, + "rewards/weighted_margins": 0.3184448182582855, + "rewards/weighted_rejected": -2.0541014671325684, + "step": 800 + }, + { + "epoch": 0.43289464894114504, + "grad_norm": 53.29134750366211, + "learning_rate": 7.003530513592215e-07, + "logits/chosen": 0.30975571274757385, + "logits/rejected": 0.36716994643211365, + "logps/chosen": -259.92498779296875, + "logps/rejected": -295.8374938964844, + "logps/weighted_chosen": -1.0916259288787842, + "logps/weighted_rejected": -1.302832007408142, + "loss": 0.6407, + "rewards/accuracies": 0.65625, + "rewards/chosen": -362.6820373535156, + "rewards/margins": 78.9546890258789, + "rewards/rejected": -441.49298095703125, + "rewards/weighted_accuracies": 0.6656249761581421, + "rewards/weighted_chosen": -1.836395263671875, + "rewards/weighted_margins": 0.42828065156936646, + "rewards/weighted_rejected": -2.264721632003784, + "step": 810 + }, + { + "epoch": 0.43823902732313447, + "grad_norm": 21.21747589111328, + "learning_rate": 6.917725196300726e-07, + "logits/chosen": 0.24695205688476562, + "logits/rejected": 0.290322482585907, + "logps/chosen": -282.28790283203125, + "logps/rejected": -317.53045654296875, + "logps/weighted_chosen": -1.034826636314392, + "logps/weighted_rejected": -1.1845214366912842, + "loss": 0.6366, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -389.09100341796875, + "rewards/margins": 74.9951171875, + "rewards/rejected": -464.04376220703125, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -1.68133544921875, + "rewards/weighted_margins": 0.3324523866176605, + "rewards/weighted_rejected": -2.014007568359375, + "step": 820 + }, + { + "epoch": 0.44358340570512395, + "grad_norm": 25.014272689819336, + "learning_rate": 6.831252474172411e-07, + "logits/chosen": 0.2618546485900879, + "logits/rejected": 0.29917725920677185, + "logps/chosen": -255.4765625, + "logps/rejected": -285.0953063964844, + "logps/weighted_chosen": -1.060217261314392, + "logps/weighted_rejected": -1.23773193359375, + "loss": 0.6233, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -353.51092529296875, + "rewards/margins": 61.749610900878906, + "rewards/rejected": -415.51873779296875, + "rewards/weighted_accuracies": 0.628125011920929, + "rewards/weighted_chosen": -1.6349976062774658, + "rewards/weighted_margins": 0.495330810546875, + "rewards/weighted_rejected": -2.130786180496216, + "step": 830 + }, + { + "epoch": 0.44892778408711337, + "grad_norm": 16.74439239501953, + "learning_rate": 6.74414244135898e-07, + "logits/chosen": 0.240010067820549, + "logits/rejected": 0.31660157442092896, + "logps/chosen": -265.046875, + "logps/rejected": -296.85626220703125, + "logps/weighted_chosen": -1.003625512123108, + "logps/weighted_rejected": -1.165197730064392, + "loss": 0.6336, + "rewards/accuracies": 0.625, + "rewards/chosen": -371.21112060546875, + "rewards/margins": 74.8984375, + "rewards/rejected": -446.146484375, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -1.5787353515625, + "rewards/weighted_margins": 0.36824339628219604, + "rewards/weighted_rejected": -1.946313500404358, + "step": 840 + }, + { + "epoch": 0.4542721624691028, + "grad_norm": 20.773204803466797, + "learning_rate": 6.656425413808388e-07, + "logits/chosen": 0.2946815490722656, + "logits/rejected": 0.31943321228027344, + "logps/chosen": -284.6898498535156, + "logps/rejected": -300.85858154296875, + "logps/weighted_chosen": -1.0427734851837158, + "logps/weighted_rejected": -1.2700927257537842, + "loss": 0.6239, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -416.8919982910156, + "rewards/margins": 38.0087890625, + "rewards/rejected": -454.7835998535156, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -1.7086913585662842, + "rewards/weighted_margins": 0.4794067442417145, + "rewards/weighted_rejected": -2.187939405441284, + "step": 850 + }, + { + "epoch": 0.45961654085109227, + "grad_norm": 14.845000267028809, + "learning_rate": 6.568131918714294e-07, + "logits/chosen": 0.3231925964355469, + "logits/rejected": 0.36410826444625854, + "logps/chosen": -282.16876220703125, + "logps/rejected": -317.7523498535156, + "logps/weighted_chosen": -1.077661156654358, + "logps/weighted_rejected": -1.253027319908142, + "loss": 0.6359, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -413.7085876464844, + "rewards/margins": 79.2416000366211, + "rewards/rejected": -492.9193420410156, + "rewards/weighted_accuracies": 0.653124988079071, + "rewards/weighted_chosen": -1.803625464439392, + "rewards/weighted_margins": 0.37888795137405396, + "rewards/weighted_rejected": -2.1824707984924316, + "step": 860 + }, + { + "epoch": 0.4649609192330817, + "grad_norm": 23.40928077697754, + "learning_rate": 6.47929268389203e-07, + "logits/chosen": 0.48909300565719604, + "logits/rejected": 0.5615142583847046, + "logps/chosen": -364.40155029296875, + "logps/rejected": -441.33123779296875, + "logps/weighted_chosen": -1.275048851966858, + "logps/weighted_rejected": -1.454126000404358, + "loss": 0.6207, + "rewards/accuracies": 0.640625, + "rewards/chosen": -599.5953369140625, + "rewards/margins": 173.45858764648438, + "rewards/rejected": -773.1968994140625, + "rewards/weighted_accuracies": 0.65625, + "rewards/weighted_chosen": -2.249072313308716, + "rewards/weighted_margins": 0.4446777403354645, + "rewards/weighted_rejected": -2.6942381858825684, + "step": 870 + }, + { + "epoch": 0.47030529761507117, + "grad_norm": 18.79306411743164, + "learning_rate": 6.389938627084732e-07, + "logits/chosen": 0.577728271484375, + "logits/rejected": 0.6453384160995483, + "logps/chosen": -411.82342529296875, + "logps/rejected": -475.7250061035156, + "logps/weighted_chosen": -1.4719970226287842, + "logps/weighted_rejected": -1.6781737804412842, + "loss": 0.6194, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -739.5625, + "rewards/margins": 146.572265625, + "rewards/rejected": -886.0562744140625, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -2.7164063453674316, + "rewards/weighted_margins": 0.531750500202179, + "rewards/weighted_rejected": -3.248828172683716, + "step": 880 + }, + { + "epoch": 0.4756496759970606, + "grad_norm": 22.009668350219727, + "learning_rate": 6.300100845203373e-07, + "logits/chosen": 0.5401207208633423, + "logits/rejected": 0.5778656005859375, + "logps/chosen": -414.01953125, + "logps/rejected": -455.6171875, + "logps/weighted_chosen": -1.2978026866912842, + "logps/weighted_rejected": -1.476660132408142, + "loss": 0.6357, + "rewards/accuracies": 0.559374988079071, + "rewards/chosen": -709.7568359375, + "rewards/margins": 100.93086242675781, + "rewards/rejected": -810.8453369140625, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -2.32794189453125, + "rewards/weighted_margins": 0.41016846895217896, + "rewards/weighted_rejected": -2.738818407058716, + "step": 890 + }, + { + "epoch": 0.48099405437905, + "grad_norm": 21.886869430541992, + "learning_rate": 6.20981060350445e-07, + "logits/chosen": 0.39915162324905396, + "logits/rejected": 0.4594573974609375, + "logps/chosen": -342.47186279296875, + "logps/rejected": -374.34063720703125, + "logps/weighted_chosen": -1.194909691810608, + "logps/weighted_rejected": -1.406274437904358, + "loss": 0.5957, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -555.0218505859375, + "rewards/margins": 85.990234375, + "rewards/rejected": -640.8515625, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -2.103381395339966, + "rewards/weighted_margins": 0.46428221464157104, + "rewards/weighted_rejected": -2.5683836936950684, + "step": 900 + }, + { + "epoch": 0.4863384327610395, + "grad_norm": 16.282630920410156, + "learning_rate": 6.119099324709087e-07, + "logits/chosen": 0.3816207945346832, + "logits/rejected": 0.4475570619106293, + "logps/chosen": -316.72186279296875, + "logps/rejected": -365.28045654296875, + "logps/weighted_chosen": -1.1351439952850342, + "logps/weighted_rejected": -1.284033179283142, + "loss": 0.6351, + "rewards/accuracies": 0.625, + "rewards/chosen": -497.158203125, + "rewards/margins": 119.31523132324219, + "rewards/rejected": -616.5943603515625, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -1.967187523841858, + "rewards/weighted_margins": 0.3160339295864105, + "rewards/weighted_rejected": -2.284106492996216, + "step": 910 + }, + { + "epoch": 0.4916828111430289, + "grad_norm": 18.44144630432129, + "learning_rate": 6.027998578067316e-07, + "logits/chosen": 0.4347885251045227, + "logits/rejected": 0.48958054184913635, + "logps/chosen": -366.1976623535156, + "logps/rejected": -429.29998779296875, + "logps/weighted_chosen": -1.150183081626892, + "logps/weighted_rejected": -1.3400390148162842, + "loss": 0.6143, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -597.3343505859375, + "rewards/margins": 151.3175811767578, + "rewards/rejected": -748.48046875, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -1.9908447265625, + "rewards/weighted_margins": 0.4314208924770355, + "rewards/weighted_rejected": -2.4218993186950684, + "step": 920 + }, + { + "epoch": 0.4970271895250184, + "grad_norm": 25.489429473876953, + "learning_rate": 5.936540068371394e-07, + "logits/chosen": 0.45708006620407104, + "logits/rejected": 0.5000015497207642, + "logps/chosen": -349.76251220703125, + "logps/rejected": -390.9984436035156, + "logps/weighted_chosen": -1.220849633216858, + "logps/weighted_rejected": -1.4126708507537842, + "loss": 0.6245, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -577.7796630859375, + "rewards/margins": 97.06953430175781, + "rewards/rejected": -675.0695190429688, + "rewards/weighted_accuracies": 0.643750011920929, + "rewards/weighted_chosen": -2.1529297828674316, + "rewards/weighted_margins": 0.42708128690719604, + "rewards/weighted_rejected": -2.580029249191284, + "step": 930 + }, + { + "epoch": 0.5023715679070078, + "grad_norm": 19.472640991210938, + "learning_rate": 5.844755624921918e-07, + "logits/chosen": 0.5162017941474915, + "logits/rejected": 0.609728991985321, + "logps/chosen": -363.6421813964844, + "logps/rejected": -415.4140625, + "logps/weighted_chosen": -1.33294677734375, + "logps/weighted_rejected": -1.4788939952850342, + "loss": 0.6754, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -618.7273559570312, + "rewards/margins": 130.2453155517578, + "rewards/rejected": -749.0703125, + "rewards/weighted_accuracies": 0.6468750238418579, + "rewards/weighted_chosen": -2.408947706222534, + "rewards/weighted_margins": 0.34906005859375, + "rewards/weighted_rejected": -2.757458448410034, + "step": 940 + }, + { + "epoch": 0.5077159462889973, + "grad_norm": 32.006065368652344, + "learning_rate": 5.752677190450634e-07, + "logits/chosen": 0.688891589641571, + "logits/rejected": 0.7397063970565796, + "logps/chosen": -482.10235595703125, + "logps/rejected": -568.703125, + "logps/weighted_chosen": -1.4970214366912842, + "logps/weighted_rejected": -1.689294457435608, + "loss": 0.6354, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -910.1663818359375, + "rewards/margins": 212.23184204101562, + "rewards/rejected": -1122.731201171875, + "rewards/weighted_accuracies": 0.6812499761581421, + "rewards/weighted_chosen": -2.8426513671875, + "rewards/weighted_margins": 0.47370606660842896, + "rewards/weighted_rejected": -3.3175048828125, + "step": 950 + }, + { + "epoch": 0.5130603246709867, + "grad_norm": 24.5925235748291, + "learning_rate": 5.66033681000375e-07, + "logits/chosen": 0.743194580078125, + "logits/rejected": 0.8281310796737671, + "logps/chosen": -493.4140625, + "logps/rejected": -624.3343505859375, + "logps/weighted_chosen": -1.50146484375, + "logps/weighted_rejected": -1.747949242591858, + "loss": 0.5978, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -935.1187744140625, + "rewards/margins": 319.94921875, + "rewards/rejected": -1255.2484130859375, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -2.852978467941284, + "rewards/weighted_margins": 0.568621814250946, + "rewards/weighted_rejected": -3.422070264816284, + "step": 960 + }, + { + "epoch": 0.5184047030529761, + "grad_norm": 18.53244400024414, + "learning_rate": 5.567766619789658e-07, + "logits/chosen": 0.5608322024345398, + "logits/rejected": 0.6179229617118835, + "logps/chosen": -431.1343688964844, + "logps/rejected": -476.00701904296875, + "logps/weighted_chosen": -1.348486304283142, + "logps/weighted_rejected": -1.502197265625, + "loss": 0.6584, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -772.9921875, + "rewards/margins": 109.23124694824219, + "rewards/rejected": -882.5031127929688, + "rewards/weighted_accuracies": 0.6312500238418579, + "rewards/weighted_chosen": -2.4359130859375, + "rewards/weighted_margins": 0.3783813416957855, + "rewards/weighted_rejected": -2.814013719558716, + "step": 970 + }, + { + "epoch": 0.5237490814349656, + "grad_norm": 32.86454772949219, + "learning_rate": 5.474998835994916e-07, + "logits/chosen": 0.539044201374054, + "logits/rejected": 0.6025703549385071, + "logps/chosen": -401.83905029296875, + "logps/rejected": -486.6234436035156, + "logps/weighted_chosen": -1.262475609779358, + "logps/weighted_rejected": -1.458227515220642, + "loss": 0.6286, + "rewards/accuracies": 0.609375, + "rewards/chosen": -694.048828125, + "rewards/margins": 198.5105438232422, + "rewards/rejected": -892.8984375, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -2.239306688308716, + "rewards/weighted_margins": 0.41253662109375, + "rewards/weighted_rejected": -2.6525635719299316, + "step": 980 + }, + { + "epoch": 0.529093459816955, + "grad_norm": 29.745466232299805, + "learning_rate": 5.382065743572411e-07, + "logits/chosen": 0.5060653686523438, + "logits/rejected": 0.6303970217704773, + "logps/chosen": -385.96405029296875, + "logps/rejected": -484.296875, + "logps/weighted_chosen": -1.272680640220642, + "logps/weighted_rejected": -1.508935570716858, + "loss": 0.5645, + "rewards/accuracies": 0.671875, + "rewards/chosen": -660.4656372070312, + "rewards/margins": 236.1494140625, + "rewards/rejected": -896.5562744140625, + "rewards/weighted_accuracies": 0.7250000238418579, + "rewards/weighted_chosen": -2.2663817405700684, + "rewards/weighted_margins": 0.583996593952179, + "rewards/weighted_rejected": -2.849902391433716, + "step": 990 + }, + { + "epoch": 0.5344378381989445, + "grad_norm": 23.71160125732422, + "learning_rate": 5.288999685005571e-07, + "logits/chosen": 0.6983550786972046, + "logits/rejected": 0.7909576296806335, + "logps/chosen": -492.20623779296875, + "logps/rejected": -586.3453369140625, + "logps/weighted_chosen": -1.4745604991912842, + "logps/weighted_rejected": -1.7019531726837158, + "loss": 0.5934, + "rewards/accuracies": 0.625, + "rewards/chosen": -929.5031127929688, + "rewards/margins": 230.67343139648438, + "rewards/rejected": -1159.8773193359375, + "rewards/weighted_accuracies": 0.671875, + "rewards/weighted_chosen": -2.7806153297424316, + "rewards/weighted_margins": 0.5472351312637329, + "rewards/weighted_rejected": -3.3272705078125, + "step": 1000 + }, + { + "epoch": 0.5344378381989445, + "eval_logits/chosen": 0.8178273439407349, + "eval_logits/rejected": 0.9030020833015442, + "eval_logps/chosen": -545.8350219726562, + "eval_logps/rejected": -631.2993774414062, + "eval_logps/weighted_chosen": -1.671688437461853, + "eval_logps/weighted_rejected": -1.9300999641418457, + "eval_loss": 0.6100515723228455, + "eval_rewards/accuracies": 0.612525463104248, + "eval_rewards/chosen": -1062.7760009765625, + "eval_rewards/margins": 211.23606872558594, + "eval_rewards/rejected": -1274.2708740234375, + "eval_rewards/weighted_accuracies": 0.6924643516540527, + "eval_rewards/weighted_chosen": -3.270533561706543, + "eval_rewards/weighted_margins": 0.5912336111068726, + "eval_rewards/weighted_rejected": -3.861767292022705, + "eval_runtime": 1069.2656, + "eval_samples_per_second": 1.834, + "eval_steps_per_second": 0.459, + "step": 1000 + }, + { + "epoch": 0.539782216580934, + "grad_norm": 19.396331787109375, + "learning_rate": 5.19583304905257e-07, + "logits/chosen": 0.8581451177597046, + "logits/rejected": 0.945892333984375, + "logps/chosen": -555.1343994140625, + "logps/rejected": -627.6500244140625, + "logps/weighted_chosen": -1.8346436023712158, + "logps/weighted_rejected": -2.0589356422424316, + "loss": 0.6853, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -1099.8359375, + "rewards/margins": 179.13436889648438, + "rewards/rejected": -1279.1468505859375, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -3.672802686691284, + "rewards/weighted_margins": 0.510241687297821, + "rewards/weighted_rejected": -4.183545112609863, + "step": 1010 + }, + { + "epoch": 0.5451265949629234, + "grad_norm": 27.648876190185547, + "learning_rate": 5.102598259474434e-07, + "logits/chosen": 0.9334442019462585, + "logits/rejected": 0.9845215082168579, + "logps/chosen": -561.9749755859375, + "logps/rejected": -624.1734619140625, + "logps/weighted_chosen": -1.9507324695587158, + "logps/weighted_rejected": -2.140209913253784, + "loss": 0.6361, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1126.956298828125, + "rewards/margins": 161.8835906982422, + "rewards/rejected": -1288.768798828125, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -3.981640577316284, + "rewards/weighted_margins": 0.44477540254592896, + "rewards/weighted_rejected": -4.425146579742432, + "step": 1020 + }, + { + "epoch": 0.5504709733449128, + "grad_norm": 25.49768829345703, + "learning_rate": 5.009327763750932e-07, + "logits/chosen": 0.855987548828125, + "logits/rejected": 0.9198578000068665, + "logps/chosen": -564.9375, + "logps/rejected": -675.9593505859375, + "logps/weighted_chosen": -1.9149658679962158, + "logps/weighted_rejected": -2.202954053878784, + "loss": 0.5673, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -1117.120361328125, + "rewards/margins": 272.2593688964844, + "rewards/rejected": -1389.168701171875, + "rewards/weighted_accuracies": 0.715624988079071, + "rewards/weighted_chosen": -3.8498778343200684, + "rewards/weighted_margins": 0.687426745891571, + "rewards/weighted_rejected": -4.537939548492432, + "step": 1030 + }, + { + "epoch": 0.5558153517269022, + "grad_norm": 40.38433074951172, + "learning_rate": 4.916054021788265e-07, + "logits/chosen": 0.8551849126815796, + "logits/rejected": 0.9495452642440796, + "logps/chosen": -524.4718627929688, + "logps/rejected": -593.6937255859375, + "logps/weighted_chosen": -1.8265869617462158, + "logps/weighted_rejected": -2.054638624191284, + "loss": 0.6191, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -1018.3140869140625, + "rewards/margins": 175.7589874267578, + "rewards/rejected": -1193.996826171875, + "rewards/weighted_accuracies": 0.675000011920929, + "rewards/weighted_chosen": -3.683789014816284, + "rewards/weighted_margins": 0.547253429889679, + "rewards/weighted_rejected": -4.230688571929932, + "step": 1040 + }, + { + "epoch": 0.5611597301088918, + "grad_norm": 19.668304443359375, + "learning_rate": 4.822809494622379e-07, + "logits/chosen": 0.934344470500946, + "logits/rejected": 1.0057556629180908, + "logps/chosen": -563.5859375, + "logps/rejected": -682.7578125, + "logps/weighted_chosen": -2.012744188308716, + "logps/weighted_rejected": -2.237011671066284, + "loss": 0.6414, + "rewards/accuracies": 0.578125, + "rewards/chosen": -1111.606201171875, + "rewards/margins": 287.8667907714844, + "rewards/rejected": -1399.8656005859375, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": -4.129345893859863, + "rewards/weighted_margins": 0.4779907166957855, + "rewards/weighted_rejected": -4.606591701507568, + "step": 1050 + }, + { + "epoch": 0.5665041084908812, + "grad_norm": 24.138713836669922, + "learning_rate": 4.729626633121914e-07, + "logits/chosen": 0.9796997308731079, + "logits/rejected": 1.047949194908142, + "logps/chosen": -553.5140380859375, + "logps/rejected": -684.1593627929688, + "logps/weighted_chosen": -2.029101610183716, + "logps/weighted_rejected": -2.305004835128784, + "loss": 0.5864, + "rewards/accuracies": 0.609375, + "rewards/chosen": -1090.3616943359375, + "rewards/margins": 319.7816467285156, + "rewards/rejected": -1410.275390625, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -4.131543159484863, + "rewards/weighted_margins": 0.675646960735321, + "rewards/weighted_rejected": -4.809277534484863, + "step": 1060 + }, + { + "epoch": 0.5718484868728706, + "grad_norm": 18.742746353149414, + "learning_rate": 4.636537866694692e-07, + "logits/chosen": 0.880596935749054, + "logits/rejected": 0.9508880376815796, + "logps/chosen": -538.2796630859375, + "logps/rejected": -615.1593627929688, + "logps/weighted_chosen": -1.745996117591858, + "logps/weighted_rejected": -2.0263915061950684, + "loss": 0.5655, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -1052.207763671875, + "rewards/margins": 191.07656860351562, + "rewards/rejected": -1243.984375, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -3.4351563453674316, + "rewards/weighted_margins": 0.646618664264679, + "rewards/weighted_rejected": -4.082568168640137, + "step": 1070 + }, + { + "epoch": 0.57719286525486, + "grad_norm": 19.411636352539062, + "learning_rate": 4.5435755920016557e-07, + "logits/chosen": 0.818310558795929, + "logits/rejected": 0.903167724609375, + "logps/chosen": -588.6546630859375, + "logps/rejected": -667.2093505859375, + "logps/weighted_chosen": -1.659887671470642, + "logps/weighted_rejected": -1.882080078125, + "loss": 0.5754, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -1131.526611328125, + "rewards/margins": 199.96875, + "rewards/rejected": -1331.278076171875, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -3.266357421875, + "rewards/weighted_margins": 0.545117199420929, + "rewards/weighted_rejected": -3.8111815452575684, + "step": 1080 + }, + { + "epoch": 0.5825372436368494, + "grad_norm": 32.21624755859375, + "learning_rate": 4.450772161682221e-07, + "logits/chosen": 0.8610595464706421, + "logits/rejected": 0.9808715581893921, + "logps/chosen": -544.76953125, + "logps/rejected": -654.7804565429688, + "logps/weighted_chosen": -1.832617163658142, + "logps/weighted_rejected": -2.125561475753784, + "loss": 0.5858, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1051.157470703125, + "rewards/margins": 287.2027282714844, + "rewards/rejected": -1338.409423828125, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -3.6090331077575684, + "rewards/weighted_margins": 0.7149658203125, + "rewards/weighted_rejected": -4.322216987609863, + "step": 1090 + }, + { + "epoch": 0.587881622018839, + "grad_norm": 20.99070930480957, + "learning_rate": 4.3581598730949376e-07, + "logits/chosen": 0.934826672077179, + "logits/rejected": 1.047216773033142, + "logps/chosen": -623.3093872070312, + "logps/rejected": -790.65625, + "logps/weighted_chosen": -1.8826415538787842, + "logps/weighted_rejected": -2.158398389816284, + "loss": 0.5787, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -1247.6937255859375, + "rewards/margins": 408.20001220703125, + "rewards/rejected": -1655.59765625, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -3.787158250808716, + "rewards/weighted_margins": 0.648754894733429, + "rewards/weighted_rejected": -4.436816215515137, + "step": 1100 + }, + { + "epoch": 0.5932260004008284, + "grad_norm": 21.928516387939453, + "learning_rate": 4.2657709570774007e-07, + "logits/chosen": 0.866748034954071, + "logits/rejected": 0.9647918939590454, + "logps/chosen": -545.96875, + "logps/rejected": -667.3937377929688, + "logps/weighted_chosen": -1.7513306140899658, + "logps/weighted_rejected": -2.078869581222534, + "loss": 0.5605, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -1069.492919921875, + "rewards/margins": 299.98046875, + "rewards/rejected": -1369.5546875, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -3.476513624191284, + "rewards/weighted_margins": 0.772656261920929, + "rewards/weighted_rejected": -4.249463081359863, + "step": 1110 + }, + { + "epoch": 0.5985703787828178, + "grad_norm": 26.116018295288086, + "learning_rate": 4.173637566729304e-07, + "logits/chosen": 0.8911193609237671, + "logits/rejected": 1.0266631841659546, + "logps/chosen": -583.8984375, + "logps/rejected": -725.0593872070312, + "logps/weighted_chosen": -1.813574194908142, + "logps/weighted_rejected": -2.167529344558716, + "loss": 0.5538, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -1161.6859130859375, + "rewards/margins": 345.7093811035156, + "rewards/rejected": -1506.765625, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -3.581494092941284, + "rewards/weighted_margins": 0.86077880859375, + "rewards/weighted_rejected": -4.441454887390137, + "step": 1120 + }, + { + "epoch": 0.6039147571648072, + "grad_norm": 23.125049591064453, + "learning_rate": 4.0817917662225467e-07, + "logits/chosen": 0.8893982172012329, + "logits/rejected": 1.0195739269256592, + "logps/chosen": -599.5546875, + "logps/rejected": -724.9249877929688, + "logps/weighted_chosen": -1.800683617591858, + "logps/weighted_rejected": -2.1365723609924316, + "loss": 0.5315, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1189.1500244140625, + "rewards/margins": 309.51092529296875, + "rewards/rejected": -1499.3638916015625, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -3.582812547683716, + "rewards/weighted_margins": 0.800048828125, + "rewards/weighted_rejected": -4.383740425109863, + "step": 1130 + }, + { + "epoch": 0.6092591355467967, + "grad_norm": 23.62869644165039, + "learning_rate": 3.9902655196422957e-07, + "logits/chosen": 0.9099365472793579, + "logits/rejected": 1.0202209949493408, + "logps/chosen": -575.7062377929688, + "logps/rejected": -705.3125, + "logps/weighted_chosen": -1.896215796470642, + "logps/weighted_rejected": -2.168652296066284, + "loss": 0.6004, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1132.53125, + "rewards/margins": 321.29376220703125, + "rewards/rejected": -1453.9281005859375, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -3.7870116233825684, + "rewards/weighted_margins": 0.6872802972793579, + "rewards/weighted_rejected": -4.474609375, + "step": 1140 + }, + { + "epoch": 0.6146035139287862, + "grad_norm": 21.071313858032227, + "learning_rate": 3.8990906798628763e-07, + "logits/chosen": 0.8584686517715454, + "logits/rejected": 0.9651733636856079, + "logps/chosen": -586.2062377929688, + "logps/rejected": -703.8968505859375, + "logps/weighted_chosen": -1.742700219154358, + "logps/weighted_rejected": -2.0602784156799316, + "loss": 0.6005, + "rewards/accuracies": 0.59375, + "rewards/chosen": -1143.8687744140625, + "rewards/margins": 301.03143310546875, + "rewards/rejected": -1445.2093505859375, + "rewards/weighted_accuracies": 0.684374988079071, + "rewards/weighted_chosen": -3.407958984375, + "rewards/weighted_margins": 0.7567504644393921, + "rewards/weighted_rejected": -4.164892673492432, + "step": 1150 + }, + { + "epoch": 0.6199478923107756, + "grad_norm": 23.55873680114746, + "learning_rate": 3.8082989774623587e-07, + "logits/chosen": 0.787274181842804, + "logits/rejected": 0.899890124797821, + "logps/chosen": -415.5601501464844, + "logps/rejected": -553.6296997070312, + "logps/weighted_chosen": -1.6149170398712158, + "logps/weighted_rejected": -1.8993408679962158, + "loss": 0.5757, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -762.5363159179688, + "rewards/margins": 328.75079345703125, + "rewards/rejected": -1091.199951171875, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -3.096923828125, + "rewards/weighted_margins": 0.6490112543106079, + "rewards/weighted_rejected": -3.7450194358825684, + "step": 1160 + }, + { + "epoch": 0.625292270692765, + "grad_norm": 31.037813186645508, + "learning_rate": 3.717922009679725e-07, + "logits/chosen": 0.8276687860488892, + "logits/rejected": 0.953723132610321, + "logps/chosen": -525.4249877929688, + "logps/rejected": -649.7890625, + "logps/weighted_chosen": -1.603540062904358, + "logps/weighted_rejected": -1.9658203125, + "loss": 0.5843, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -1007.1129150390625, + "rewards/margins": 311.43048095703125, + "rewards/rejected": -1318.315673828125, + "rewards/weighted_accuracies": 0.71875, + "rewards/weighted_chosen": -3.10888671875, + "rewards/weighted_margins": 0.7830810546875, + "rewards/weighted_rejected": -3.8904786109924316, + "step": 1170 + }, + { + "epoch": 0.6306366490747545, + "grad_norm": 27.499135971069336, + "learning_rate": 3.6279912294184207e-07, + "logits/chosen": 0.940417468547821, + "logits/rejected": 1.0576568841934204, + "logps/chosen": -595.1968994140625, + "logps/rejected": -710.9718627929688, + "logps/weighted_chosen": -1.754150390625, + "logps/weighted_rejected": -2.096972703933716, + "loss": 0.5481, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -1177.729736328125, + "rewards/margins": 288.2601623535156, + "rewards/rejected": -1466.1734619140625, + "rewards/weighted_accuracies": 0.6937500238418579, + "rewards/weighted_chosen": -3.4781250953674316, + "rewards/weighted_margins": 0.79449462890625, + "rewards/weighted_rejected": -4.271752834320068, + "step": 1180 + }, + { + "epoch": 0.6359810274567439, + "grad_norm": 38.12095642089844, + "learning_rate": 3.5385379343001507e-07, + "logits/chosen": 1.052221655845642, + "logits/rejected": 1.1613280773162842, + "logps/chosen": -627.6281127929688, + "logps/rejected": -768.5999755859375, + "logps/weighted_chosen": -1.926293969154358, + "logps/weighted_rejected": -2.304394483566284, + "loss": 0.5413, + "rewards/accuracies": 0.628125011920929, + "rewards/chosen": -1274.1656494140625, + "rewards/margins": 355.8843688964844, + "rewards/rejected": -1630.153076171875, + "rewards/weighted_accuracies": 0.699999988079071, + "rewards/weighted_chosen": -3.9078125953674316, + "rewards/weighted_margins": 0.8879638910293579, + "rewards/weighted_rejected": -4.796435356140137, + "step": 1190 + }, + { + "epoch": 0.6413254058387334, + "grad_norm": 21.637426376342773, + "learning_rate": 3.4495932557727114e-07, + "logits/chosen": 0.9920654296875, + "logits/rejected": 1.1169312000274658, + "logps/chosen": -612.4187622070312, + "logps/rejected": -752.953125, + "logps/weighted_chosen": -1.9196045398712158, + "logps/weighted_rejected": -2.310791015625, + "loss": 0.529, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1234.0250244140625, + "rewards/margins": 344.92108154296875, + "rewards/rejected": -1579.0625, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -3.847216844558716, + "rewards/weighted_margins": 0.968029797077179, + "rewards/weighted_rejected": -4.815478324890137, + "step": 1200 + }, + { + "epoch": 0.6466697842207229, + "grad_norm": 21.316312789916992, + "learning_rate": 3.3611881482756464e-07, + "logits/chosen": 1.0450623035430908, + "logits/rejected": 1.146942138671875, + "logps/chosen": -651.48046875, + "logps/rejected": -767.4921875, + "logps/weighted_chosen": -1.8710448741912842, + "logps/weighted_rejected": -2.1524658203125, + "loss": 0.5774, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -1320.1126708984375, + "rewards/margins": 294.1742248535156, + "rewards/rejected": -1614.6484375, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -3.768994092941284, + "rewards/weighted_margins": 0.6971191167831421, + "rewards/weighted_rejected": -4.467041015625, + "step": 1210 + }, + { + "epoch": 0.6520141626027123, + "grad_norm": 35.07364273071289, + "learning_rate": 3.2733533784675273e-07, + "logits/chosen": 1.062841773033142, + "logits/rejected": 1.19732666015625, + "logps/chosen": -599.9226684570312, + "logps/rejected": -778.9343872070312, + "logps/weighted_chosen": -1.9327881336212158, + "logps/weighted_rejected": -2.28955078125, + "loss": 0.5435, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1206.220703125, + "rewards/margins": 443.5640563964844, + "rewards/rejected": -1650.046875, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -3.9222168922424316, + "rewards/weighted_margins": 0.853991687297821, + "rewards/weighted_rejected": -4.778906345367432, + "step": 1220 + }, + { + "epoch": 0.6573585409847017, + "grad_norm": 27.86652946472168, + "learning_rate": 3.1861195145185603e-07, + "logits/chosen": 1.1198608875274658, + "logits/rejected": 1.235009789466858, + "logps/chosen": -666.2359619140625, + "logps/rejected": -824.7156372070312, + "logps/weighted_chosen": -1.962792992591858, + "logps/weighted_rejected": -2.3662352561950684, + "loss": 0.5388, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -1361.340576171875, + "rewards/margins": 392.4281311035156, + "rewards/rejected": -1753.3968505859375, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -4.012939453125, + "rewards/weighted_margins": 0.936450183391571, + "rewards/weighted_rejected": -4.948388576507568, + "step": 1230 + }, + { + "epoch": 0.6627029193666911, + "grad_norm": 52.15452575683594, + "learning_rate": 3.099516915472289e-07, + "logits/chosen": 1.065942406654358, + "logits/rejected": 1.1804687976837158, + "logps/chosen": -606.4312744140625, + "logps/rejected": -736.2906494140625, + "logps/weighted_chosen": -1.873071312904358, + "logps/weighted_rejected": -2.2518067359924316, + "loss": 0.567, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1215.331298828125, + "rewards/margins": 327.46014404296875, + "rewards/rejected": -1542.9296875, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -3.7672362327575684, + "rewards/weighted_margins": 0.8380126953125, + "rewards/weighted_rejected": -4.605224609375, + "step": 1240 + }, + { + "epoch": 0.6680472977486807, + "grad_norm": 25.46024513244629, + "learning_rate": 3.013575720680062e-07, + "logits/chosen": 0.985394299030304, + "logits/rejected": 1.0974853038787842, + "logps/chosen": -595.3812255859375, + "logps/rejected": -677.296875, + "logps/weighted_chosen": -1.850610375404358, + "logps/weighted_rejected": -2.2041258811950684, + "loss": 0.5561, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1189.073486328125, + "rewards/margins": 208.9484405517578, + "rewards/rejected": -1397.5875244140625, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -3.701464891433716, + "rewards/weighted_margins": 0.787341296672821, + "rewards/weighted_rejected": -4.488329887390137, + "step": 1250 + }, + { + "epoch": 0.6733916761306701, + "grad_norm": 25.47864532470703, + "learning_rate": 2.928325839311974e-07, + "logits/chosen": 0.936535656452179, + "logits/rejected": 1.0761229991912842, + "logps/chosen": -514.7562255859375, + "logps/rejected": -628.9890747070312, + "logps/weighted_chosen": -1.8023681640625, + "logps/weighted_rejected": -2.1927490234375, + "loss": 0.544, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1000.9328002929688, + "rewards/margins": 284.53240966796875, + "rewards/rejected": -1285.4281005859375, + "rewards/weighted_accuracies": 0.7406250238418579, + "rewards/weighted_chosen": -3.518261671066284, + "rewards/weighted_margins": 0.955517590045929, + "rewards/weighted_rejected": -4.473974704742432, + "step": 1260 + }, + { + "epoch": 0.6787360545126595, + "grad_norm": 16.07379150390625, + "learning_rate": 2.843796939947896e-07, + "logits/chosen": 0.960919201374054, + "logits/rejected": 1.0973907709121704, + "logps/chosen": -558.1945190429688, + "logps/rejected": -723.6312255859375, + "logps/weighted_chosen": -1.818383812904358, + "logps/weighted_rejected": -2.211865186691284, + "loss": 0.5085, + "rewards/accuracies": 0.684374988079071, + "rewards/chosen": -1104.028564453125, + "rewards/margins": 407.412109375, + "rewards/rejected": -1511.315673828125, + "rewards/weighted_accuracies": 0.768750011920929, + "rewards/weighted_chosen": -3.625439405441284, + "rewards/weighted_margins": 0.9406493902206421, + "rewards/weighted_rejected": -4.566552639007568, + "step": 1270 + }, + { + "epoch": 0.6840804328946489, + "grad_norm": 29.7365665435791, + "learning_rate": 2.7600184402522575e-07, + "logits/chosen": 1.015173316001892, + "logits/rejected": 1.1536743640899658, + "logps/chosen": -536.77734375, + "logps/rejected": -727.6687622070312, + "logps/weighted_chosen": -1.752343773841858, + "logps/weighted_rejected": -2.166577100753784, + "loss": 0.5346, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1053.72265625, + "rewards/margins": 470.87188720703125, + "rewards/rejected": -1524.448486328125, + "rewards/weighted_accuracies": 0.753125011920929, + "rewards/weighted_chosen": -3.4381346702575684, + "rewards/weighted_margins": 0.9781128168106079, + "rewards/weighted_rejected": -4.416308403015137, + "step": 1280 + }, + { + "epoch": 0.6894248112766383, + "grad_norm": 27.42215347290039, + "learning_rate": 2.6770194967361303e-07, + "logits/chosen": 1.0281555652618408, + "logits/rejected": 1.1298096179962158, + "logps/chosen": -694.5578002929688, + "logps/rejected": -820.9890747070312, + "logps/weighted_chosen": -1.787255883216858, + "logps/weighted_rejected": -2.1067872047424316, + "loss": 0.6008, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -1419.802490234375, + "rewards/margins": 305.42950439453125, + "rewards/rejected": -1725.576904296875, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -3.591064453125, + "rewards/weighted_margins": 0.7381957769393921, + "rewards/weighted_rejected": -4.3291015625, + "step": 1290 + }, + { + "epoch": 0.6947691896586279, + "grad_norm": 23.734582901000977, + "learning_rate": 2.594828994610211e-07, + "logits/chosen": 0.939746081829071, + "logits/rejected": 1.0722777843475342, + "logps/chosen": -475.05157470703125, + "logps/rejected": -603.234375, + "logps/weighted_chosen": -1.6553466320037842, + "logps/weighted_rejected": -1.932226538658142, + "loss": 0.6243, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -907.328125, + "rewards/margins": 309.60040283203125, + "rewards/rejected": -1216.059326171875, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -3.2413573265075684, + "rewards/weighted_margins": 0.6575561761856079, + "rewards/weighted_rejected": -3.898730516433716, + "step": 1300 + }, + { + "epoch": 0.7001135680406173, + "grad_norm": 22.069576263427734, + "learning_rate": 2.513475537732219e-07, + "logits/chosen": 0.999957263469696, + "logits/rejected": 1.119329810142517, + "logps/chosen": -569.3125, + "logps/rejected": -719.7687377929688, + "logps/weighted_chosen": -1.728369116783142, + "logps/weighted_rejected": -2.099169969558716, + "loss": 0.5476, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -1125.2562255859375, + "rewards/margins": 364.587890625, + "rewards/rejected": -1490.2672119140625, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -3.418164014816284, + "rewards/weighted_margins": 0.8717285394668579, + "rewards/weighted_rejected": -4.290673732757568, + "step": 1310 + }, + { + "epoch": 0.7054579464226067, + "grad_norm": 37.95624923706055, + "learning_rate": 2.4329874386522113e-07, + "logits/chosen": 1.0810425281524658, + "logits/rejected": 1.1805846691131592, + "logps/chosen": -571.2937622070312, + "logps/rejected": -689.4156494140625, + "logps/weighted_chosen": -1.87744140625, + "logps/weighted_rejected": -2.1846680641174316, + "loss": 0.5862, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -1139.307861328125, + "rewards/margins": 296.4810485839844, + "rewards/rejected": -1436.0484619140625, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -3.7635741233825684, + "rewards/weighted_margins": 0.7281249761581421, + "rewards/weighted_rejected": -4.493066310882568, + "step": 1320 + }, + { + "epoch": 0.7108023248045962, + "grad_norm": 35.145660400390625, + "learning_rate": 2.3533927087592713e-07, + "logits/chosen": 1.0449066162109375, + "logits/rejected": 1.158959984779358, + "logps/chosen": -630.4202880859375, + "logps/rejected": -776.75, + "logps/weighted_chosen": -1.7941162586212158, + "logps/weighted_rejected": -2.094433546066284, + "loss": 0.6067, + "rewards/accuracies": 0.6468750238418579, + "rewards/chosen": -1269.651611328125, + "rewards/margins": 358.5513610839844, + "rewards/rejected": -1627.846923828125, + "rewards/weighted_accuracies": 0.737500011920929, + "rewards/weighted_chosen": -3.615527391433716, + "rewards/weighted_margins": 0.7017577886581421, + "rewards/weighted_rejected": -4.317187309265137, + "step": 1330 + }, + { + "epoch": 0.7161467031865856, + "grad_norm": 27.11147689819336, + "learning_rate": 2.2747190485330193e-07, + "logits/chosen": 1.043206810951233, + "logits/rejected": 1.1683349609375, + "logps/chosen": -561.8843994140625, + "logps/rejected": -702.2242431640625, + "logps/weighted_chosen": -1.7777831554412842, + "logps/weighted_rejected": -2.166455030441284, + "loss": 0.5198, + "rewards/accuracies": 0.653124988079071, + "rewards/chosen": -1107.512451171875, + "rewards/margins": 352.84259033203125, + "rewards/rejected": -1460.1614990234375, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -3.54296875, + "rewards/weighted_margins": 0.9533447027206421, + "rewards/weighted_rejected": -4.496337890625, + "step": 1340 + }, + { + "epoch": 0.7214910815685751, + "grad_norm": 26.207210540771484, + "learning_rate": 2.1969938379033166e-07, + "logits/chosen": 1.031854271888733, + "logits/rejected": 1.132165551185608, + "logps/chosen": -612.7374877929688, + "logps/rejected": -739.6328125, + "logps/weighted_chosen": -1.765527367591858, + "logps/weighted_rejected": -2.033642530441284, + "loss": 0.651, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1225.0250244140625, + "rewards/margins": 304.65313720703125, + "rewards/rejected": -1529.754638671875, + "rewards/weighted_accuracies": 0.6625000238418579, + "rewards/weighted_chosen": -3.520800828933716, + "rewards/weighted_margins": 0.6206909418106079, + "rewards/weighted_rejected": -4.140466213226318, + "step": 1350 + }, + { + "epoch": 0.7268354599505645, + "grad_norm": 34.468292236328125, + "learning_rate": 2.1202441267215265e-07, + "logits/chosen": 1.0174560546875, + "logits/rejected": 1.1193969249725342, + "logps/chosen": -470.8296813964844, + "logps/rejected": -591.5484619140625, + "logps/weighted_chosen": -1.7814452648162842, + "logps/weighted_rejected": -2.140429735183716, + "loss": 0.5305, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -917.5382690429688, + "rewards/margins": 288.85626220703125, + "rewards/rejected": -1205.9417724609375, + "rewards/weighted_accuracies": 0.75, + "rewards/weighted_chosen": -3.5008788108825684, + "rewards/weighted_margins": 0.9064697027206421, + "rewards/weighted_rejected": -4.407177925109863, + "step": 1360 + }, + { + "epoch": 0.732179838332554, + "grad_norm": 21.90385627746582, + "learning_rate": 2.0444966253466572e-07, + "logits/chosen": 1.0034301280975342, + "logits/rejected": 1.10748291015625, + "logps/chosen": -540.3875122070312, + "logps/rejected": -647.7062377929688, + "logps/weighted_chosen": -1.706689476966858, + "logps/weighted_rejected": -2.0198731422424316, + "loss": 0.5812, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1060.621826171875, + "rewards/margins": 257.01739501953125, + "rewards/rejected": -1317.5814208984375, + "rewards/weighted_accuracies": 0.7093750238418579, + "rewards/weighted_chosen": -3.3475098609924316, + "rewards/weighted_margins": 0.73974609375, + "rewards/weighted_rejected": -4.087500095367432, + "step": 1370 + }, + { + "epoch": 0.7375242167145434, + "grad_norm": 33.499942779541016, + "learning_rate": 1.9697776953496397e-07, + "logits/chosen": 0.9693847894668579, + "logits/rejected": 1.10760498046875, + "logps/chosen": -488.953125, + "logps/rejected": -595.0203247070312, + "logps/weighted_chosen": -1.684667944908142, + "logps/weighted_rejected": -1.96923828125, + "loss": 0.5746, + "rewards/accuracies": 0.625, + "rewards/chosen": -918.1976318359375, + "rewards/margins": 263.83868408203125, + "rewards/rejected": -1182.3046875, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -3.234545946121216, + "rewards/weighted_margins": 0.729565441608429, + "rewards/weighted_rejected": -3.9642090797424316, + "step": 1380 + }, + { + "epoch": 0.7428685950965328, + "grad_norm": 35.277217864990234, + "learning_rate": 1.8961133403390113e-07, + "logits/chosen": 0.9752136468887329, + "logits/rejected": 1.10723876953125, + "logps/chosen": -523.109375, + "logps/rejected": -648.828125, + "logps/weighted_chosen": -1.7232666015625, + "logps/weighted_rejected": -2.091503858566284, + "loss": 0.5315, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -1000.2140502929688, + "rewards/margins": 316.61407470703125, + "rewards/rejected": -1316.784423828125, + "rewards/weighted_accuracies": 0.7406250238418579, + "rewards/weighted_chosen": -3.356005907058716, + "rewards/weighted_margins": 0.911975085735321, + "rewards/weighted_rejected": -4.268701076507568, + "step": 1390 + }, + { + "epoch": 0.7482129734785222, + "grad_norm": 21.35260581970215, + "learning_rate": 1.823529196911156e-07, + "logits/chosen": 0.9263717532157898, + "logits/rejected": 1.057672142982483, + "logps/chosen": -540.96875, + "logps/rejected": -634.2281494140625, + "logps/weighted_chosen": -1.689355492591858, + "logps/weighted_rejected": -2.0255370140075684, + "loss": 0.5502, + "rewards/accuracies": 0.6656249761581421, + "rewards/chosen": -1045.257080078125, + "rewards/margins": 237.46640014648438, + "rewards/rejected": -1282.846923828125, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -3.2940430641174316, + "rewards/weighted_margins": 0.8021484613418579, + "rewards/weighted_rejected": -4.097070217132568, + "step": 1400 + }, + { + "epoch": 0.7535573518605118, + "grad_norm": 30.216272354125977, + "learning_rate": 1.7520505257282886e-07, + "logits/chosen": 1.0029723644256592, + "logits/rejected": 1.1317260265350342, + "logps/chosen": -565.8390502929688, + "logps/rejected": -697.296875, + "logps/weighted_chosen": -1.7831542491912842, + "logps/weighted_rejected": -2.0927491188049316, + "loss": 0.5786, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -1122.6875, + "rewards/margins": 330.94451904296875, + "rewards/rejected": -1453.018798828125, + "rewards/weighted_accuracies": 0.721875011920929, + "rewards/weighted_chosen": -3.546679735183716, + "rewards/weighted_margins": 0.7677246332168579, + "rewards/weighted_rejected": -4.313330173492432, + "step": 1410 + }, + { + "epoch": 0.7589017302425012, + "grad_norm": 20.653310775756836, + "learning_rate": 1.6817022027272572e-07, + "logits/chosen": 1.0297119617462158, + "logits/rejected": 1.171728491783142, + "logps/chosen": -586.5125122070312, + "logps/rejected": -767.6124877929688, + "logps/weighted_chosen": -1.7421143054962158, + "logps/weighted_rejected": -2.044604539871216, + "loss": 0.5964, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1158.762451171875, + "rewards/margins": 440.51837158203125, + "rewards/rejected": -1598.609375, + "rewards/weighted_accuracies": 0.668749988079071, + "rewards/weighted_chosen": -3.4945311546325684, + "rewards/weighted_margins": 0.729174792766571, + "rewards/weighted_rejected": -4.224023342132568, + "step": 1420 + }, + { + "epoch": 0.7642461086244906, + "grad_norm": 26.14088249206543, + "learning_rate": 1.6125087104622502e-07, + "logits/chosen": 0.964739978313446, + "logits/rejected": 1.1062530279159546, + "logps/chosen": -527.21875, + "logps/rejected": -644.2249755859375, + "logps/weighted_chosen": -1.7293701171875, + "logps/weighted_rejected": -2.035961866378784, + "loss": 0.549, + "rewards/accuracies": 0.671875, + "rewards/chosen": -1012.3078002929688, + "rewards/margins": 297.9566345214844, + "rewards/rejected": -1310.5374755859375, + "rewards/weighted_accuracies": 0.7124999761581421, + "rewards/weighted_chosen": -3.399121046066284, + "rewards/weighted_margins": 0.7321411371231079, + "rewards/weighted_rejected": -4.131494045257568, + "step": 1430 + }, + { + "epoch": 0.76959048700648, + "grad_norm": 55.1640625, + "learning_rate": 1.5444941295843945e-07, + "logits/chosen": 1.019598364830017, + "logits/rejected": 1.130334496498108, + "logps/chosen": -563.7281494140625, + "logps/rejected": -702.3312377929688, + "logps/weighted_chosen": -1.7079346179962158, + "logps/weighted_rejected": -2.0197997093200684, + "loss": 0.6096, + "rewards/accuracies": 0.659375011920929, + "rewards/chosen": -1101.768798828125, + "rewards/margins": 330.1796875, + "rewards/rejected": -1431.9156494140625, + "rewards/weighted_accuracies": 0.706250011920929, + "rewards/weighted_chosen": -3.331787109375, + "rewards/weighted_margins": 0.747509777545929, + "rewards/weighted_rejected": -4.078906059265137, + "step": 1440 + }, + { + "epoch": 0.7749348653884695, + "grad_norm": 18.344297409057617, + "learning_rate": 1.4776821304612392e-07, + "logits/chosen": 1.002709984779358, + "logits/rejected": 1.1254394054412842, + "logps/chosen": -528.4046630859375, + "logps/rejected": -664.4578247070312, + "logps/weighted_chosen": -1.745263695716858, + "logps/weighted_rejected": -2.0626220703125, + "loss": 0.5895, + "rewards/accuracies": 0.640625, + "rewards/chosen": -1030.324951171875, + "rewards/margins": 332.5367126464844, + "rewards/rejected": -1362.7054443359375, + "rewards/weighted_accuracies": 0.6781250238418579, + "rewards/weighted_chosen": -3.437744140625, + "rewards/weighted_margins": 0.7569335699081421, + "rewards/weighted_rejected": -4.194043159484863, + "step": 1450 + }, + { + "epoch": 0.780279243770459, + "grad_norm": 29.112760543823242, + "learning_rate": 1.4120959649390136e-07, + "logits/chosen": 0.989703357219696, + "logits/rejected": 1.140380859375, + "logps/chosen": -545.0640869140625, + "logps/rejected": -697.6968994140625, + "logps/weighted_chosen": -1.7493896484375, + "logps/weighted_rejected": -2.0991454124450684, + "loss": 0.5681, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -1055.081298828125, + "rewards/margins": 379.7699279785156, + "rewards/rejected": -1433.981201171875, + "rewards/weighted_accuracies": 0.7281249761581421, + "rewards/weighted_chosen": -3.4771485328674316, + "rewards/weighted_margins": 0.823376476764679, + "rewards/weighted_rejected": -4.301074028015137, + "step": 1460 + }, + { + "epoch": 0.7856236221524484, + "grad_norm": 57.93193054199219, + "learning_rate": 1.347758458250541e-07, + "logits/chosen": 1.0691406726837158, + "logits/rejected": 1.1666259765625, + "logps/chosen": -585.9695434570312, + "logps/rejected": -730.2203369140625, + "logps/weighted_chosen": -1.85400390625, + "logps/weighted_rejected": -2.194042921066284, + "loss": 0.5631, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1163.90234375, + "rewards/margins": 340.4307556152344, + "rewards/rejected": -1504.21484375, + "rewards/weighted_accuracies": 0.7250000238418579, + "rewards/weighted_chosen": -3.6827635765075684, + "rewards/weighted_margins": 0.797412097454071, + "rewards/weighted_rejected": -4.481103420257568, + "step": 1470 + }, + { + "epoch": 0.7909680005344378, + "grad_norm": 29.835397720336914, + "learning_rate": 1.2846920010716266e-07, + "logits/chosen": 1.075353980064392, + "logits/rejected": 1.196508765220642, + "logps/chosen": -549.4031372070312, + "logps/rejected": -679.5828247070312, + "logps/weighted_chosen": -1.759301781654358, + "logps/weighted_rejected": -2.1464600563049316, + "loss": 0.507, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1083.15625, + "rewards/margins": 321.015625, + "rewards/rejected": -1403.971923828125, + "rewards/weighted_accuracies": 0.734375, + "rewards/weighted_chosen": -3.5008788108825684, + "rewards/weighted_margins": 0.897204577922821, + "rewards/weighted_rejected": -4.398486137390137, + "step": 1480 + }, + { + "epoch": 0.7963123789164273, + "grad_norm": 23.083351135253906, + "learning_rate": 1.2229185417286764e-07, + "logits/chosen": 1.098974585533142, + "logits/rejected": 1.2252686023712158, + "logps/chosen": -625.7210693359375, + "logps/rejected": -787.33203125, + "logps/weighted_chosen": -1.838476538658142, + "logps/weighted_rejected": -2.1626954078674316, + "loss": 0.5567, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1248.914794921875, + "rewards/margins": 402.00604248046875, + "rewards/rejected": -1650.8570556640625, + "rewards/weighted_accuracies": 0.690625011920929, + "rewards/weighted_chosen": -3.658154249191284, + "rewards/weighted_margins": 0.83709716796875, + "rewards/weighted_rejected": -4.4951171875, + "step": 1490 + }, + { + "epoch": 0.8016567572984167, + "grad_norm": 32.6721076965332, + "learning_rate": 1.1624595785602576e-07, + "logits/chosen": 1.08929443359375, + "logits/rejected": 1.2185547351837158, + "logps/chosen": -602.4078369140625, + "logps/rejected": -713.8046875, + "logps/weighted_chosen": -1.90576171875, + "logps/weighted_rejected": -2.267749071121216, + "loss": 0.5729, + "rewards/accuracies": 0.65625, + "rewards/chosen": -1194.6865234375, + "rewards/margins": 281.7783203125, + "rewards/rejected": -1477.046875, + "rewards/weighted_accuracies": 0.6968749761581421, + "rewards/weighted_chosen": -3.792309522628784, + "rewards/weighted_margins": 0.8580077886581421, + "rewards/weighted_rejected": -4.649755954742432, + "step": 1500 + }, + { + "epoch": 0.8016567572984167, + "eval_logits/chosen": 1.0791990756988525, + "eval_logits/rejected": 1.2038549184799194, + "eval_logps/chosen": -568.4256591796875, + "eval_logps/rejected": -692.4755859375, + "eval_logps/weighted_chosen": -1.8026981353759766, + "eval_logps/weighted_rejected": -2.1537508964538574, + "eval_loss": 0.5527533888816833, + "eval_rewards/accuracies": 0.6425662040710449, + "eval_rewards/chosen": -1119.2159423828125, + "eval_rewards/margins": 307.88360595703125, + "eval_rewards/rejected": -1427.0916748046875, + "eval_rewards/weighted_accuracies": 0.7235234379768372, + "eval_rewards/weighted_chosen": -3.598057270050049, + "eval_rewards/weighted_margins": 0.8228372931480408, + "eval_rewards/weighted_rejected": -4.420894622802734, + "eval_runtime": 1093.9192, + "eval_samples_per_second": 1.793, + "eval_steps_per_second": 0.449, + "step": 1500 + } + ], + "logging_steps": 10, + "max_steps": 1872, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1500/training_args.bin b/checkpoint-1500/training_args.bin new file mode 100644 index 0000000..5ca5510 --- /dev/null +++ b/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101724c70f22fcebab803e2165d08660fa3875b4ac47496a35436a812fde407f +size 8785 diff --git a/checkpoint-500/chat_template.jinja b/checkpoint-500/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/checkpoint-500/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000..ceed8ae --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128009, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/checkpoint-500/generation_config.json b/checkpoint-500/generation_config.json new file mode 100644 index 0000000..38e1b41 --- /dev/null +++ b/checkpoint-500/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128009 + ], + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.54.1" +} diff --git a/checkpoint-500/model-00001-of-00004.safetensors b/checkpoint-500/model-00001-of-00004.safetensors new file mode 100644 index 0000000..74bc104 --- /dev/null +++ b/checkpoint-500/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b160b74254fcb49d54589d4643f5b894098bd57d44c98744ba27dedb0218fad6 +size 4976698672 diff --git a/checkpoint-500/model-00002-of-00004.safetensors b/checkpoint-500/model-00002-of-00004.safetensors new file mode 100644 index 0000000..37c8a34 --- /dev/null +++ b/checkpoint-500/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:979b12483a8ffc3b358825331dbe5812e15c057059d96c7427619c6907c96aaa +size 4999802720 diff --git a/checkpoint-500/model-00003-of-00004.safetensors b/checkpoint-500/model-00003-of-00004.safetensors new file mode 100644 index 0000000..4c42612 --- /dev/null +++ b/checkpoint-500/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f842e2dde8ade31eb650713241b2b13005368f8bd90f39d665551e36875fc124 +size 4915916176 diff --git a/checkpoint-500/model-00004-of-00004.safetensors b/checkpoint-500/model-00004-of-00004.safetensors new file mode 100644 index 0000000..6bdc53a --- /dev/null +++ b/checkpoint-500/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cc4af84c2589fedf4887c3ca01f15c90949e06a9c1ae25789daed3e469ac47 +size 1168138808 diff --git a/checkpoint-500/model.safetensors.index.json b/checkpoint-500/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/checkpoint-500/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000..b43be96 --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000..34d134f --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000..ccef79d --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,1127 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.26721891909947226, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0005344378381989445, + "grad_norm": 31.37949562072754, + "learning_rate": 0.0, + "logits/chosen": -0.2252655029296875, + "logits/rejected": -0.192626953125, + "logps/chosen": -110.828125, + "logps/rejected": -115.515625, + "logps/weighted_chosen": -0.31903076171875, + "logps/weighted_rejected": -0.333709716796875, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/weighted_accuracies": 0.0, + "rewards/weighted_chosen": 0.0, + "rewards/weighted_margins": 0.0, + "rewards/weighted_rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.005344378381989445, + "grad_norm": 75.76042938232422, + "learning_rate": 4.787234042553191e-08, + "logits/chosen": -0.3284708559513092, + "logits/rejected": -0.3214448392391205, + "logps/chosen": -134.46006774902344, + "logps/rejected": -138.06597900390625, + "logps/weighted_chosen": -0.3890923261642456, + "logps/weighted_rejected": -0.4067721962928772, + "loss": 0.6953, + "rewards/accuracies": 0.2951388955116272, + "rewards/chosen": -0.2072482705116272, + "rewards/margins": -0.1790364533662796, + "rewards/rejected": -0.0282118059694767, + "rewards/weighted_accuracies": 0.3472222089767456, + "rewards/weighted_chosen": -0.0032717387657612562, + "rewards/weighted_margins": -0.0047516291961073875, + "rewards/weighted_rejected": 0.0014813741436228156, + "step": 10 + }, + { + "epoch": 0.01068875676397889, + "grad_norm": 24.94420623779297, + "learning_rate": 1.0106382978723404e-07, + "logits/chosen": -0.2780090272426605, + "logits/rejected": -0.2689048647880554, + "logps/chosen": -115.3070297241211, + "logps/rejected": -114.8101577758789, + "logps/weighted_chosen": -0.354583740234375, + "logps/weighted_rejected": -0.36929017305374146, + "loss": 0.6925, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": 0.0087890625, + "rewards/margins": -0.02734375, + "rewards/rejected": 0.0361328125, + "rewards/weighted_accuracies": 0.4312500059604645, + "rewards/weighted_chosen": 0.0016719817649573088, + "rewards/weighted_margins": 0.0011638641590252519, + "rewards/weighted_rejected": 0.0005052566411904991, + "step": 20 + }, + { + "epoch": 0.016033135145968335, + "grad_norm": 26.90618133544922, + "learning_rate": 1.5425531914893615e-07, + "logits/chosen": -0.26707762479782104, + "logits/rejected": -0.2697288393974304, + "logps/chosen": -122.49687194824219, + "logps/rejected": -128.2218780517578, + "logps/weighted_chosen": -0.364663690328598, + "logps/weighted_rejected": -0.40430909395217896, + "loss": 0.6919, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": 0.099609375, + "rewards/margins": 0.2177734375, + "rewards/rejected": -0.1181640625, + "rewards/weighted_accuracies": 0.4468750059604645, + "rewards/weighted_chosen": 0.0011037830263376236, + "rewards/weighted_margins": 0.0029600143898278475, + "rewards/weighted_rejected": -0.0018524170154705644, + "step": 30 + }, + { + "epoch": 0.02137751352795778, + "grad_norm": 19.056455612182617, + "learning_rate": 2.074468085106383e-07, + "logits/chosen": -0.31552428007125854, + "logits/rejected": -0.309671014547348, + "logps/chosen": -126.5132827758789, + "logps/rejected": -127.7515640258789, + "logps/weighted_chosen": -0.3717803955078125, + "logps/weighted_rejected": -0.36720579862594604, + "loss": 0.6927, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.041015625, + "rewards/margins": 0.150390625, + "rewards/rejected": -0.109375, + "rewards/weighted_accuracies": 0.44062501192092896, + "rewards/weighted_chosen": 0.0009314537164755166, + "rewards/weighted_margins": 0.0009433746454305947, + "rewards/weighted_rejected": -1.831054760259576e-05, + "step": 40 + }, + { + "epoch": 0.026721891909947223, + "grad_norm": 94.1146469116211, + "learning_rate": 2.6063829787234044e-07, + "logits/chosen": -0.2799697816371918, + "logits/rejected": -0.2664199769496918, + "logps/chosen": -120.34375, + "logps/rejected": -120.19062805175781, + "logps/weighted_chosen": -0.367788702249527, + "logps/weighted_rejected": -0.37299805879592896, + "loss": 0.6934, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.01806640625, + "rewards/margins": -0.11865234375, + "rewards/rejected": 0.1005859375, + "rewards/weighted_accuracies": 0.421875, + "rewards/weighted_chosen": 0.0011091709602624178, + "rewards/weighted_margins": -0.0005058288807049394, + "rewards/weighted_rejected": 0.00161571498028934, + "step": 50 + }, + { + "epoch": 0.03206627029193667, + "grad_norm": 47.161922454833984, + "learning_rate": 3.1382978723404253e-07, + "logits/chosen": -0.22172394394874573, + "logits/rejected": -0.2157600373029709, + "logps/chosen": -115.8382797241211, + "logps/rejected": -118.75859069824219, + "logps/weighted_chosen": -0.36602783203125, + "logps/weighted_rejected": -0.369253545999527, + "loss": 0.6908, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.10546875, + "rewards/margins": -0.1259765625, + "rewards/rejected": 0.0205078125, + "rewards/weighted_accuracies": 0.4000000059604645, + "rewards/weighted_chosen": 0.008862781338393688, + "rewards/weighted_margins": 0.00811080913990736, + "rewards/weighted_rejected": 0.0007405281066894531, + "step": 60 + }, + { + "epoch": 0.037410648673926114, + "grad_norm": 42.38877868652344, + "learning_rate": 3.6702127659574467e-07, + "logits/chosen": -0.299722284078598, + "logits/rejected": -0.29665374755859375, + "logps/chosen": -114.1656265258789, + "logps/rejected": -118.0765609741211, + "logps/weighted_chosen": -0.35313719511032104, + "logps/weighted_rejected": -0.3739013671875, + "loss": 0.6949, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.0693359375, + "rewards/margins": -0.0458984375, + "rewards/rejected": -0.0234375, + "rewards/weighted_accuracies": 0.390625, + "rewards/weighted_chosen": 0.0011390686267986894, + "rewards/weighted_margins": -0.0028884888160973787, + "rewards/weighted_rejected": 0.0040260315872728825, + "step": 70 + }, + { + "epoch": 0.04275502705591556, + "grad_norm": 35.451927185058594, + "learning_rate": 4.202127659574468e-07, + "logits/chosen": -0.30262452363967896, + "logits/rejected": -0.24024733901023865, + "logps/chosen": -112.3812484741211, + "logps/rejected": -111.8375015258789, + "logps/weighted_chosen": -0.37481385469436646, + "logps/weighted_rejected": -0.38435667753219604, + "loss": 0.694, + "rewards/accuracies": 0.390625, + "rewards/chosen": -0.1552734375, + "rewards/margins": -0.0341796875, + "rewards/rejected": -0.12109375, + "rewards/weighted_accuracies": 0.48750001192092896, + "rewards/weighted_chosen": 0.01006317138671875, + "rewards/weighted_margins": 0.0036018372047692537, + "rewards/weighted_rejected": 0.006462156772613525, + "step": 80 + }, + { + "epoch": 0.048099405437905, + "grad_norm": 42.264678955078125, + "learning_rate": 4.734042553191489e-07, + "logits/chosen": -0.2891853451728821, + "logits/rejected": -0.23835448920726776, + "logps/chosen": -118.0296859741211, + "logps/rejected": -116.484375, + "logps/weighted_chosen": -0.376077264547348, + "logps/weighted_rejected": -0.3848114013671875, + "loss": 0.6935, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": 0.0458984375, + "rewards/margins": 0.267578125, + "rewards/rejected": -0.2216796875, + "rewards/weighted_accuracies": 0.46562498807907104, + "rewards/weighted_chosen": 0.016840171068906784, + "rewards/weighted_margins": 0.014923477545380592, + "rewards/weighted_rejected": 0.00187511439435184, + "step": 90 + }, + { + "epoch": 0.053443783819894446, + "grad_norm": 36.87267303466797, + "learning_rate": 5.26595744680851e-07, + "logits/chosen": -0.3333755433559418, + "logits/rejected": -0.28821104764938354, + "logps/chosen": -124.0367202758789, + "logps/rejected": -124.33906555175781, + "logps/weighted_chosen": -0.39268797636032104, + "logps/weighted_rejected": -0.4093261659145355, + "loss": 0.6875, + "rewards/accuracies": 0.44062501192092896, + "rewards/chosen": -0.0302734375, + "rewards/margins": 0.36835938692092896, + "rewards/rejected": -0.39863282442092896, + "rewards/weighted_accuracies": 0.484375, + "rewards/weighted_chosen": 0.012582575902342796, + "rewards/weighted_margins": 0.026942063122987747, + "rewards/weighted_rejected": -0.014329910278320312, + "step": 100 + }, + { + "epoch": 0.058788162201883896, + "grad_norm": 17.8848876953125, + "learning_rate": 5.797872340425531e-07, + "logits/chosen": -0.3335327208042145, + "logits/rejected": -0.32384032011032104, + "logps/chosen": -117.6968765258789, + "logps/rejected": -119.85859680175781, + "logps/weighted_chosen": -0.35866087675094604, + "logps/weighted_rejected": -0.37585145235061646, + "loss": 0.7015, + "rewards/accuracies": 0.3968749940395355, + "rewards/chosen": -0.524609386920929, + "rewards/margins": -0.01328125037252903, + "rewards/rejected": -0.511523425579071, + "rewards/weighted_accuracies": 0.4281249940395355, + "rewards/weighted_chosen": -0.0057319640181958675, + "rewards/weighted_margins": 0.0012493133544921875, + "rewards/weighted_rejected": -0.0070056915283203125, + "step": 110 + }, + { + "epoch": 0.06413254058387334, + "grad_norm": 37.190059661865234, + "learning_rate": 6.329787234042553e-07, + "logits/chosen": -0.29607391357421875, + "logits/rejected": -0.2735137939453125, + "logps/chosen": -120.81718444824219, + "logps/rejected": -127.04219055175781, + "logps/weighted_chosen": -0.41831666231155396, + "logps/weighted_rejected": -0.42036741971969604, + "loss": 0.7443, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.43964844942092896, + "rewards/margins": 0.690234363079071, + "rewards/rejected": -1.1298828125, + "rewards/weighted_accuracies": 0.45625001192092896, + "rewards/weighted_chosen": -0.07387389987707138, + "rewards/weighted_margins": -0.04258232191205025, + "rewards/weighted_rejected": -0.031409453600645065, + "step": 120 + }, + { + "epoch": 0.06947691896586278, + "grad_norm": 22.76742172241211, + "learning_rate": 6.861702127659574e-07, + "logits/chosen": -0.30709609389305115, + "logits/rejected": -0.29381561279296875, + "logps/chosen": -120.3140640258789, + "logps/rejected": -121.73515319824219, + "logps/weighted_chosen": -0.38916015625, + "logps/weighted_rejected": -0.38067322969436646, + "loss": 0.6906, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -0.8837890625, + "rewards/margins": 0.4273437559604645, + "rewards/rejected": -1.310937523841858, + "rewards/weighted_accuracies": 0.46875, + "rewards/weighted_chosen": -0.0019147873390465975, + "rewards/weighted_margins": 0.025261688977479935, + "rewards/weighted_rejected": -0.027143806219100952, + "step": 130 + }, + { + "epoch": 0.07482129734785223, + "grad_norm": 25.72498321533203, + "learning_rate": 7.393617021276596e-07, + "logits/chosen": -0.2590804994106293, + "logits/rejected": -0.25146180391311646, + "logps/chosen": -114.2992172241211, + "logps/rejected": -118.26094055175781, + "logps/weighted_chosen": -0.376434326171875, + "logps/weighted_rejected": -0.39961546659469604, + "loss": 0.6968, + "rewards/accuracies": 0.4906249940395355, + "rewards/chosen": -1.7428710460662842, + "rewards/margins": 1.043554663658142, + "rewards/rejected": -2.7867188453674316, + "rewards/weighted_accuracies": 0.46562498807907104, + "rewards/weighted_chosen": -0.01796722412109375, + "rewards/weighted_margins": 0.044054411351680756, + "rewards/weighted_rejected": -0.062059782445430756, + "step": 140 + }, + { + "epoch": 0.08016567572984168, + "grad_norm": 27.819217681884766, + "learning_rate": 7.925531914893616e-07, + "logits/chosen": -0.260824590921402, + "logits/rejected": -0.2456924468278885, + "logps/chosen": -118.3109359741211, + "logps/rejected": -116.1898422241211, + "logps/weighted_chosen": -0.3584175109863281, + "logps/weighted_rejected": -0.389230340719223, + "loss": 0.693, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -2.796679735183716, + "rewards/margins": 1.470312476158142, + "rewards/rejected": -4.267773628234863, + "rewards/weighted_accuracies": 0.5406249761581421, + "rewards/weighted_chosen": 0.02875671349465847, + "rewards/weighted_margins": 0.08138389885425568, + "rewards/weighted_rejected": -0.05276889726519585, + "step": 150 + }, + { + "epoch": 0.08551005411183112, + "grad_norm": 20.792280197143555, + "learning_rate": 8.457446808510637e-07, + "logits/chosen": -0.27181702852249146, + "logits/rejected": -0.26198044419288635, + "logps/chosen": -119.7906265258789, + "logps/rejected": -120.09687805175781, + "logps/weighted_chosen": -0.3836608827114105, + "logps/weighted_rejected": -0.40611571073532104, + "loss": 0.6795, + "rewards/accuracies": 0.503125011920929, + "rewards/chosen": -3.6748046875, + "rewards/margins": 1.8689453601837158, + "rewards/rejected": -5.542382717132568, + "rewards/weighted_accuracies": 0.543749988079071, + "rewards/weighted_chosen": -0.013747024349868298, + "rewards/weighted_margins": 0.10787200927734375, + "rewards/weighted_rejected": -0.12159118801355362, + "step": 160 + }, + { + "epoch": 0.09085443249382057, + "grad_norm": 28.162086486816406, + "learning_rate": 8.989361702127659e-07, + "logits/chosen": -0.30354803800582886, + "logits/rejected": -0.28291016817092896, + "logps/chosen": -118.81172180175781, + "logps/rejected": -123.3851547241211, + "logps/weighted_chosen": -0.364785760641098, + "logps/weighted_rejected": -0.40638428926467896, + "loss": 0.7104, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -4.450781345367432, + "rewards/margins": 1.540624976158142, + "rewards/rejected": -5.989843845367432, + "rewards/weighted_accuracies": 0.49687498807907104, + "rewards/weighted_chosen": -0.05272483825683594, + "rewards/weighted_margins": 0.03959999233484268, + "rewards/weighted_rejected": -0.0922950729727745, + "step": 170 + }, + { + "epoch": 0.09619881087581, + "grad_norm": 62.7450065612793, + "learning_rate": 9.52127659574468e-07, + "logits/chosen": -0.3086685240268707, + "logits/rejected": -0.29756468534469604, + "logps/chosen": -120.9000015258789, + "logps/rejected": -121.6031265258789, + "logps/weighted_chosen": -0.4021057188510895, + "logps/weighted_rejected": -0.43016356229782104, + "loss": 0.6902, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -5.364843845367432, + "rewards/margins": 0.95654296875, + "rewards/rejected": -6.323437690734863, + "rewards/weighted_accuracies": 0.515625, + "rewards/weighted_chosen": -0.07762374728918076, + "rewards/weighted_margins": 0.042090605944395065, + "rewards/weighted_rejected": -0.11983337253332138, + "step": 180 + }, + { + "epoch": 0.10154318925779945, + "grad_norm": 107.93684387207031, + "learning_rate": 9.99999129927477e-07, + "logits/chosen": -0.2998809814453125, + "logits/rejected": -0.2729034423828125, + "logps/chosen": -121.68046569824219, + "logps/rejected": -122.90547180175781, + "logps/weighted_chosen": -0.4000488221645355, + "logps/weighted_rejected": -0.49153441190719604, + "loss": 0.6846, + "rewards/accuracies": 0.4906249940395355, + "rewards/chosen": -7.084570407867432, + "rewards/margins": 1.763671875, + "rewards/rejected": -8.850390434265137, + "rewards/weighted_accuracies": 0.5625, + "rewards/weighted_chosen": -0.06708984076976776, + "rewards/weighted_margins": 0.19685058295726776, + "rewards/weighted_rejected": -0.2636512815952301, + "step": 190 + }, + { + "epoch": 0.10688756763978889, + "grad_norm": 42.62810516357422, + "learning_rate": 9.99894724888679e-07, + "logits/chosen": -0.2994216978549957, + "logits/rejected": -0.2775813937187195, + "logps/chosen": -129.10311889648438, + "logps/rejected": -127.71015930175781, + "logps/weighted_chosen": -0.40317994356155396, + "logps/weighted_rejected": -0.4718689024448395, + "loss": 0.7052, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -8.422460556030273, + "rewards/margins": 1.215234398841858, + "rewards/rejected": -9.638280868530273, + "rewards/weighted_accuracies": 0.518750011920929, + "rewards/weighted_chosen": -0.10227356106042862, + "rewards/weighted_margins": 0.08837012946605682, + "rewards/weighted_rejected": -0.19074249267578125, + "step": 200 + }, + { + "epoch": 0.11223194602177834, + "grad_norm": 56.36786651611328, + "learning_rate": 9.996163469793475e-07, + "logits/chosen": -0.3200393617153168, + "logits/rejected": -0.28080445528030396, + "logps/chosen": -127.65625, + "logps/rejected": -122.5406265258789, + "logps/weighted_chosen": -0.402487188577652, + "logps/weighted_rejected": -0.4784179627895355, + "loss": 0.6855, + "rewards/accuracies": 0.559374988079071, + "rewards/chosen": -9.696874618530273, + "rewards/margins": 3.138671875, + "rewards/rejected": -12.8359375, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.07228164374828339, + "rewards/weighted_margins": 0.1660926789045334, + "rewards/weighted_rejected": -0.23847046494483948, + "step": 210 + }, + { + "epoch": 0.11757632440376779, + "grad_norm": 24.45851707458496, + "learning_rate": 9.991640930802883e-07, + "logits/chosen": -0.30699461698532104, + "logits/rejected": -0.3066558837890625, + "logps/chosen": -125.8734359741211, + "logps/rejected": -129.6999969482422, + "logps/weighted_chosen": -0.41710203886032104, + "logps/weighted_rejected": -0.4827117919921875, + "loss": 0.6884, + "rewards/accuracies": 0.546875, + "rewards/chosen": -12.277539253234863, + "rewards/margins": 2.571484327316284, + "rewards/rejected": -14.851171493530273, + "rewards/weighted_accuracies": 0.5218750238418579, + "rewards/weighted_chosen": -0.12371826171875, + "rewards/weighted_margins": 0.1390731781721115, + "rewards/weighted_rejected": -0.26273268461227417, + "step": 220 + }, + { + "epoch": 0.12292070278575723, + "grad_norm": 27.215944290161133, + "learning_rate": 9.98538120584459e-07, + "logits/chosen": -0.3107505738735199, + "logits/rejected": -0.283193975687027, + "logps/chosen": -134.8015594482422, + "logps/rejected": -138.5890655517578, + "logps/weighted_chosen": -0.4462524354457855, + "logps/weighted_rejected": -0.511853039264679, + "loss": 0.6939, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -15.346875190734863, + "rewards/margins": 2.830273389816284, + "rewards/rejected": -18.179492950439453, + "rewards/weighted_accuracies": 0.5562499761581421, + "rewards/weighted_chosen": -0.1530204713344574, + "rewards/weighted_margins": 0.1789344847202301, + "rewards/weighted_rejected": -0.3320491909980774, + "step": 230 + }, + { + "epoch": 0.12826508116774668, + "grad_norm": 17.173702239990234, + "learning_rate": 9.977386473421917e-07, + "logits/chosen": -0.27986279129981995, + "logits/rejected": -0.2772073745727539, + "logps/chosen": -121.0453109741211, + "logps/rejected": -124.65312194824219, + "logps/weighted_chosen": -0.4528869688510895, + "logps/weighted_rejected": -0.501416027545929, + "loss": 0.7222, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -18.59375, + "rewards/margins": 2.5132813453674316, + "rewards/rejected": -21.106054306030273, + "rewards/weighted_accuracies": 0.578125, + "rewards/weighted_chosen": -0.2343955934047699, + "rewards/weighted_margins": 0.07334060966968536, + "rewards/weighted_rejected": -0.30777662992477417, + "step": 240 + }, + { + "epoch": 0.13360945954973613, + "grad_norm": 24.67556381225586, + "learning_rate": 9.96765951585378e-07, + "logits/chosen": -0.262664794921875, + "logits/rejected": -0.24544373154640198, + "logps/chosen": -121.7421875, + "logps/rejected": -126.42500305175781, + "logps/weighted_chosen": -0.46705931425094604, + "logps/weighted_rejected": -0.547576904296875, + "loss": 0.6683, + "rewards/accuracies": 0.5625, + "rewards/chosen": -21.9228515625, + "rewards/margins": 2.5238280296325684, + "rewards/rejected": -24.447071075439453, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.2455238401889801, + "rewards/weighted_margins": 0.1756332367658615, + "rewards/weighted_rejected": -0.4210983216762543, + "step": 250 + }, + { + "epoch": 0.13895383793172555, + "grad_norm": 33.55352783203125, + "learning_rate": 9.956203718306388e-07, + "logits/chosen": -0.18781813979148865, + "logits/rejected": -0.156982421875, + "logps/chosen": -127.8578109741211, + "logps/rejected": -132.35546875, + "logps/weighted_chosen": -0.4968322813510895, + "logps/weighted_rejected": -0.5148254632949829, + "loss": 0.7213, + "rewards/accuracies": 0.5531250238418579, + "rewards/chosen": -26.109960556030273, + "rewards/margins": 2.660937547683716, + "rewards/rejected": -28.761327743530273, + "rewards/weighted_accuracies": 0.534375011920929, + "rewards/weighted_chosen": -0.29717254638671875, + "rewards/weighted_margins": 0.04170074313879013, + "rewards/weighted_rejected": -0.3386779725551605, + "step": 260 + }, + { + "epoch": 0.144298216313715, + "grad_norm": 242.56521606445312, + "learning_rate": 9.943023067615136e-07, + "logits/chosen": -0.17297974228858948, + "logits/rejected": -0.1584724485874176, + "logps/chosen": -139.23046875, + "logps/rejected": -142.3390655517578, + "logps/weighted_chosen": -0.48270875215530396, + "logps/weighted_rejected": -0.560772716999054, + "loss": 0.7059, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -33.55195236206055, + "rewards/margins": 3.075976610183716, + "rewards/rejected": -36.6328125, + "rewards/weighted_accuracies": 0.4937500059604645, + "rewards/weighted_chosen": -0.321615606546402, + "rewards/weighted_margins": 0.13364562392234802, + "rewards/weighted_rejected": -0.45517730712890625, + "step": 270 + }, + { + "epoch": 0.14964259469570446, + "grad_norm": 16.8142147064209, + "learning_rate": 9.928122150897112e-07, + "logits/chosen": -0.21183013916015625, + "logits/rejected": -0.168986514210701, + "logps/chosen": -130.66250610351562, + "logps/rejected": -132.0812530517578, + "logps/weighted_chosen": -0.48672789335250854, + "logps/weighted_rejected": -0.578961193561554, + "loss": 0.6614, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -34.4287109375, + "rewards/margins": 3.7144532203674316, + "rewards/rejected": -38.127342224121094, + "rewards/weighted_accuracies": 0.5687500238418579, + "rewards/weighted_chosen": -0.28246229887008667, + "rewards/weighted_margins": 0.23281364142894745, + "rewards/weighted_rejected": -0.5149310827255249, + "step": 280 + }, + { + "epoch": 0.1549869730776939, + "grad_norm": 17.110448837280273, + "learning_rate": 9.9115061539547e-07, + "logits/chosen": -0.20588979125022888, + "logits/rejected": -0.18258285522460938, + "logps/chosen": -141.0203094482422, + "logps/rejected": -142.50625610351562, + "logps/weighted_chosen": -0.4897003173828125, + "logps/weighted_rejected": -0.5554351806640625, + "loss": 0.7051, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -38.395896911621094, + "rewards/margins": 3.002734422683716, + "rewards/rejected": -41.394920349121094, + "rewards/weighted_accuracies": 0.5406249761581421, + "rewards/weighted_chosen": -0.3098343014717102, + "rewards/weighted_margins": 0.1397857666015625, + "rewards/weighted_rejected": -0.4498863220214844, + "step": 290 + }, + { + "epoch": 0.16033135145968336, + "grad_norm": 21.960878372192383, + "learning_rate": 9.893180859470818e-07, + "logits/chosen": -0.1905662566423416, + "logits/rejected": -0.16956177353858948, + "logps/chosen": -131.609375, + "logps/rejected": -133.328125, + "logps/weighted_chosen": -0.5007995367050171, + "logps/weighted_rejected": -0.5999816656112671, + "loss": 0.6676, + "rewards/accuracies": 0.559374988079071, + "rewards/chosen": -38.5810546875, + "rewards/margins": 5.082226753234863, + "rewards/rejected": -43.662498474121094, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": -0.27146607637405396, + "rewards/weighted_margins": 0.2570602297782898, + "rewards/weighted_rejected": -0.5287536382675171, + "step": 300 + }, + { + "epoch": 0.16567572984167278, + "grad_norm": 45.954952239990234, + "learning_rate": 9.873152644996424e-07, + "logits/chosen": -0.23566055297851562, + "logits/rejected": -0.23574523627758026, + "logps/chosen": -134.5734405517578, + "logps/rejected": -136.5500030517578, + "logps/weighted_chosen": -0.565338134765625, + "logps/weighted_rejected": -0.620849609375, + "loss": 0.7314, + "rewards/accuracies": 0.59375, + "rewards/chosen": -44.349021911621094, + "rewards/margins": 6.559765815734863, + "rewards/rejected": -50.908592224121094, + "rewards/weighted_accuracies": 0.606249988079071, + "rewards/weighted_chosen": -0.44298553466796875, + "rewards/weighted_margins": 0.1432647705078125, + "rewards/weighted_rejected": -0.5857940912246704, + "step": 310 + }, + { + "epoch": 0.17102010822366223, + "grad_norm": 22.280086517333984, + "learning_rate": 9.85142848073103e-07, + "logits/chosen": -0.2385093718767166, + "logits/rejected": -0.21721191704273224, + "logps/chosen": -138.27499389648438, + "logps/rejected": -138.13516235351562, + "logps/weighted_chosen": -0.5242675542831421, + "logps/weighted_rejected": -0.591705322265625, + "loss": 0.6903, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -48.820899963378906, + "rewards/margins": 1.562109351158142, + "rewards/rejected": -50.394920349121094, + "rewards/weighted_accuracies": 0.5718749761581421, + "rewards/weighted_chosen": -0.3802032470703125, + "rewards/weighted_margins": 0.14343567192554474, + "rewards/weighted_rejected": -0.523608386516571, + "step": 320 + }, + { + "epoch": 0.17636448660565168, + "grad_norm": 19.125673294067383, + "learning_rate": 9.828015927096914e-07, + "logits/chosen": -0.2693939208984375, + "logits/rejected": -0.23780974745750427, + "logps/chosen": -148.56875610351562, + "logps/rejected": -147.24453735351562, + "logps/weighted_chosen": -0.521636962890625, + "logps/weighted_rejected": -0.5547729730606079, + "loss": 0.6703, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -50.613868713378906, + "rewards/margins": 3.5126953125, + "rewards/rejected": -54.113670349121094, + "rewards/weighted_accuracies": 0.550000011920929, + "rewards/weighted_chosen": -0.321145623922348, + "rewards/weighted_margins": 0.18524780869483948, + "rewards/weighted_rejected": -0.5064395666122437, + "step": 330 + }, + { + "epoch": 0.18170886498764113, + "grad_norm": 14.343570709228516, + "learning_rate": 9.802923132107968e-07, + "logits/chosen": -0.25108033418655396, + "logits/rejected": -0.2313240021467209, + "logps/chosen": -149.78671264648438, + "logps/rejected": -153.48046875, + "logps/weighted_chosen": -0.562329113483429, + "logps/weighted_rejected": -0.583233654499054, + "loss": 0.741, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -58.06855392456055, + "rewards/margins": 4.334374904632568, + "rewards/rejected": -62.40312576293945, + "rewards/weighted_accuracies": 0.512499988079071, + "rewards/weighted_chosen": -0.47095948457717896, + "rewards/weighted_margins": 0.02723388746380806, + "rewards/weighted_rejected": -0.498382568359375, + "step": 340 + }, + { + "epoch": 0.18705324336963056, + "grad_norm": 66.20745849609375, + "learning_rate": 9.776158828534024e-07, + "logits/chosen": -0.2837265133857727, + "logits/rejected": -0.2537124752998352, + "logps/chosen": -149.5359344482422, + "logps/rejected": -194.4765625, + "logps/weighted_chosen": -0.540771484375, + "logps/weighted_rejected": -0.589801013469696, + "loss": 0.6779, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -59.75273513793945, + "rewards/margins": 7.256640434265137, + "rewards/rejected": -66.99531555175781, + "rewards/weighted_accuracies": 0.5531250238418579, + "rewards/weighted_chosen": -0.4245468080043793, + "rewards/weighted_margins": 0.146717831492424, + "rewards/weighted_rejected": -0.571148693561554, + "step": 350 + }, + { + "epoch": 0.19239762175162, + "grad_norm": 13.690327644348145, + "learning_rate": 9.747732330861695e-07, + "logits/chosen": -0.18397827446460724, + "logits/rejected": -0.15533828735351562, + "logps/chosen": -143.4562530517578, + "logps/rejected": -145.44686889648438, + "logps/weighted_chosen": -0.5499817132949829, + "logps/weighted_rejected": -0.6820312738418579, + "loss": 0.6644, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -63.58203125, + "rewards/margins": 7.447851657867432, + "rewards/rejected": -71.0308609008789, + "rewards/weighted_accuracies": 0.6031249761581421, + "rewards/weighted_chosen": -0.43936461210250854, + "rewards/weighted_margins": 0.3239502012729645, + "rewards/weighted_rejected": -0.7634918093681335, + "step": 360 + }, + { + "epoch": 0.19774200013360946, + "grad_norm": 20.364688873291016, + "learning_rate": 9.717653532052742e-07, + "logits/chosen": -0.16991272568702698, + "logits/rejected": -0.16076354682445526, + "logps/chosen": -137.11172485351562, + "logps/rejected": -146.09375, + "logps/weighted_chosen": -0.609234631061554, + "logps/weighted_rejected": -0.658831775188446, + "loss": 0.7108, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -63.02363204956055, + "rewards/margins": 9.1494140625, + "rewards/rejected": -72.1488265991211, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": -0.5135604739189148, + "rewards/weighted_margins": 0.1833236664533615, + "rewards/weighted_rejected": -0.6966766119003296, + "step": 370 + }, + { + "epoch": 0.2030863785155989, + "grad_norm": 15.459892272949219, + "learning_rate": 9.685932900101146e-07, + "logits/chosen": -0.17396697402000427, + "logits/rejected": -0.1525276154279709, + "logps/chosen": -143.62655639648438, + "logps/rejected": -147.03515625, + "logps/weighted_chosen": -0.584460437297821, + "logps/weighted_rejected": -0.642120361328125, + "loss": 0.7159, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -68.6537094116211, + "rewards/margins": 5.928124904632568, + "rewards/rejected": -74.57109069824219, + "rewards/weighted_accuracies": 0.59375, + "rewards/weighted_chosen": -0.528277575969696, + "rewards/weighted_margins": 0.11352996528148651, + "rewards/weighted_rejected": -0.6417190432548523, + "step": 380 + }, + { + "epoch": 0.20843075689758836, + "grad_norm": 25.710723876953125, + "learning_rate": 9.652581474390043e-07, + "logits/chosen": -0.17167052626609802, + "logits/rejected": -0.14335784316062927, + "logps/chosen": -147.3156280517578, + "logps/rejected": -151.1484375, + "logps/weighted_chosen": -0.5953735113143921, + "logps/weighted_rejected": -0.649151623249054, + "loss": 0.7014, + "rewards/accuracies": 0.59375, + "rewards/chosen": -70.93769836425781, + "rewards/margins": 9.441015243530273, + "rewards/rejected": -80.35664367675781, + "rewards/weighted_accuracies": 0.612500011920929, + "rewards/weighted_chosen": -0.5777953863143921, + "rewards/weighted_margins": 0.10624237358570099, + "rewards/weighted_rejected": -0.6836212277412415, + "step": 390 + }, + { + "epoch": 0.21377513527957778, + "grad_norm": 42.97126007080078, + "learning_rate": 9.61761086184981e-07, + "logits/chosen": -0.192851260304451, + "logits/rejected": -0.16070251166820526, + "logps/chosen": -148.39688110351562, + "logps/rejected": -149.6046905517578, + "logps/weighted_chosen": -0.6315368413925171, + "logps/weighted_rejected": -0.7087768316268921, + "loss": 0.7113, + "rewards/accuracies": 0.565625011920929, + "rewards/chosen": -73.91288757324219, + "rewards/margins": 6.164453029632568, + "rewards/rejected": -80.05976867675781, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.6400848627090454, + "rewards/weighted_margins": 0.11194305121898651, + "rewards/weighted_rejected": -0.752105712890625, + "step": 400 + }, + { + "epoch": 0.21911951366156723, + "grad_norm": 17.99481773376465, + "learning_rate": 9.581033232918629e-07, + "logits/chosen": -0.14135894179344177, + "logits/rejected": -0.11229457706212997, + "logps/chosen": -145.88827514648438, + "logps/rejected": -149.74063110351562, + "logps/weighted_chosen": -0.6018310785293579, + "logps/weighted_rejected": -0.7620849609375, + "loss": 0.6764, + "rewards/accuracies": 0.6031249761581421, + "rewards/chosen": -74.9345703125, + "rewards/margins": 8.443944931030273, + "rewards/rejected": -83.36601257324219, + "rewards/weighted_accuracies": 0.578125, + "rewards/weighted_chosen": -0.578625500202179, + "rewards/weighted_margins": 0.35536497831344604, + "rewards/weighted_rejected": -0.9342681765556335, + "step": 410 + }, + { + "epoch": 0.22446389204355668, + "grad_norm": 19.236024856567383, + "learning_rate": 9.542861317306952e-07, + "logits/chosen": -0.1445457488298416, + "logits/rejected": -0.1324237883090973, + "logps/chosen": -148.76718139648438, + "logps/rejected": -150.97811889648438, + "logps/weighted_chosen": -0.5991576910018921, + "logps/weighted_rejected": -0.674072265625, + "loss": 0.6735, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -80.28242492675781, + "rewards/margins": 4.427929878234863, + "rewards/rejected": -84.7109375, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.543652355670929, + "rewards/weighted_margins": 0.184315487742424, + "rewards/weighted_rejected": -0.727569580078125, + "step": 420 + }, + { + "epoch": 0.22980827042554614, + "grad_norm": 14.300553321838379, + "learning_rate": 9.503108399567308e-07, + "logits/chosen": -0.14830398559570312, + "logits/rejected": -0.09484557807445526, + "logps/chosen": -162.40625, + "logps/rejected": -167.7195281982422, + "logps/weighted_chosen": -0.5840820074081421, + "logps/weighted_rejected": -0.647021472454071, + "loss": 0.672, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -89.32051086425781, + "rewards/margins": 8.4365234375, + "rewards/rejected": -97.79023742675781, + "rewards/weighted_accuracies": 0.596875011920929, + "rewards/weighted_chosen": -0.545318603515625, + "rewards/weighted_margins": 0.13585510849952698, + "rewards/weighted_rejected": -0.681243896484375, + "step": 430 + }, + { + "epoch": 0.23515264880753559, + "grad_norm": 21.087541580200195, + "learning_rate": 9.461788314471034e-07, + "logits/chosen": -0.10236664116382599, + "logits/rejected": -0.05696678161621094, + "logps/chosen": -155.13827514648438, + "logps/rejected": -159.828125, + "logps/weighted_chosen": -0.6568237543106079, + "logps/weighted_rejected": -0.7309814691543579, + "loss": 0.6898, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -92.36328125, + "rewards/margins": 7.942968845367432, + "rewards/rejected": -100.3109359741211, + "rewards/weighted_accuracies": 0.559374988079071, + "rewards/weighted_chosen": -0.6760101318359375, + "rewards/weighted_margins": 0.16096191108226776, + "rewards/weighted_rejected": -0.836883544921875, + "step": 440 + }, + { + "epoch": 0.240497027189525, + "grad_norm": 19.655607223510742, + "learning_rate": 9.418915442193509e-07, + "logits/chosen": -0.10150299221277237, + "logits/rejected": -0.05734825134277344, + "logps/chosen": -152.95858764648438, + "logps/rejected": -165.30078125, + "logps/weighted_chosen": -0.6426635980606079, + "logps/weighted_rejected": -0.69622802734375, + "loss": 0.7073, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -89.14433288574219, + "rewards/margins": 14.668359756469727, + "rewards/rejected": -103.8412094116211, + "rewards/weighted_accuracies": 0.565625011920929, + "rewards/weighted_chosen": -0.694122314453125, + "rewards/weighted_margins": 0.11862488090991974, + "rewards/weighted_rejected": -0.8125030398368835, + "step": 450 + }, + { + "epoch": 0.24584140557151446, + "grad_norm": 35.64816665649414, + "learning_rate": 9.374504703309579e-07, + "logits/chosen": -0.1730697602033615, + "logits/rejected": -0.1376514434814453, + "logps/chosen": -149.97811889648438, + "logps/rejected": -152.8171844482422, + "logps/weighted_chosen": -0.638531506061554, + "logps/weighted_rejected": -0.77386474609375, + "loss": 0.6563, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -86.3251953125, + "rewards/margins": 8.056055068969727, + "rewards/rejected": -94.4281234741211, + "rewards/weighted_accuracies": 0.5625, + "rewards/weighted_chosen": -0.6749175786972046, + "rewards/weighted_margins": 0.30900877714157104, + "rewards/weighted_rejected": -0.983563244342804, + "step": 460 + }, + { + "epoch": 0.2511857839535039, + "grad_norm": 20.12373161315918, + "learning_rate": 9.328571553600915e-07, + "logits/chosen": -0.14519290626049042, + "logits/rejected": -0.11081619560718536, + "logps/chosen": -154.73828125, + "logps/rejected": -158.703125, + "logps/weighted_chosen": -0.656390368938446, + "logps/weighted_rejected": -0.7307983636856079, + "loss": 0.7214, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -92.1263656616211, + "rewards/margins": 7.731054782867432, + "rewards/rejected": -99.86836242675781, + "rewards/weighted_accuracies": 0.565625011920929, + "rewards/weighted_chosen": -0.6984283328056335, + "rewards/weighted_margins": 0.14166870713233948, + "rewards/weighted_rejected": -0.8403259515762329, + "step": 470 + }, + { + "epoch": 0.25653016233549336, + "grad_norm": 19.16153907775879, + "learning_rate": 9.281131978677106e-07, + "logits/chosen": -0.1819503754377365, + "logits/rejected": -0.14701232314109802, + "logps/chosen": -160.57968139648438, + "logps/rejected": -164.2414093017578, + "logps/weighted_chosen": -0.609344482421875, + "logps/weighted_rejected": -0.750903308391571, + "loss": 0.6525, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -93.19140625, + "rewards/margins": 9.908788681030273, + "rewards/rejected": -103.0589828491211, + "rewards/weighted_accuracies": 0.590624988079071, + "rewards/weighted_chosen": -0.6482604742050171, + "rewards/weighted_margins": 0.2539626955986023, + "rewards/weighted_rejected": -0.9027160406112671, + "step": 480 + }, + { + "epoch": 0.2618745407174828, + "grad_norm": 47.42090606689453, + "learning_rate": 9.232202488412361e-07, + "logits/chosen": -0.18560639023780823, + "logits/rejected": -0.152149960398674, + "logps/chosen": -150.15078735351562, + "logps/rejected": -156.22109985351562, + "logps/weighted_chosen": -0.6249145269393921, + "logps/weighted_rejected": -0.755505383014679, + "loss": 0.6793, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -89.3853530883789, + "rewards/margins": 8.602734565734863, + "rewards/rejected": -97.9830093383789, + "rewards/weighted_accuracies": 0.5531250238418579, + "rewards/weighted_chosen": -0.6580413579940796, + "rewards/weighted_margins": 0.2508483827114105, + "rewards/weighted_rejected": -0.909197986125946, + "step": 490 + }, + { + "epoch": 0.26721891909947226, + "grad_norm": 21.92582893371582, + "learning_rate": 9.181800111199766e-07, + "logits/chosen": -0.2139892578125, + "logits/rejected": -0.185211181640625, + "logps/chosen": -153.05624389648438, + "logps/rejected": -155.6671905517578, + "logps/weighted_chosen": -0.660186767578125, + "logps/weighted_rejected": -0.740765392780304, + "loss": 0.6631, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -91.49101257324219, + "rewards/margins": 12.721094131469727, + "rewards/rejected": -104.1937484741211, + "rewards/weighted_accuracies": 0.6156250238418579, + "rewards/weighted_chosen": -0.679455578327179, + "rewards/weighted_margins": 0.2384185791015625, + "rewards/weighted_rejected": -0.9178100824356079, + "step": 500 + }, + { + "epoch": 0.26721891909947226, + "eval_logits/chosen": -0.27163267135620117, + "eval_logits/rejected": -0.24348750710487366, + "eval_logps/chosen": -159.72760009765625, + "eval_logps/rejected": -165.05091857910156, + "eval_logps/weighted_chosen": -0.6352449059486389, + "eval_logps/weighted_rejected": -0.7405111789703369, + "eval_loss": 0.6863088607788086, + "eval_rewards/accuracies": 0.5992871522903442, + "eval_rewards/chosen": -97.6285629272461, + "eval_rewards/margins": 10.883528709411621, + "eval_rewards/rejected": -108.50712585449219, + "eval_rewards/weighted_accuracies": 0.5972505211830139, + "eval_rewards/weighted_chosen": -0.6794247031211853, + "eval_rewards/weighted_margins": 0.20837070047855377, + "eval_rewards/weighted_rejected": -0.8877954483032227, + "eval_runtime": 1137.126, + "eval_samples_per_second": 1.725, + "eval_steps_per_second": 0.432, + "step": 500 + } + ], + "logging_steps": 10, + "max_steps": 1872, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000..5ca5510 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101724c70f22fcebab803e2165d08660fa3875b4ac47496a35436a812fde407f +size 8785 diff --git a/config.json b/config.json new file mode 100644 index 0000000..ceed8ae --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128009, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.1", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..c979d69 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:979651e84d387b127aaac8d3d6de0236f213113b1fc30b7e69b594628862814d +size 4976698672 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..9ba7240 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:484227fd6bd9b830cf0f05a2b06357eeadf17947dcc8790a996733ade3452e23 +size 4999802720 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..a32f104 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c34b95d45e13679fe9991d579491110cb9991c0878a1cdd9d5ce06c9a50c927f +size 4915916176 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..205d37f --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e463167fe1c8b6423c2efda6efe92fc52496d6ffa06fa2f708e5ffdf845f1959 +size 1168138808 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..58a7ef4 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 266240, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..b43be96 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..34d134f --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..5ca5510 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101724c70f22fcebab803e2165d08660fa3875b4ac47496a35436a812fde407f +size 8785