commit 29466a5cfb0d95aac5c2f606acf1a075281d891a Author: ModelHub XC Date: Sun Apr 26 20:54:46 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: ericflo/Llama-3.2-3B-COT Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..08be9c7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,42 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-240/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs 
-text +Llama-3.2-3B-COT-F32.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3.2-3B-COT-BF16.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3.2-3B-COT-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3.2-3B-COT-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3.2-3B-COT-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/Llama-3.2-3B-COT-BF16.gguf b/Llama-3.2-3B-COT-BF16.gguf new file mode 100644 index 0000000..b533dc9 --- /dev/null +++ b/Llama-3.2-3B-COT-BF16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6fe2f09a957a7fc769eabe3c93a22df4336910e45884eb290d3f9c1c05264a9 +size 6433689184 diff --git a/Llama-3.2-3B-COT-F32.gguf b/Llama-3.2-3B-COT-F32.gguf new file mode 100644 index 0000000..857d035 --- /dev/null +++ b/Llama-3.2-3B-COT-F32.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96c9a40d3113c42738fbe3b5786805fa0320af92af4100b5839d8eea28dc61e2 +size 12858838624 diff --git a/Llama-3.2-3B-COT-Q4_K_M.gguf b/Llama-3.2-3B-COT-Q4_K_M.gguf new file mode 100644 index 0000000..2587852 --- /dev/null +++ b/Llama-3.2-3B-COT-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12eacce417d324ac57ee6b9c1159a455481988307e9079b2af9604df21835108 +size 2019378784 diff --git a/Llama-3.2-3B-COT-Q6_K.gguf b/Llama-3.2-3B-COT-Q6_K.gguf new file mode 100644 index 0000000..3d5daf5 --- /dev/null +++ b/Llama-3.2-3B-COT-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe1e6e996610ffcb3e6d126c31ee5e13ab6f31242e5eb1c9abab52bd03bca2a0 +size 2643854944 diff --git a/Llama-3.2-3B-COT-Q8_0.gguf b/Llama-3.2-3B-COT-Q8_0.gguf new file mode 100644 index 0000000..b139468 --- /dev/null +++ b/Llama-3.2-3B-COT-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29346c438c160900d3714b64b6215fa040adcd1d5c7d0e3f826f66a05d6abd43 +size 3421900384 diff --git a/README.md b/README.md new file mode 100644 index 0000000..279d26b --- /dev/null +++ 
b/README.md @@ -0,0 +1,142 @@ +--- +license: apache-2.0 +base_model: +- meta-llama/Llama-3.2-3B +tags: +- llama-3.2 +- thought-chain +- instruction-finetuning +- transformers +library_name: transformers +pipeline_tag: text-generation +--- + +# Thought-Ranked Llama 3.2 3B + +## Model Description + +This model is a fine-tuned version of Meta's Llama 3.2 3B (Base) that has been specially trained to generate high-quality thought processes before producing answers. The model underwent 4 rounds of specialized fine-tuning using a thought-chain ranking approach. +(Weekend project, just a few hundred steps of training) + +### Training Process + +1. **Initial Generation**: For each training sample, the model generates multiple thought chains by prefixing different thought tokens: `{char}` for each character in `[a-zA-Z0-9]`. Each thought chain is allowed up to 128 tokens. + +2. **Answer Generation**: Following each thought chain, the model generates a complete answer with up to 2048 tokens. + +3. **Ranking & Selection**: An external LLM ranking system evaluates the quality of answers without seeing the thought processes, creating a ranking of the most effective thought patterns. + +4. **Final Training**: The model is then trained on the highest-ranked thought-answer pairs, learning to generate the most effective thought patterns autonomously. 
+ +### Key Features + +- **Thought Chain Generation**: The model has learned to generate explicit thought processes before providing answers +- **Greedy Decoding**: Uses greedy decoding (temperature 0.0) for both thought generation and final answers +- **Length Parameters**: + - Thought chains: Up to 128 tokens + - Final answers: Up to 2048 tokens + +### Model Architecture + +- Base model: Llama 3.2 3B (Base) +- Architecture: Transformer-based language model +- Parameters: ~3.2 billion +- Training Strategy: Supervised Fine-Tuning (SFT) with thought-chain ranking + +## Intended Use + +This model is designed for tasks that benefit from explicit reasoning chains, including but not limited to: +- Problem-solving +- Mathematical reasoning +- Logical deduction +- Step-by-step explanations +- Complex decision-making + +### Out-of-Scope Uses + +- Direct deployment without safety measures +- Applications requiring guaranteed accuracy +- Critical decision-making without human oversight +- Tasks requiring capabilities beyond the base Llama 3.2 3B model + +## Training Details + +### Training Data + +The model was trained using: +- Sample questions paired with multiple thought variations +- Thought chains generated using systematic character prefixes +- Rankings derived from LLM evaluation of answer quality + +### Training Procedure + +1. **Thought Generation Phase** + - Generated 62 variations of thoughts per sample (a-z, A-Z, 0-9) + - Decoded greedily (temperature=0.0) + - Maximum thought length: 128 tokens + +2. **Answer Generation Phase** + - Generated completions following each thought chain + - Maximum answer length: 2048 tokens + - Decoded greedily (temperature=0.0) + +3. **Ranking Phase** + - External LLM evaluated answer quality + - Ranking performed without access to thought chains + - Selected highest-performing thought-answer pairs + +4. 
**Final Training Phase** + - Fine-tuned on best-performing thought-answer combinations + - 4 complete rounds of training + +## Usage + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer + +model = AutoModelForCausalLM.from_pretrained("ericflo/Llama-3.2-3B-COT") +tokenizer = AutoTokenizer.from_pretrained("ericflo/Llama-3.2-3B-COT") + +# Example usage +prompt = "Solve this math problem: 2x + 3 = 7" +input_ids = tokenizer.apply_chat_template( + [{"role": "user", "content": prompt}], + return_tensors="pt" +) + +# Generate response with thought chain +output = model.generate( + input_ids, + temperature=1.0, +) + +response = tokenizer.decode(output[0]) +``` + +## Limitations + +- Limited to the capabilities of the base Llama 3.2 3B model +- May generate thought chains that are not always optimal +- Performance depends on the quality of the LLM ranking system used during training +- Training process may not capture all possible effective thought patterns +- Limited by the context window of the base model + +## Ethical Considerations + +- The model inherits biases from the base Llama 3.2 3B model +- Generated thought chains should be reviewed for accuracy and appropriateness +- The model's reasoning process should not be relied upon for critical decisions without human verification +- Users should implement appropriate content filtering and safety measures + +## Citation + +If you use this model in your research, please cite: + +```bibtex +@misc{thought-ranked-llama, + title={Thought-Ranked Llama 3.2: Fine-tuning Language Models with Ranked Thought Chains}, + author={Eric Florenzano}, + year={2024}, + howpublished={\url{https://huggingface.co/ericflo/Llama-3.2-3B-COT}} +} +``` \ No newline at end of file diff --git a/checkpoint-240/config.json b/checkpoint-240/config.json new file mode 100644 index 0000000..284f789 --- /dev/null +++ b/checkpoint-240/config.json @@ -0,0 +1,36 @@ +{ + "_name_or_path": "sftv3", + "architectures": [ + "LlamaForCausalLM" 
+ ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.3", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/checkpoint-240/generation_config.json b/checkpoint-240/generation_config.json new file mode 100644 index 0000000..d7bbf65 --- /dev/null +++ b/checkpoint-240/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "repetition_penalty": 1.05, + "stop_strings": [ + "<|im_end|>" + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.46.3" +} diff --git a/checkpoint-240/model-00001-of-00002.safetensors b/checkpoint-240/model-00001-of-00002.safetensors new file mode 100644 index 0000000..102c8ca --- /dev/null +++ b/checkpoint-240/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71f75d84857b40d8133c83adbfbc4fd68a9c1245954c6a3913166367ebf7d802 +size 4965799096 diff --git a/checkpoint-240/model-00002-of-00002.safetensors b/checkpoint-240/model-00002-of-00002.safetensors new file mode 100644 index 0000000..3705fb3 --- /dev/null +++ b/checkpoint-240/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f209608a7e3fc1e5457be1970565e8399fe1e1cd5aef93486bae396eda323b9 +size 
1459729952 diff --git a/checkpoint-240/model.safetensors.index.json b/checkpoint-240/model.safetensors.index.json new file mode 100644 index 0000000..d3a1f0f --- /dev/null +++ b/checkpoint-240/model.safetensors.index.json @@ -0,0 +1,261 @@ +{ + "metadata": { + "total_size": 6425499648 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + 
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": 
"model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + 
"model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": 
"model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/checkpoint-240/optimizer.pt b/checkpoint-240/optimizer.pt new file mode 100644 index 0000000..918817a --- /dev/null +++ b/checkpoint-240/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f8dcd964c7324dee95d35e7f119666dc1be4802683fd2f50370a8dfa8cbe6d9 +size 12851220310 diff --git a/checkpoint-240/rng_state.pth b/checkpoint-240/rng_state.pth new file mode 100644 index 0000000..6f3f145 --- /dev/null +++ b/checkpoint-240/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25c948628ba1804e0dc0e988e547ad05d27e9a4f2682e0d6f6481f21ae112fc2 +size 14244 diff --git a/checkpoint-240/scheduler.pt b/checkpoint-240/scheduler.pt new file mode 100644 index 0000000..badc485 --- /dev/null +++ b/checkpoint-240/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6645136fff226d3d6184eaaaa1973f6cf445a11b1249ff1dcd2933612b0b497 +size 1064 diff --git a/checkpoint-240/special_tokens_map.json b/checkpoint-240/special_tokens_map.json new file mode 100644 index 0000000..04829af --- /dev/null +++ 
b/checkpoint-240/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/checkpoint-240/tokenizer.json b/checkpoint-240/tokenizer.json new file mode 100644 index 0000000..3634b20 --- /dev/null +++ b/checkpoint-240/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9c4b74af81ca7d09faa23cc737405515f00d04de25d9ea1908153684b67d1c0 +size 17210020 diff --git a/checkpoint-240/tokenizer_config.json b/checkpoint-240/tokenizer_config.json new file mode 100644 index 0000000..8e05826 --- /dev/null +++ b/checkpoint-240/tokenizer_config.json @@ -0,0 +1,2076 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": 
"<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": 
"<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": 
"<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": [ + { + "name": "default", + "template": "{{bos_token}}{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" + }, + { + "name": "tool_use", + "template": "{%- macro json_to_python_type(json_spec) %}\n{%- set basic_type_map = {\n \"string\": \"str\",\n \"number\": \"float\",\n \"integer\": \"int\",\n \"boolean\": \"bool\"\n} %}\n\n{%- if basic_type_map[json_spec.type] is defined %}\n {{- basic_type_map[json_spec.type] }}\n{%- elif json_spec.type == \"array\" %}\n {{- \"list[\" + json_to_python_type(json_spec|items) + \"]\"}}\n{%- elif json_spec.type == \"object\" %}\n {%- if json_spec.additionalProperties is defined %}\n {{- \"dict[str, \" + json_to_python_type(json_spec.additionalProperties) + ']'}}\n {%- else %}\n {{- \"dict\" }}\n {%- endif %}\n{%- elif json_spec.type is iterable %}\n {{- \"Union[\" }}\n {%- for t in json_spec.type %}\n {{- json_to_python_type({\"type\": t}) }}\n {%- if not loop.last %}\n {{- \",\" }} \n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n{%- else %}\n {{- \"Any\" }}\n{%- endif %}\n{%- endmacro %}\n\n\n{{- bos_token }}\n{{- '<|im_start|>system\n' }}\n{{- \"You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: \" }}\n{%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{- '{\"type\": \"function\", \"function\": ' }}\n {{- '{\"name\": \"' + tool.name + '\", ' }}\n {{- '\"description\": \"' + tool.name + '(' }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {{- param_name + \": \" + json_to_python_type(param_fields) }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- if tool.return is defined %}\n {{- \" -> \" + json_to_python_type(tool.return) }}\n {%- endif %}\n {{- \" - \" + tool.description + \"\n\n\" }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {%- if loop.first %}\n {{- \" Args:\n\" }}\n {%- endif %}\n {{- \" \" + param_name + \"(\" + json_to_python_type(param_fields) + \"): \" + param_fields.description|trim }}\n {%- endfor %}\n {%- if tool.return is defined and tool.return.description is defined %}\n {{- \"\n Returns:\n \" + tool.return.description }}\n {%- endif %}\n {{- '\"' }}\n {{- ', \"parameters\": ' }}\n {%- if tool.parameters.properties | length == 0 %}\n {{- \"{}\" }}\n {%- else %}\n {{- tool.parameters|tojson }}\n {%- endif %}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \"\n\" }}\n {%- endif %}\n{%- endfor %}\n{{- \" \" }}\n{{- 'Use the following pydantic model json schema for each tool call you will make: {\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"}, \"arguments\": {\"title\": \"Arguments\", \"type\": \"object\"}}, \"required\": [\"name\", \"arguments\"], \"title\": \"FunctionCall\", \"type\": \"object\"}}\n' }}\n{{- \"For each function call return a json object with function name and arguments within XML tags as follows:\n\" }}\n{{- \"\n\" }}\n{{- '{\"name\": , \"arguments\": }\n' }}\n{{- '<|im_end|>\n' }}\n{%- for message in messages %}\n {%- if message.role == \"user\" or message.role == \"system\" or (message.role == 
\"assistant\" and message.tool_calls is not defined) %}\n {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- for tool_call in message.tool_calls %}\n {{- '\n\n' }} {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '{' }}\n {{- '\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\"' }}\n {{- ', '}}\n {%- if tool_call.arguments is defined %}\n {{- '\"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments|tojson }}\n {%- endif %}\n {%- endif %}\n {{- '}' }}\n {{- '\n' }}\n {%- endfor %}\n {{- '<|im_end|>\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.previtem and loop.previtem.role != \"tool\" %}\n {{- '<|im_start|>tool\n' }}\n {%- endif %}\n {{- '\n' }}\n {{- message.content }}\n {%- if not loop.last %}\n {{- '\n\n' }}\n {%- else %}\n {{- '\n' }}\n {%- endif %}\n {%- if not loop.last and loop.nextitem.role != \"tool\" %}\n {{- '<|im_end|>' }}\n {%- elif loop.last %}\n {{- '<|im_end|>' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\n' }}\n{%- endif %}\n" + } + ], + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "max_length": 16384, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|end_of_text|>", + "stride": 0, + "tokenizer_class": "PreTrainedTokenizerFast", + "truncation_side": "right", + "truncation_strategy": "longest_first" +} diff --git a/checkpoint-240/trainer_state.json b/checkpoint-240/trainer_state.json new file mode 100644 index 0000000..d72e19e --- /dev/null +++ b/checkpoint-240/trainer_state.json @@ -0,0 +1,2097 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.9587628865979383, + "eval_steps": 5, + "global_step": 240, + 
"is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016494845360824743, + "grad_norm": 5.5, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.569, + "step": 1 + }, + { + "epoch": 0.032989690721649485, + "grad_norm": 3.21875, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6647, + "step": 2 + }, + { + "epoch": 0.049484536082474224, + "grad_norm": 2.46875, + "learning_rate": 3e-06, + "loss": 0.8035, + "step": 3 + }, + { + "epoch": 0.06597938144329897, + "grad_norm": 5.0625, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7259, + "step": 4 + }, + { + "epoch": 0.08247422680412371, + "grad_norm": 3.984375, + "learning_rate": 5e-06, + "loss": 1.3804, + "step": 5 + }, + { + "epoch": 0.08247422680412371, + "eval_loss": 0.5400243401527405, + "eval_runtime": 1.246, + "eval_samples_per_second": 20.866, + "eval_steps_per_second": 20.866, + "step": 5 + }, + { + "epoch": 0.09896907216494845, + "grad_norm": 5.71875, + "learning_rate": 6e-06, + "loss": 0.7447, + "step": 6 + }, + { + "epoch": 0.1154639175257732, + "grad_norm": 4.65625, + "learning_rate": 7e-06, + "loss": 0.5788, + "step": 7 + }, + { + "epoch": 0.13195876288659794, + "grad_norm": 4.3125, + "learning_rate": 8.000000000000001e-06, + "loss": 0.394, + "step": 8 + }, + { + "epoch": 0.14845360824742268, + "grad_norm": 5.0, + "learning_rate": 9e-06, + "loss": 0.5634, + "step": 9 + }, + { + "epoch": 0.16494845360824742, + "grad_norm": 6.21875, + "learning_rate": 1e-05, + "loss": 0.3369, + "step": 10 + }, + { + "epoch": 0.16494845360824742, + "eval_loss": 0.5371499061584473, + "eval_runtime": 1.2372, + "eval_samples_per_second": 21.016, + "eval_steps_per_second": 21.016, + "step": 10 + }, + { + "epoch": 0.18144329896907216, + "grad_norm": 4.0, + "learning_rate": 9.956521739130436e-06, + "loss": 0.5154, + "step": 11 + }, + { + "epoch": 0.1979381443298969, + "grad_norm": 6.21875, + "learning_rate": 9.913043478260871e-06, + 
"loss": 0.5452, + "step": 12 + }, + { + "epoch": 0.21443298969072164, + "grad_norm": 7.09375, + "learning_rate": 9.869565217391304e-06, + "loss": 0.8781, + "step": 13 + }, + { + "epoch": 0.2309278350515464, + "grad_norm": 4.5, + "learning_rate": 9.82608695652174e-06, + "loss": 0.8791, + "step": 14 + }, + { + "epoch": 0.24742268041237114, + "grad_norm": 9.6875, + "learning_rate": 9.782608695652175e-06, + "loss": 0.7528, + "step": 15 + }, + { + "epoch": 0.24742268041237114, + "eval_loss": 0.5349909663200378, + "eval_runtime": 1.2584, + "eval_samples_per_second": 20.66, + "eval_steps_per_second": 20.66, + "step": 15 + }, + { + "epoch": 0.2639175257731959, + "grad_norm": 3.03125, + "learning_rate": 9.73913043478261e-06, + "loss": 0.3554, + "step": 16 + }, + { + "epoch": 0.2804123711340206, + "grad_norm": 3.609375, + "learning_rate": 9.695652173913043e-06, + "loss": 0.4604, + "step": 17 + }, + { + "epoch": 0.29690721649484536, + "grad_norm": 4.40625, + "learning_rate": 9.652173913043478e-06, + "loss": 0.6079, + "step": 18 + }, + { + "epoch": 0.3134020618556701, + "grad_norm": 4.96875, + "learning_rate": 9.608695652173914e-06, + "loss": 0.5466, + "step": 19 + }, + { + "epoch": 0.32989690721649484, + "grad_norm": 6.5, + "learning_rate": 9.565217391304349e-06, + "loss": 0.6961, + "step": 20 + }, + { + "epoch": 0.32989690721649484, + "eval_loss": 0.5351170897483826, + "eval_runtime": 1.2599, + "eval_samples_per_second": 20.636, + "eval_steps_per_second": 20.636, + "step": 20 + }, + { + "epoch": 0.3463917525773196, + "grad_norm": 6.0, + "learning_rate": 9.521739130434784e-06, + "loss": 0.9412, + "step": 21 + }, + { + "epoch": 0.3628865979381443, + "grad_norm": 5.6875, + "learning_rate": 9.478260869565217e-06, + "loss": 0.5747, + "step": 22 + }, + { + "epoch": 0.37938144329896906, + "grad_norm": 8.625, + "learning_rate": 9.434782608695652e-06, + "loss": 0.7224, + "step": 23 + }, + { + "epoch": 0.3958762886597938, + "grad_norm": 6.84375, + "learning_rate": 
9.391304347826087e-06, + "loss": 0.588, + "step": 24 + }, + { + "epoch": 0.41237113402061853, + "grad_norm": 4.71875, + "learning_rate": 9.347826086956523e-06, + "loss": 0.4478, + "step": 25 + }, + { + "epoch": 0.41237113402061853, + "eval_loss": 0.5326387882232666, + "eval_runtime": 1.2718, + "eval_samples_per_second": 20.444, + "eval_steps_per_second": 20.444, + "step": 25 + }, + { + "epoch": 0.4288659793814433, + "grad_norm": 8.125, + "learning_rate": 9.304347826086956e-06, + "loss": 0.5952, + "step": 26 + }, + { + "epoch": 0.44536082474226807, + "grad_norm": 3.90625, + "learning_rate": 9.260869565217391e-06, + "loss": 0.5545, + "step": 27 + }, + { + "epoch": 0.4618556701030928, + "grad_norm": 6.125, + "learning_rate": 9.217391304347826e-06, + "loss": 0.6788, + "step": 28 + }, + { + "epoch": 0.47835051546391755, + "grad_norm": 7.5, + "learning_rate": 9.173913043478261e-06, + "loss": 0.5714, + "step": 29 + }, + { + "epoch": 0.4948453608247423, + "grad_norm": 4.75, + "learning_rate": 9.130434782608697e-06, + "loss": 0.5826, + "step": 30 + }, + { + "epoch": 0.4948453608247423, + "eval_loss": 0.53000408411026, + "eval_runtime": 1.2766, + "eval_samples_per_second": 20.366, + "eval_steps_per_second": 20.366, + "step": 30 + }, + { + "epoch": 0.511340206185567, + "grad_norm": 6.875, + "learning_rate": 9.086956521739132e-06, + "loss": 0.5211, + "step": 31 + }, + { + "epoch": 0.5278350515463918, + "grad_norm": 3.734375, + "learning_rate": 9.043478260869565e-06, + "loss": 0.4774, + "step": 32 + }, + { + "epoch": 0.5443298969072164, + "grad_norm": 6.4375, + "learning_rate": 9e-06, + "loss": 0.7318, + "step": 33 + }, + { + "epoch": 0.5608247422680412, + "grad_norm": 5.75, + "learning_rate": 8.956521739130435e-06, + "loss": 0.5202, + "step": 34 + }, + { + "epoch": 0.5773195876288659, + "grad_norm": 4.625, + "learning_rate": 8.91304347826087e-06, + "loss": 0.6316, + "step": 35 + }, + { + "epoch": 0.5773195876288659, + "eval_loss": 0.5289856195449829, + "eval_runtime": 1.2887, 
+ "eval_samples_per_second": 20.175, + "eval_steps_per_second": 20.175, + "step": 35 + }, + { + "epoch": 0.5938144329896907, + "grad_norm": 5.0, + "learning_rate": 8.869565217391306e-06, + "loss": 0.396, + "step": 36 + }, + { + "epoch": 0.6103092783505155, + "grad_norm": 3.65625, + "learning_rate": 8.82608695652174e-06, + "loss": 0.2928, + "step": 37 + }, + { + "epoch": 0.6268041237113402, + "grad_norm": 7.28125, + "learning_rate": 8.782608695652174e-06, + "loss": 0.6256, + "step": 38 + }, + { + "epoch": 0.643298969072165, + "grad_norm": 4.8125, + "learning_rate": 8.73913043478261e-06, + "loss": 0.5751, + "step": 39 + }, + { + "epoch": 0.6597938144329897, + "grad_norm": 3.015625, + "learning_rate": 8.695652173913044e-06, + "loss": 1.1084, + "step": 40 + }, + { + "epoch": 0.6597938144329897, + "eval_loss": 0.5289974808692932, + "eval_runtime": 1.2832, + "eval_samples_per_second": 20.261, + "eval_steps_per_second": 20.261, + "step": 40 + }, + { + "epoch": 0.6762886597938145, + "grad_norm": 5.625, + "learning_rate": 8.65217391304348e-06, + "loss": 0.5896, + "step": 41 + }, + { + "epoch": 0.6927835051546392, + "grad_norm": 5.46875, + "learning_rate": 8.608695652173915e-06, + "loss": 0.5278, + "step": 42 + }, + { + "epoch": 0.709278350515464, + "grad_norm": 4.90625, + "learning_rate": 8.56521739130435e-06, + "loss": 0.4396, + "step": 43 + }, + { + "epoch": 0.7257731958762886, + "grad_norm": 3.546875, + "learning_rate": 8.521739130434783e-06, + "loss": 0.6561, + "step": 44 + }, + { + "epoch": 0.7422680412371134, + "grad_norm": 4.8125, + "learning_rate": 8.478260869565218e-06, + "loss": 0.3966, + "step": 45 + }, + { + "epoch": 0.7422680412371134, + "eval_loss": 0.5284722447395325, + "eval_runtime": 1.2831, + "eval_samples_per_second": 20.264, + "eval_steps_per_second": 20.264, + "step": 45 + }, + { + "epoch": 0.7587628865979381, + "grad_norm": 4.375, + "learning_rate": 8.434782608695653e-06, + "loss": 0.2738, + "step": 46 + }, + { + "epoch": 0.7752577319587629, + 
"grad_norm": 5.78125, + "learning_rate": 8.391304347826089e-06, + "loss": 0.7747, + "step": 47 + }, + { + "epoch": 0.7917525773195876, + "grad_norm": 4.3125, + "learning_rate": 8.347826086956522e-06, + "loss": 1.089, + "step": 48 + }, + { + "epoch": 0.8082474226804124, + "grad_norm": 3.578125, + "learning_rate": 8.304347826086957e-06, + "loss": 0.301, + "step": 49 + }, + { + "epoch": 0.8247422680412371, + "grad_norm": 5.40625, + "learning_rate": 8.260869565217392e-06, + "loss": 0.6264, + "step": 50 + }, + { + "epoch": 0.8247422680412371, + "eval_loss": 0.5277838110923767, + "eval_runtime": 1.2877, + "eval_samples_per_second": 20.192, + "eval_steps_per_second": 20.192, + "step": 50 + }, + { + "epoch": 0.8412371134020619, + "grad_norm": 4.71875, + "learning_rate": 8.217391304347827e-06, + "loss": 0.6022, + "step": 51 + }, + { + "epoch": 0.8577319587628865, + "grad_norm": 4.96875, + "learning_rate": 8.173913043478263e-06, + "loss": 0.2717, + "step": 52 + }, + { + "epoch": 0.8742268041237113, + "grad_norm": 4.1875, + "learning_rate": 8.130434782608696e-06, + "loss": 0.4995, + "step": 53 + }, + { + "epoch": 0.8907216494845361, + "grad_norm": 4.90625, + "learning_rate": 8.086956521739131e-06, + "loss": 0.5654, + "step": 54 + }, + { + "epoch": 0.9072164948453608, + "grad_norm": 6.78125, + "learning_rate": 8.043478260869566e-06, + "loss": 0.5308, + "step": 55 + }, + { + "epoch": 0.9072164948453608, + "eval_loss": 0.5283468961715698, + "eval_runtime": 1.2896, + "eval_samples_per_second": 20.161, + "eval_steps_per_second": 20.161, + "step": 55 + }, + { + "epoch": 0.9237113402061856, + "grad_norm": 3.515625, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6069, + "step": 56 + }, + { + "epoch": 0.9402061855670103, + "grad_norm": 4.03125, + "learning_rate": 7.956521739130435e-06, + "loss": 0.3835, + "step": 57 + }, + { + "epoch": 0.9567010309278351, + "grad_norm": 4.15625, + "learning_rate": 7.91304347826087e-06, + "loss": 0.5169, + "step": 58 + }, + { + "epoch": 
0.9731958762886598, + "grad_norm": 2.796875, + "learning_rate": 7.869565217391305e-06, + "loss": 0.4643, + "step": 59 + }, + { + "epoch": 0.9896907216494846, + "grad_norm": 5.46875, + "learning_rate": 7.82608695652174e-06, + "loss": 0.6768, + "step": 60 + }, + { + "epoch": 0.9896907216494846, + "eval_loss": 0.5277718305587769, + "eval_runtime": 1.2735, + "eval_samples_per_second": 20.416, + "eval_steps_per_second": 20.416, + "step": 60 + }, + { + "epoch": 1.0061855670103093, + "grad_norm": 8.625, + "learning_rate": 7.782608695652174e-06, + "loss": 0.7185, + "step": 61 + }, + { + "epoch": 1.022680412371134, + "grad_norm": 3.78125, + "learning_rate": 7.739130434782609e-06, + "loss": 0.5707, + "step": 62 + }, + { + "epoch": 1.0391752577319588, + "grad_norm": 3.46875, + "learning_rate": 7.695652173913044e-06, + "loss": 0.4542, + "step": 63 + }, + { + "epoch": 1.0556701030927835, + "grad_norm": 3.015625, + "learning_rate": 7.652173913043479e-06, + "loss": 0.3663, + "step": 64 + }, + { + "epoch": 1.0721649484536082, + "grad_norm": 2.046875, + "learning_rate": 7.608695652173914e-06, + "loss": 0.4641, + "step": 65 + }, + { + "epoch": 1.0721649484536082, + "eval_loss": 0.5268765091896057, + "eval_runtime": 1.2807, + "eval_samples_per_second": 20.302, + "eval_steps_per_second": 20.302, + "step": 65 + }, + { + "epoch": 1.088659793814433, + "grad_norm": 2.640625, + "learning_rate": 7.565217391304348e-06, + "loss": 0.6291, + "step": 66 + }, + { + "epoch": 1.1051546391752578, + "grad_norm": 2.53125, + "learning_rate": 7.5217391304347835e-06, + "loss": 0.2755, + "step": 67 + }, + { + "epoch": 1.1216494845360825, + "grad_norm": 2.734375, + "learning_rate": 7.478260869565218e-06, + "loss": 0.4472, + "step": 68 + }, + { + "epoch": 1.1381443298969072, + "grad_norm": 2.96875, + "learning_rate": 7.434782608695653e-06, + "loss": 0.4451, + "step": 69 + }, + { + "epoch": 1.1546391752577319, + "grad_norm": 5.34375, + "learning_rate": 7.391304347826087e-06, + "loss": 0.9232, + "step": 70 + 
}, + { + "epoch": 1.1546391752577319, + "eval_loss": 0.5259441137313843, + "eval_runtime": 1.2903, + "eval_samples_per_second": 20.151, + "eval_steps_per_second": 20.151, + "step": 70 + }, + { + "epoch": 1.1711340206185568, + "grad_norm": 4.53125, + "learning_rate": 7.347826086956522e-06, + "loss": 0.3537, + "step": 71 + }, + { + "epoch": 1.1876288659793814, + "grad_norm": 3.84375, + "learning_rate": 7.304347826086957e-06, + "loss": 0.3703, + "step": 72 + }, + { + "epoch": 1.2041237113402061, + "grad_norm": 4.84375, + "learning_rate": 7.2608695652173925e-06, + "loss": 1.0391, + "step": 73 + }, + { + "epoch": 1.220618556701031, + "grad_norm": 3.71875, + "learning_rate": 7.217391304347827e-06, + "loss": 0.5239, + "step": 74 + }, + { + "epoch": 1.2371134020618557, + "grad_norm": 2.96875, + "learning_rate": 7.173913043478261e-06, + "loss": 0.2765, + "step": 75 + }, + { + "epoch": 1.2371134020618557, + "eval_loss": 0.5257338285446167, + "eval_runtime": 1.2846, + "eval_samples_per_second": 20.24, + "eval_steps_per_second": 20.24, + "step": 75 + }, + { + "epoch": 1.2536082474226804, + "grad_norm": 2.96875, + "learning_rate": 7.130434782608696e-06, + "loss": 1.3695, + "step": 76 + }, + { + "epoch": 1.270103092783505, + "grad_norm": 5.0, + "learning_rate": 7.086956521739131e-06, + "loss": 0.7998, + "step": 77 + }, + { + "epoch": 1.2865979381443298, + "grad_norm": 2.546875, + "learning_rate": 7.0434782608695665e-06, + "loss": 0.3198, + "step": 78 + }, + { + "epoch": 1.3030927835051547, + "grad_norm": 2.875, + "learning_rate": 7e-06, + "loss": 0.848, + "step": 79 + }, + { + "epoch": 1.3195876288659794, + "grad_norm": 1.7578125, + "learning_rate": 6.956521739130435e-06, + "loss": 0.2294, + "step": 80 + }, + { + "epoch": 1.3195876288659794, + "eval_loss": 0.5260586142539978, + "eval_runtime": 1.2823, + "eval_samples_per_second": 20.277, + "eval_steps_per_second": 20.277, + "step": 80 + }, + { + "epoch": 1.3360824742268043, + "grad_norm": 3.546875, + "learning_rate": 
6.91304347826087e-06, + "loss": 1.289, + "step": 81 + }, + { + "epoch": 1.352577319587629, + "grad_norm": 2.34375, + "learning_rate": 6.869565217391305e-06, + "loss": 0.4023, + "step": 82 + }, + { + "epoch": 1.3690721649484536, + "grad_norm": 2.5625, + "learning_rate": 6.8260869565217395e-06, + "loss": 0.3378, + "step": 83 + }, + { + "epoch": 1.3855670103092783, + "grad_norm": 6.03125, + "learning_rate": 6.782608695652174e-06, + "loss": 0.7623, + "step": 84 + }, + { + "epoch": 1.402061855670103, + "grad_norm": 4.21875, + "learning_rate": 6.739130434782609e-06, + "loss": 0.7804, + "step": 85 + }, + { + "epoch": 1.402061855670103, + "eval_loss": 0.5257543325424194, + "eval_runtime": 1.2932, + "eval_samples_per_second": 20.106, + "eval_steps_per_second": 20.106, + "step": 85 + }, + { + "epoch": 1.418556701030928, + "grad_norm": 3.46875, + "learning_rate": 6.695652173913044e-06, + "loss": 0.5337, + "step": 86 + }, + { + "epoch": 1.4350515463917526, + "grad_norm": 2.71875, + "learning_rate": 6.652173913043479e-06, + "loss": 0.4078, + "step": 87 + }, + { + "epoch": 1.4515463917525773, + "grad_norm": 2.3125, + "learning_rate": 6.6086956521739135e-06, + "loss": 0.3687, + "step": 88 + }, + { + "epoch": 1.4680412371134022, + "grad_norm": 1.59375, + "learning_rate": 6.565217391304349e-06, + "loss": 0.1253, + "step": 89 + }, + { + "epoch": 1.4845360824742269, + "grad_norm": 3.9375, + "learning_rate": 6.521739130434783e-06, + "loss": 0.8425, + "step": 90 + }, + { + "epoch": 1.4845360824742269, + "eval_loss": 0.5253785848617554, + "eval_runtime": 1.2875, + "eval_samples_per_second": 20.194, + "eval_steps_per_second": 20.194, + "step": 90 + }, + { + "epoch": 1.5010309278350515, + "grad_norm": 3.53125, + "learning_rate": 6.478260869565218e-06, + "loss": 0.4739, + "step": 91 + }, + { + "epoch": 1.5175257731958762, + "grad_norm": 3.3125, + "learning_rate": 6.434782608695652e-06, + "loss": 0.9112, + "step": 92 + }, + { + "epoch": 1.534020618556701, + "grad_norm": 3.984375, + 
"learning_rate": 6.391304347826087e-06, + "loss": 0.8557, + "step": 93 + }, + { + "epoch": 1.5505154639175258, + "grad_norm": 3.0, + "learning_rate": 6.3478260869565225e-06, + "loss": 0.401, + "step": 94 + }, + { + "epoch": 1.5670103092783505, + "grad_norm": 2.859375, + "learning_rate": 6.304347826086958e-06, + "loss": 0.3689, + "step": 95 + }, + { + "epoch": 1.5670103092783505, + "eval_loss": 0.5263558030128479, + "eval_runtime": 1.2892, + "eval_samples_per_second": 20.168, + "eval_steps_per_second": 20.168, + "step": 95 + }, + { + "epoch": 1.5835051546391754, + "grad_norm": 4.28125, + "learning_rate": 6.260869565217392e-06, + "loss": 0.7863, + "step": 96 + }, + { + "epoch": 1.6, + "grad_norm": 2.578125, + "learning_rate": 6.217391304347826e-06, + "loss": 0.3845, + "step": 97 + }, + { + "epoch": 1.6164948453608248, + "grad_norm": 2.265625, + "learning_rate": 6.173913043478261e-06, + "loss": 0.8762, + "step": 98 + }, + { + "epoch": 1.6329896907216495, + "grad_norm": 3.1875, + "learning_rate": 6.1304347826086965e-06, + "loss": 0.6063, + "step": 99 + }, + { + "epoch": 1.6494845360824741, + "grad_norm": 3.5625, + "learning_rate": 6.086956521739132e-06, + "loss": 0.6211, + "step": 100 + }, + { + "epoch": 1.6494845360824741, + "eval_loss": 0.5265222787857056, + "eval_runtime": 1.2886, + "eval_samples_per_second": 20.176, + "eval_steps_per_second": 20.176, + "step": 100 + }, + { + "epoch": 1.6659793814432988, + "grad_norm": 4.25, + "learning_rate": 6.043478260869565e-06, + "loss": 0.6451, + "step": 101 + }, + { + "epoch": 1.6824742268041237, + "grad_norm": 3.1875, + "learning_rate": 6e-06, + "loss": 0.3286, + "step": 102 + }, + { + "epoch": 1.6989690721649484, + "grad_norm": 3.4375, + "learning_rate": 5.956521739130435e-06, + "loss": 0.9278, + "step": 103 + }, + { + "epoch": 1.7154639175257733, + "grad_norm": 3.734375, + "learning_rate": 5.91304347826087e-06, + "loss": 0.4669, + "step": 104 + }, + { + "epoch": 1.731958762886598, + "grad_norm": 3.421875, + 
"learning_rate": 5.8695652173913055e-06, + "loss": 0.8641, + "step": 105 + }, + { + "epoch": 1.731958762886598, + "eval_loss": 0.52596116065979, + "eval_runtime": 1.2826, + "eval_samples_per_second": 20.271, + "eval_steps_per_second": 20.271, + "step": 105 + }, + { + "epoch": 1.7484536082474227, + "grad_norm": 4.625, + "learning_rate": 5.826086956521739e-06, + "loss": 0.4438, + "step": 106 + }, + { + "epoch": 1.7649484536082474, + "grad_norm": 3.3125, + "learning_rate": 5.782608695652174e-06, + "loss": 0.2845, + "step": 107 + }, + { + "epoch": 1.781443298969072, + "grad_norm": 3.015625, + "learning_rate": 5.739130434782609e-06, + "loss": 0.2432, + "step": 108 + }, + { + "epoch": 1.797938144329897, + "grad_norm": 4.3125, + "learning_rate": 5.695652173913044e-06, + "loss": 1.0055, + "step": 109 + }, + { + "epoch": 1.8144329896907216, + "grad_norm": 4.15625, + "learning_rate": 5.652173913043479e-06, + "loss": 0.5496, + "step": 110 + }, + { + "epoch": 1.8144329896907216, + "eval_loss": 0.5252729058265686, + "eval_runtime": 1.2934, + "eval_samples_per_second": 20.103, + "eval_steps_per_second": 20.103, + "step": 110 + }, + { + "epoch": 1.8309278350515465, + "grad_norm": 2.84375, + "learning_rate": 5.608695652173914e-06, + "loss": 0.2763, + "step": 111 + }, + { + "epoch": 1.8474226804123712, + "grad_norm": 2.8125, + "learning_rate": 5.565217391304348e-06, + "loss": 0.6699, + "step": 112 + }, + { + "epoch": 1.863917525773196, + "grad_norm": 3.515625, + "learning_rate": 5.521739130434783e-06, + "loss": 0.3332, + "step": 113 + }, + { + "epoch": 1.8804123711340206, + "grad_norm": 3.375, + "learning_rate": 5.478260869565217e-06, + "loss": 0.5998, + "step": 114 + }, + { + "epoch": 1.8969072164948453, + "grad_norm": 3.078125, + "learning_rate": 5.4347826086956525e-06, + "loss": 0.2939, + "step": 115 + }, + { + "epoch": 1.8969072164948453, + "eval_loss": 0.5248008370399475, + "eval_runtime": 1.2932, + "eval_samples_per_second": 20.105, + "eval_steps_per_second": 20.105, + 
"step": 115 + }, + { + "epoch": 1.91340206185567, + "grad_norm": 4.03125, + "learning_rate": 5.391304347826088e-06, + "loss": 0.4714, + "step": 116 + }, + { + "epoch": 1.9298969072164949, + "grad_norm": 3.609375, + "learning_rate": 5.347826086956523e-06, + "loss": 0.5091, + "step": 117 + }, + { + "epoch": 1.9463917525773196, + "grad_norm": 4.3125, + "learning_rate": 5.304347826086957e-06, + "loss": 0.3588, + "step": 118 + }, + { + "epoch": 1.9628865979381445, + "grad_norm": 4.40625, + "learning_rate": 5.260869565217391e-06, + "loss": 0.5583, + "step": 119 + }, + { + "epoch": 1.9793814432989691, + "grad_norm": 3.09375, + "learning_rate": 5.2173913043478265e-06, + "loss": 0.3538, + "step": 120 + }, + { + "epoch": 1.9793814432989691, + "eval_loss": 0.5252537131309509, + "eval_runtime": 1.2955, + "eval_samples_per_second": 20.07, + "eval_steps_per_second": 20.07, + "step": 120 + }, + { + "epoch": 1.9958762886597938, + "grad_norm": 3.140625, + "learning_rate": 5.173913043478262e-06, + "loss": 0.4421, + "step": 121 + }, + { + "epoch": 2.0123711340206185, + "grad_norm": 3.875, + "learning_rate": 5.130434782608697e-06, + "loss": 0.7688, + "step": 122 + }, + { + "epoch": 2.028865979381443, + "grad_norm": 2.125, + "learning_rate": 5.08695652173913e-06, + "loss": 0.2562, + "step": 123 + }, + { + "epoch": 2.045360824742268, + "grad_norm": 2.078125, + "learning_rate": 5.043478260869565e-06, + "loss": 0.3267, + "step": 124 + }, + { + "epoch": 2.0618556701030926, + "grad_norm": 2.171875, + "learning_rate": 5e-06, + "loss": 0.3511, + "step": 125 + }, + { + "epoch": 2.0618556701030926, + "eval_loss": 0.5247462391853333, + "eval_runtime": 1.2904, + "eval_samples_per_second": 20.149, + "eval_steps_per_second": 20.149, + "step": 125 + }, + { + "epoch": 2.0783505154639177, + "grad_norm": 2.0625, + "learning_rate": 4.9565217391304355e-06, + "loss": 0.3862, + "step": 126 + }, + { + "epoch": 2.0948453608247424, + "grad_norm": 2.78125, + "learning_rate": 4.91304347826087e-06, + "loss": 
0.5712, + "step": 127 + }, + { + "epoch": 2.111340206185567, + "grad_norm": 3.203125, + "learning_rate": 4.869565217391305e-06, + "loss": 0.9281, + "step": 128 + }, + { + "epoch": 2.1278350515463917, + "grad_norm": 1.703125, + "learning_rate": 4.826086956521739e-06, + "loss": 0.3723, + "step": 129 + }, + { + "epoch": 2.1443298969072164, + "grad_norm": 2.21875, + "learning_rate": 4.782608695652174e-06, + "loss": 0.544, + "step": 130 + }, + { + "epoch": 2.1443298969072164, + "eval_loss": 0.525088906288147, + "eval_runtime": 1.2916, + "eval_samples_per_second": 20.131, + "eval_steps_per_second": 20.131, + "step": 130 + }, + { + "epoch": 2.160824742268041, + "grad_norm": 2.703125, + "learning_rate": 4.739130434782609e-06, + "loss": 0.4491, + "step": 131 + }, + { + "epoch": 2.177319587628866, + "grad_norm": 3.875, + "learning_rate": 4.695652173913044e-06, + "loss": 0.5799, + "step": 132 + }, + { + "epoch": 2.193814432989691, + "grad_norm": 3.171875, + "learning_rate": 4.652173913043478e-06, + "loss": 0.6968, + "step": 133 + }, + { + "epoch": 2.2103092783505156, + "grad_norm": 2.1875, + "learning_rate": 4.608695652173913e-06, + "loss": 0.5588, + "step": 134 + }, + { + "epoch": 2.2268041237113403, + "grad_norm": 2.03125, + "learning_rate": 4.565217391304348e-06, + "loss": 0.2793, + "step": 135 + }, + { + "epoch": 2.2268041237113403, + "eval_loss": 0.5259795784950256, + "eval_runtime": 1.2926, + "eval_samples_per_second": 20.115, + "eval_steps_per_second": 20.115, + "step": 135 + }, + { + "epoch": 2.243298969072165, + "grad_norm": 2.109375, + "learning_rate": 4.5217391304347826e-06, + "loss": 1.2616, + "step": 136 + }, + { + "epoch": 2.2597938144329897, + "grad_norm": 2.015625, + "learning_rate": 4.478260869565218e-06, + "loss": 0.2996, + "step": 137 + }, + { + "epoch": 2.2762886597938143, + "grad_norm": 2.640625, + "learning_rate": 4.434782608695653e-06, + "loss": 0.4981, + "step": 138 + }, + { + "epoch": 2.292783505154639, + "grad_norm": 4.25, + "learning_rate": 
4.391304347826087e-06, + "loss": 0.5537, + "step": 139 + }, + { + "epoch": 2.3092783505154637, + "grad_norm": 2.90625, + "learning_rate": 4.347826086956522e-06, + "loss": 1.1338, + "step": 140 + }, + { + "epoch": 2.3092783505154637, + "eval_loss": 0.5254911184310913, + "eval_runtime": 1.2896, + "eval_samples_per_second": 20.162, + "eval_steps_per_second": 20.162, + "step": 140 + }, + { + "epoch": 2.325773195876289, + "grad_norm": 2.046875, + "learning_rate": 4.304347826086957e-06, + "loss": 0.4052, + "step": 141 + }, + { + "epoch": 2.3422680412371135, + "grad_norm": 2.046875, + "learning_rate": 4.260869565217392e-06, + "loss": 0.3079, + "step": 142 + }, + { + "epoch": 2.358762886597938, + "grad_norm": 2.421875, + "learning_rate": 4.217391304347827e-06, + "loss": 0.3265, + "step": 143 + }, + { + "epoch": 2.375257731958763, + "grad_norm": 2.65625, + "learning_rate": 4.173913043478261e-06, + "loss": 0.4978, + "step": 144 + }, + { + "epoch": 2.3917525773195876, + "grad_norm": 2.890625, + "learning_rate": 4.130434782608696e-06, + "loss": 0.4199, + "step": 145 + }, + { + "epoch": 2.3917525773195876, + "eval_loss": 0.5256755352020264, + "eval_runtime": 1.2989, + "eval_samples_per_second": 20.017, + "eval_steps_per_second": 20.017, + "step": 145 + }, + { + "epoch": 2.4082474226804123, + "grad_norm": 2.453125, + "learning_rate": 4.086956521739131e-06, + "loss": 0.3124, + "step": 146 + }, + { + "epoch": 2.424742268041237, + "grad_norm": 2.828125, + "learning_rate": 4.0434782608695655e-06, + "loss": 0.5702, + "step": 147 + }, + { + "epoch": 2.441237113402062, + "grad_norm": 1.953125, + "learning_rate": 4.000000000000001e-06, + "loss": 0.4731, + "step": 148 + }, + { + "epoch": 2.4577319587628867, + "grad_norm": 2.921875, + "learning_rate": 3.956521739130435e-06, + "loss": 0.4377, + "step": 149 + }, + { + "epoch": 2.4742268041237114, + "grad_norm": 2.421875, + "learning_rate": 3.91304347826087e-06, + "loss": 0.2762, + "step": 150 + }, + { + "epoch": 2.4742268041237114, + 
"eval_loss": 0.525678813457489, + "eval_runtime": 1.2937, + "eval_samples_per_second": 20.097, + "eval_steps_per_second": 20.097, + "step": 150 + }, + { + "epoch": 2.490721649484536, + "grad_norm": 2.046875, + "learning_rate": 3.869565217391304e-06, + "loss": 0.4265, + "step": 151 + }, + { + "epoch": 2.507216494845361, + "grad_norm": 6.75, + "learning_rate": 3.8260869565217395e-06, + "loss": 0.4723, + "step": 152 + }, + { + "epoch": 2.5237113402061855, + "grad_norm": 2.53125, + "learning_rate": 3.782608695652174e-06, + "loss": 0.2729, + "step": 153 + }, + { + "epoch": 2.54020618556701, + "grad_norm": 2.296875, + "learning_rate": 3.739130434782609e-06, + "loss": 0.3146, + "step": 154 + }, + { + "epoch": 2.556701030927835, + "grad_norm": 2.90625, + "learning_rate": 3.6956521739130436e-06, + "loss": 0.4397, + "step": 155 + }, + { + "epoch": 2.556701030927835, + "eval_loss": 0.5252581834793091, + "eval_runtime": 1.2942, + "eval_samples_per_second": 20.09, + "eval_steps_per_second": 20.09, + "step": 155 + }, + { + "epoch": 2.5731958762886595, + "grad_norm": 2.890625, + "learning_rate": 3.6521739130434787e-06, + "loss": 0.3224, + "step": 156 + }, + { + "epoch": 2.5896907216494847, + "grad_norm": 2.5625, + "learning_rate": 3.6086956521739134e-06, + "loss": 0.7719, + "step": 157 + }, + { + "epoch": 2.6061855670103093, + "grad_norm": 2.59375, + "learning_rate": 3.565217391304348e-06, + "loss": 0.4424, + "step": 158 + }, + { + "epoch": 2.622680412371134, + "grad_norm": 2.421875, + "learning_rate": 3.5217391304347832e-06, + "loss": 0.3581, + "step": 159 + }, + { + "epoch": 2.6391752577319587, + "grad_norm": 3.671875, + "learning_rate": 3.4782608695652175e-06, + "loss": 0.5688, + "step": 160 + }, + { + "epoch": 2.6391752577319587, + "eval_loss": 0.5254755616188049, + "eval_runtime": 1.2943, + "eval_samples_per_second": 20.088, + "eval_steps_per_second": 20.088, + "step": 160 + }, + { + "epoch": 2.6556701030927834, + "grad_norm": 3.203125, + "learning_rate": 
3.4347826086956526e-06, + "loss": 0.6329, + "step": 161 + }, + { + "epoch": 2.6721649484536085, + "grad_norm": 2.84375, + "learning_rate": 3.391304347826087e-06, + "loss": 0.5551, + "step": 162 + }, + { + "epoch": 2.688659793814433, + "grad_norm": 2.4375, + "learning_rate": 3.347826086956522e-06, + "loss": 0.2958, + "step": 163 + }, + { + "epoch": 2.705154639175258, + "grad_norm": 4.4375, + "learning_rate": 3.3043478260869567e-06, + "loss": 1.1335, + "step": 164 + }, + { + "epoch": 2.7216494845360826, + "grad_norm": 2.921875, + "learning_rate": 3.2608695652173914e-06, + "loss": 0.6526, + "step": 165 + }, + { + "epoch": 2.7216494845360826, + "eval_loss": 0.526247501373291, + "eval_runtime": 1.2946, + "eval_samples_per_second": 20.083, + "eval_steps_per_second": 20.083, + "step": 165 + }, + { + "epoch": 2.7381443298969073, + "grad_norm": 2.484375, + "learning_rate": 3.217391304347826e-06, + "loss": 0.2683, + "step": 166 + }, + { + "epoch": 2.754639175257732, + "grad_norm": 2.875, + "learning_rate": 3.1739130434782613e-06, + "loss": 0.427, + "step": 167 + }, + { + "epoch": 2.7711340206185566, + "grad_norm": 2.046875, + "learning_rate": 3.130434782608696e-06, + "loss": 0.3583, + "step": 168 + }, + { + "epoch": 2.7876288659793813, + "grad_norm": 3.078125, + "learning_rate": 3.0869565217391307e-06, + "loss": 1.0284, + "step": 169 + }, + { + "epoch": 2.804123711340206, + "grad_norm": 1.71875, + "learning_rate": 3.043478260869566e-06, + "loss": 0.3519, + "step": 170 + }, + { + "epoch": 2.804123711340206, + "eval_loss": 0.5257400870323181, + "eval_runtime": 1.2971, + "eval_samples_per_second": 20.044, + "eval_steps_per_second": 20.044, + "step": 170 + }, + { + "epoch": 2.8206185567010307, + "grad_norm": 3.09375, + "learning_rate": 3e-06, + "loss": 0.4442, + "step": 171 + }, + { + "epoch": 2.837113402061856, + "grad_norm": 2.296875, + "learning_rate": 2.956521739130435e-06, + "loss": 0.3421, + "step": 172 + }, + { + "epoch": 2.8536082474226805, + "grad_norm": 3.1875, + 
"learning_rate": 2.9130434782608695e-06, + "loss": 0.5583, + "step": 173 + }, + { + "epoch": 2.870103092783505, + "grad_norm": 3.109375, + "learning_rate": 2.8695652173913046e-06, + "loss": 0.44, + "step": 174 + }, + { + "epoch": 2.88659793814433, + "grad_norm": 2.328125, + "learning_rate": 2.8260869565217393e-06, + "loss": 0.3957, + "step": 175 + }, + { + "epoch": 2.88659793814433, + "eval_loss": 0.525533139705658, + "eval_runtime": 1.2952, + "eval_samples_per_second": 20.075, + "eval_steps_per_second": 20.075, + "step": 175 + }, + { + "epoch": 2.9030927835051545, + "grad_norm": 1.8359375, + "learning_rate": 2.782608695652174e-06, + "loss": 0.3214, + "step": 176 + }, + { + "epoch": 2.9195876288659792, + "grad_norm": 2.234375, + "learning_rate": 2.7391304347826087e-06, + "loss": 0.681, + "step": 177 + }, + { + "epoch": 2.9360824742268044, + "grad_norm": 1.8125, + "learning_rate": 2.695652173913044e-06, + "loss": 0.3143, + "step": 178 + }, + { + "epoch": 2.952577319587629, + "grad_norm": 2.921875, + "learning_rate": 2.6521739130434785e-06, + "loss": 0.4294, + "step": 179 + }, + { + "epoch": 2.9690721649484537, + "grad_norm": 3.25, + "learning_rate": 2.6086956521739132e-06, + "loss": 0.9113, + "step": 180 + }, + { + "epoch": 2.9690721649484537, + "eval_loss": 0.5252847075462341, + "eval_runtime": 1.2957, + "eval_samples_per_second": 20.066, + "eval_steps_per_second": 20.066, + "step": 180 + }, + { + "epoch": 2.9855670103092784, + "grad_norm": 3.171875, + "learning_rate": 2.5652173913043484e-06, + "loss": 0.6406, + "step": 181 + }, + { + "epoch": 3.002061855670103, + "grad_norm": 3.546875, + "learning_rate": 2.5217391304347826e-06, + "loss": 0.8178, + "step": 182 + }, + { + "epoch": 3.0185567010309278, + "grad_norm": 2.21875, + "learning_rate": 2.4782608695652178e-06, + "loss": 0.325, + "step": 183 + }, + { + "epoch": 3.0350515463917525, + "grad_norm": 2.796875, + "learning_rate": 2.4347826086956525e-06, + "loss": 0.5324, + "step": 184 + }, + { + "epoch": 
3.051546391752577, + "grad_norm": 2.515625, + "learning_rate": 2.391304347826087e-06, + "loss": 0.442, + "step": 185 + }, + { + "epoch": 3.051546391752577, + "eval_loss": 0.5247930288314819, + "eval_runtime": 1.2956, + "eval_samples_per_second": 20.069, + "eval_steps_per_second": 20.069, + "step": 185 + }, + { + "epoch": 3.0680412371134023, + "grad_norm": 2.03125, + "learning_rate": 2.347826086956522e-06, + "loss": 0.5274, + "step": 186 + }, + { + "epoch": 3.084536082474227, + "grad_norm": 2.4375, + "learning_rate": 2.3043478260869566e-06, + "loss": 1.2468, + "step": 187 + }, + { + "epoch": 3.1010309278350516, + "grad_norm": 2.484375, + "learning_rate": 2.2608695652173913e-06, + "loss": 0.4707, + "step": 188 + }, + { + "epoch": 3.1175257731958763, + "grad_norm": 3.265625, + "learning_rate": 2.2173913043478264e-06, + "loss": 1.0451, + "step": 189 + }, + { + "epoch": 3.134020618556701, + "grad_norm": 2.96875, + "learning_rate": 2.173913043478261e-06, + "loss": 0.8531, + "step": 190 + }, + { + "epoch": 3.134020618556701, + "eval_loss": 0.5254663825035095, + "eval_runtime": 1.2902, + "eval_samples_per_second": 20.152, + "eval_steps_per_second": 20.152, + "step": 190 + }, + { + "epoch": 3.1505154639175257, + "grad_norm": 1.7421875, + "learning_rate": 2.130434782608696e-06, + "loss": 0.3128, + "step": 191 + }, + { + "epoch": 3.1670103092783504, + "grad_norm": 1.953125, + "learning_rate": 2.0869565217391305e-06, + "loss": 0.5937, + "step": 192 + }, + { + "epoch": 3.183505154639175, + "grad_norm": 2.4375, + "learning_rate": 2.0434782608695656e-06, + "loss": 0.5541, + "step": 193 + }, + { + "epoch": 3.2, + "grad_norm": 2.421875, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6821, + "step": 194 + }, + { + "epoch": 3.216494845360825, + "grad_norm": 2.125, + "learning_rate": 1.956521739130435e-06, + "loss": 0.6514, + "step": 195 + }, + { + "epoch": 3.216494845360825, + "eval_loss": 0.5250334739685059, + "eval_runtime": 1.2956, + "eval_samples_per_second": 20.068, + 
"eval_steps_per_second": 20.068, + "step": 195 + }, + { + "epoch": 3.2329896907216495, + "grad_norm": 1.4140625, + "learning_rate": 1.9130434782608697e-06, + "loss": 0.2944, + "step": 196 + }, + { + "epoch": 3.2494845360824742, + "grad_norm": 2.65625, + "learning_rate": 1.8695652173913044e-06, + "loss": 0.4221, + "step": 197 + }, + { + "epoch": 3.265979381443299, + "grad_norm": 2.75, + "learning_rate": 1.8260869565217394e-06, + "loss": 0.4516, + "step": 198 + }, + { + "epoch": 3.2824742268041236, + "grad_norm": 2.15625, + "learning_rate": 1.782608695652174e-06, + "loss": 0.2816, + "step": 199 + }, + { + "epoch": 3.2989690721649483, + "grad_norm": 1.765625, + "learning_rate": 1.7391304347826088e-06, + "loss": 0.2407, + "step": 200 + }, + { + "epoch": 3.2989690721649483, + "eval_loss": 0.5253295302391052, + "eval_runtime": 1.2931, + "eval_samples_per_second": 20.107, + "eval_steps_per_second": 20.107, + "step": 200 + }, + { + "epoch": 3.315463917525773, + "grad_norm": 1.859375, + "learning_rate": 1.6956521739130435e-06, + "loss": 0.3107, + "step": 201 + }, + { + "epoch": 3.331958762886598, + "grad_norm": 3.625, + "learning_rate": 1.6521739130434784e-06, + "loss": 1.2809, + "step": 202 + }, + { + "epoch": 3.3484536082474228, + "grad_norm": 2.328125, + "learning_rate": 1.608695652173913e-06, + "loss": 0.3474, + "step": 203 + }, + { + "epoch": 3.3649484536082475, + "grad_norm": 1.6015625, + "learning_rate": 1.565217391304348e-06, + "loss": 0.217, + "step": 204 + }, + { + "epoch": 3.381443298969072, + "grad_norm": 2.46875, + "learning_rate": 1.521739130434783e-06, + "loss": 0.5715, + "step": 205 + }, + { + "epoch": 3.381443298969072, + "eval_loss": 0.5251511931419373, + "eval_runtime": 1.2966, + "eval_samples_per_second": 20.053, + "eval_steps_per_second": 20.053, + "step": 205 + }, + { + "epoch": 3.397938144329897, + "grad_norm": 3.09375, + "learning_rate": 1.4782608695652176e-06, + "loss": 0.3722, + "step": 206 + }, + { + "epoch": 3.4144329896907215, + "grad_norm": 
1.78125, + "learning_rate": 1.4347826086956523e-06, + "loss": 0.3074, + "step": 207 + }, + { + "epoch": 3.4309278350515466, + "grad_norm": 2.125, + "learning_rate": 1.391304347826087e-06, + "loss": 0.3416, + "step": 208 + }, + { + "epoch": 3.4474226804123713, + "grad_norm": 3.0625, + "learning_rate": 1.347826086956522e-06, + "loss": 0.7473, + "step": 209 + }, + { + "epoch": 3.463917525773196, + "grad_norm": 2.4375, + "learning_rate": 1.3043478260869566e-06, + "loss": 0.3372, + "step": 210 + }, + { + "epoch": 3.463917525773196, + "eval_loss": 0.5255177617073059, + "eval_runtime": 1.2948, + "eval_samples_per_second": 20.08, + "eval_steps_per_second": 20.08, + "step": 210 + }, + { + "epoch": 3.4804123711340207, + "grad_norm": 1.6171875, + "learning_rate": 1.2608695652173913e-06, + "loss": 0.2201, + "step": 211 + }, + { + "epoch": 3.4969072164948454, + "grad_norm": 2.0625, + "learning_rate": 1.2173913043478262e-06, + "loss": 0.4559, + "step": 212 + }, + { + "epoch": 3.51340206185567, + "grad_norm": 1.625, + "learning_rate": 1.173913043478261e-06, + "loss": 0.278, + "step": 213 + }, + { + "epoch": 3.5298969072164947, + "grad_norm": 2.484375, + "learning_rate": 1.1304347826086956e-06, + "loss": 0.4145, + "step": 214 + }, + { + "epoch": 3.5463917525773194, + "grad_norm": 1.9140625, + "learning_rate": 1.0869565217391306e-06, + "loss": 0.3477, + "step": 215 + }, + { + "epoch": 3.5463917525773194, + "eval_loss": 0.5260653495788574, + "eval_runtime": 1.297, + "eval_samples_per_second": 20.046, + "eval_steps_per_second": 20.046, + "step": 215 + }, + { + "epoch": 3.562886597938144, + "grad_norm": 2.0625, + "learning_rate": 1.0434782608695653e-06, + "loss": 0.2958, + "step": 216 + }, + { + "epoch": 3.5793814432989692, + "grad_norm": 1.8828125, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.2438, + "step": 217 + }, + { + "epoch": 3.595876288659794, + "grad_norm": 2.859375, + "learning_rate": 9.565217391304349e-07, + "loss": 0.526, + "step": 218 + }, + { + "epoch": 
3.6123711340206186, + "grad_norm": 2.171875, + "learning_rate": 9.130434782608697e-07, + "loss": 0.345, + "step": 219 + }, + { + "epoch": 3.6288659793814433, + "grad_norm": 3.03125, + "learning_rate": 8.695652173913044e-07, + "loss": 0.5634, + "step": 220 + }, + { + "epoch": 3.6288659793814433, + "eval_loss": 0.5257369875907898, + "eval_runtime": 1.2957, + "eval_samples_per_second": 20.066, + "eval_steps_per_second": 20.066, + "step": 220 + }, + { + "epoch": 3.645360824742268, + "grad_norm": 3.296875, + "learning_rate": 8.260869565217392e-07, + "loss": 0.598, + "step": 221 + }, + { + "epoch": 3.6618556701030927, + "grad_norm": 2.15625, + "learning_rate": 7.82608695652174e-07, + "loss": 0.3963, + "step": 222 + }, + { + "epoch": 3.678350515463918, + "grad_norm": 2.453125, + "learning_rate": 7.391304347826088e-07, + "loss": 0.4503, + "step": 223 + }, + { + "epoch": 3.6948453608247425, + "grad_norm": 2.4375, + "learning_rate": 6.956521739130435e-07, + "loss": 0.3006, + "step": 224 + }, + { + "epoch": 3.711340206185567, + "grad_norm": 2.546875, + "learning_rate": 6.521739130434783e-07, + "loss": 0.7533, + "step": 225 + }, + { + "epoch": 3.711340206185567, + "eval_loss": 0.5261620879173279, + "eval_runtime": 1.2946, + "eval_samples_per_second": 20.084, + "eval_steps_per_second": 20.084, + "step": 225 + }, + { + "epoch": 3.727835051546392, + "grad_norm": 2.640625, + "learning_rate": 6.086956521739131e-07, + "loss": 0.4221, + "step": 226 + }, + { + "epoch": 3.7443298969072165, + "grad_norm": 3.15625, + "learning_rate": 5.652173913043478e-07, + "loss": 0.7248, + "step": 227 + }, + { + "epoch": 3.760824742268041, + "grad_norm": 2.65625, + "learning_rate": 5.217391304347826e-07, + "loss": 0.4017, + "step": 228 + }, + { + "epoch": 3.777319587628866, + "grad_norm": 2.71875, + "learning_rate": 4.782608695652174e-07, + "loss": 0.366, + "step": 229 + }, + { + "epoch": 3.7938144329896906, + "grad_norm": 2.171875, + "learning_rate": 4.347826086956522e-07, + "loss": 0.3177, + "step": 
230 + }, + { + "epoch": 3.7938144329896906, + "eval_loss": 0.5259535908699036, + "eval_runtime": 1.3026, + "eval_samples_per_second": 19.96, + "eval_steps_per_second": 19.96, + "step": 230 + }, + { + "epoch": 3.8103092783505152, + "grad_norm": 2.546875, + "learning_rate": 3.91304347826087e-07, + "loss": 0.5557, + "step": 231 + }, + { + "epoch": 3.82680412371134, + "grad_norm": 1.6875, + "learning_rate": 3.4782608695652175e-07, + "loss": 0.8832, + "step": 232 + }, + { + "epoch": 3.843298969072165, + "grad_norm": 1.7734375, + "learning_rate": 3.0434782608695656e-07, + "loss": 0.4498, + "step": 233 + }, + { + "epoch": 3.8597938144329897, + "grad_norm": 2.875, + "learning_rate": 2.608695652173913e-07, + "loss": 0.348, + "step": 234 + }, + { + "epoch": 3.8762886597938144, + "grad_norm": 2.5625, + "learning_rate": 2.173913043478261e-07, + "loss": 0.5632, + "step": 235 + }, + { + "epoch": 3.8762886597938144, + "eval_loss": 0.5255343914031982, + "eval_runtime": 1.2946, + "eval_samples_per_second": 20.084, + "eval_steps_per_second": 20.084, + "step": 235 + }, + { + "epoch": 3.892783505154639, + "grad_norm": 2.28125, + "learning_rate": 1.7391304347826088e-07, + "loss": 0.8766, + "step": 236 + }, + { + "epoch": 3.909278350515464, + "grad_norm": 2.5, + "learning_rate": 1.3043478260869566e-07, + "loss": 0.4997, + "step": 237 + }, + { + "epoch": 3.925773195876289, + "grad_norm": 2.390625, + "learning_rate": 8.695652173913044e-08, + "loss": 0.7791, + "step": 238 + }, + { + "epoch": 3.9422680412371136, + "grad_norm": 2.890625, + "learning_rate": 4.347826086956522e-08, + "loss": 0.7023, + "step": 239 + }, + { + "epoch": 3.9587628865979383, + "grad_norm": 3.0625, + "learning_rate": 0.0, + "loss": 0.6532, + "step": 240 + }, + { + "epoch": 3.9587628865979383, + "eval_loss": 0.5261483192443848, + "eval_runtime": 1.2982, + "eval_samples_per_second": 20.028, + "eval_steps_per_second": 20.028, + "step": 240 + } + ], + "logging_steps": 1.0, + "max_steps": 240, + "num_input_tokens_seen": 0, 
+ "num_train_epochs": 4, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.175841849337856e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-240/training_args.bin b/checkpoint-240/training_args.bin new file mode 100644 index 0000000..e6447c9 --- /dev/null +++ b/checkpoint-240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:434082d4751b07580988828bd4b7358253b8fd41d228f4cdc51e6f8848d67ed2 +size 5688 diff --git a/config.json b/config.json new file mode 100644 index 0000000..284f789 --- /dev/null +++ b/config.json @@ -0,0 +1,36 @@ +{ + "_name_or_path": "sftv3", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.3", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..d7bbf65 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "repetition_penalty": 
1.05, + "stop_strings": [ + "<|im_end|>" + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.46.3" +} diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors new file mode 100644 index 0000000..102c8ca --- /dev/null +++ b/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71f75d84857b40d8133c83adbfbc4fd68a9c1245954c6a3913166367ebf7d802 +size 4965799096 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors new file mode 100644 index 0000000..3705fb3 --- /dev/null +++ b/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f209608a7e3fc1e5457be1970565e8399fe1e1cd5aef93486bae396eda323b9 +size 1459729952 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..d3a1f0f --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,261 @@ +{ + "metadata": { + "total_size": 6425499648 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + 
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": 
"model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + 
"model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": 
"model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": 
"model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..04829af --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..3634b20 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9c4b74af81ca7d09faa23cc737405515f00d04de25d9ea1908153684b67d1c0 +size 17210020 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8e05826 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2076 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": 
"<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": 
"<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": 
"<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": [ + { + "name": "default", + "template": "{{bos_token}}{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a 
helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" + }, + { + "name": "tool_use", + "template": "{%- macro json_to_python_type(json_spec) %}\n{%- set basic_type_map = {\n \"string\": \"str\",\n \"number\": \"float\",\n \"integer\": \"int\",\n \"boolean\": \"bool\"\n} %}\n\n{%- if basic_type_map[json_spec.type] is defined %}\n {{- basic_type_map[json_spec.type] }}\n{%- elif json_spec.type == \"array\" %}\n {{- \"list[\" + json_to_python_type(json_spec|items) + \"]\"}}\n{%- elif json_spec.type == \"object\" %}\n {%- if json_spec.additionalProperties is defined %}\n {{- \"dict[str, \" + json_to_python_type(json_spec.additionalProperties) + ']'}}\n {%- else %}\n {{- \"dict\" }}\n {%- endif %}\n{%- elif json_spec.type is iterable %}\n {{- \"Union[\" }}\n {%- for t in json_spec.type %}\n {{- json_to_python_type({\"type\": t}) }}\n {%- if not loop.last %}\n {{- \",\" }} \n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n{%- else %}\n {{- \"Any\" }}\n{%- endif %}\n{%- endmacro %}\n\n\n{{- bos_token }}\n{{- '<|im_start|>system\n' }}\n{{- \"You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: \" }}\n{%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{- '{\"type\": \"function\", \"function\": ' }}\n {{- '{\"name\": \"' + tool.name + '\", ' }}\n {{- '\"description\": \"' + tool.name + '(' }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {{- param_name + \": \" + json_to_python_type(param_fields) }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- if tool.return is defined %}\n {{- \" -> \" + json_to_python_type(tool.return) }}\n {%- endif %}\n {{- \" - \" + tool.description + \"\n\n\" }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {%- if loop.first %}\n {{- \" Args:\n\" }}\n {%- endif %}\n {{- \" \" + param_name + \"(\" + json_to_python_type(param_fields) + \"): \" + param_fields.description|trim }}\n {%- endfor %}\n {%- if tool.return is defined and tool.return.description is defined %}\n {{- \"\n Returns:\n \" + tool.return.description }}\n {%- endif %}\n {{- '\"' }}\n {{- ', \"parameters\": ' }}\n {%- if tool.parameters.properties | length == 0 %}\n {{- \"{}\" }}\n {%- else %}\n {{- tool.parameters|tojson }}\n {%- endif %}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \"\n\" }}\n {%- endif %}\n{%- endfor %}\n{{- \" \" }}\n{{- 'Use the following pydantic model json schema for each tool call you will make: {\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"}, \"arguments\": {\"title\": \"Arguments\", \"type\": \"object\"}}, \"required\": [\"name\", \"arguments\"], \"title\": \"FunctionCall\", \"type\": \"object\"}}\n' }}\n{{- \"For each function call return a json object with function name and arguments within XML tags as follows:\n\" }}\n{{- \"\n\" }}\n{{- '{\"name\": , \"arguments\": }\n' }}\n{{- '<|im_end|>\n' }}\n{%- for message in messages %}\n {%- if message.role == \"user\" or message.role == \"system\" or (message.role == 
\"assistant\" and message.tool_calls is not defined) %}\n {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- for tool_call in message.tool_calls %}\n {{- '\n\n' }} {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '{' }}\n {{- '\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\"' }}\n {{- ', '}}\n {%- if tool_call.arguments is defined %}\n {{- '\"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments|tojson }}\n {%- endif %}\n {%- endif %}\n {{- '}' }}\n {{- '\n' }}\n {%- endfor %}\n {{- '<|im_end|>\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.previtem and loop.previtem.role != \"tool\" %}\n {{- '<|im_start|>tool\n' }}\n {%- endif %}\n {{- '\n' }}\n {{- message.content }}\n {%- if not loop.last %}\n {{- '\n\n' }}\n {%- else %}\n {{- '\n' }}\n {%- endif %}\n {%- if not loop.last and loop.nextitem.role != \"tool\" %}\n {{- '<|im_end|>' }}\n {%- elif loop.last %}\n {{- '<|im_end|>' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\n' }}\n{%- endif %}\n" + } + ], + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "max_length": 16384, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 16384, + "pad_token": "<|end_of_text|>", + "stride": 0, + "tokenizer_class": "PreTrainedTokenizerFast", + "truncation_side": "right", + "truncation_strategy": "longest_first" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..e6447c9 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:434082d4751b07580988828bd4b7358253b8fd41d228f4cdc51e6f8848d67ed2 +size 5688