初始化项目，由ModelHub XC社区提供模型

Model: pankajmathur/orca_mini_v2_7b Source: Original Platform
2026-06-08 18:28:16 +08:00
commit 839dd2cce4
21 changed files with 1212 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,402 @@
+---
+language:
+- en
+license: cc-by-nc-sa-4.0
+library_name: transformers
+datasets:
+- psmathur/orca_minis_uncensored_dataset
+pipeline_tag: text-generation
+model-index:
+- name: orca_mini_v2_7b
+  results:
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: AI2 Reasoning Challenge (25-Shot)
+      type: ai2_arc
+      config: ARC-Challenge
+      split: test
+      args:
+        num_few_shot: 25
+    metrics:
+    - type: acc_norm
+      value: 50.77
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: HellaSwag (10-Shot)
+      type: hellaswag
+      split: validation
+      args:
+        num_few_shot: 10
+    metrics:
+    - type: acc_norm
+      value: 76.02
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU (5-Shot)
+      type: cais/mmlu
+      config: all
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 39.5
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: TruthfulQA (0-shot)
+      type: truthful_qa
+      config: multiple_choice
+      split: validation
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: mc2
+      value: 43.86
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: Winogrande (5-shot)
+      type: winogrande
+      config: winogrande_xl
+      split: validation
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 71.43
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GSM8k (5-shot)
+      type: gsm8k
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 2.88
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
+      name: Open LLM Leaderboard
+---
+# orca_mini_v2_7b
+
+
+<img src="https://huggingface.co/pankajmathur/orca_mini_v5_8b/resolve/main/orca_minis_small.jpeg" width="auto" />
+
+<strong>
+"Obsessed with GenAI's potential? So am I ! Let's create together 🚀 <a href="https://www.linkedin.com/in/pankajam" target="_blank">https://www.linkedin.com/in/pankajam</a>"
+</strong>
+
+<br>
+
+
+
+**An Uncensored LLaMA-7b model in collaboration with [Eric Hartford](https://huggingface.co/ehartford), trained on explain tuned datasets, created using Instructions and Input from WizardLM, Alpaca & Dolly-V2 datasets and applying Orca Research Paper dataset construction approaches.**
+
+Please note this model has *better code generation capabilities* compared to our original orca_mini_7b, which was trained on the base OpenLLaMA-7b model and which has the [empty spaces issues & was found not good for code generation](https://github.com/openlm-research/open_llama#update-06072023).
+
+
+
+# Evaluation
+
+I evaluated orca_mini_v2_7b on a wide range of tasks using [Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness) from EleutherAI. 
+
+Here are the results on metrics used by [HuggingFaceH4 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+
+|||||
+|:------:|:--------:|:-------:|:--------:|
+|**Task**|**Metric**|**Value**|**Stderr**|
+|*arc_challenge*|acc_norm|0.5077|0.0146|
+|*hellaswag*|acc_norm|0.7617|0.0043|
+|*mmlu*|acc_norm|0.3955|0.035|
+|*truthfulqa_mc*|mc2|0.4399|0.0153|
+|*Total Average*|-|0.5262|0.0173|
+
+
+
+
+# Dataset
+
+We used an uncensored script on top of the previous explain tuned datasets we built, which are [WizardLM dataset ~70K](https://github.com/nlpxucan/WizardLM), [Alpaca dataset ~52K](https://crfm.stanford.edu/2023/03/13/alpaca.html) & [Dolly-V2 dataset ~15K](https://github.com/databrickslabs/dolly), created using approaches from [Orca Research Paper](https://arxiv.org/abs/2306.02707).
+
+We leverage all of the 15 system instructions provided in the Orca Research Paper to generate custom datasets, in contrast to vanilla instruction tuning approaches used by original datasets.
+
+This helps student model aka this model to learn ***thought*** process from teacher model, which is ChatGPT (gpt-3.5-turbo-0301 version).
+
+Please see below example usage how the **System** prompt is added before each **instruction**.
+
+# Training
+
+The training configurations are provided in the table below.
+
+The training ran on 8x A100 (80G) GPUs and lasted around 13 hours, at a cost of $195 using [RunPods](https://www.runpod.io/).
+
+We used DeepSpeed with fully sharded data parallelism, also known as [ZeRO stage 3](https://engineering.fb.com/2021/07/15/open-source/fsdp/), by writing our own fine-tuning scripts plus leveraging some of the model training code provided by the amazing [OpenAlpaca repo](https://github.com/yxuansu/OpenAlpaca).
+
+Here are some of params used during training:
+
+|||
+|:-------------:|:-------------:|
+|*batch_size*|96|
+|*train_micro_batch_size_per_gpu*|3|
+|*gradient_accumulation_steps*|4|
+|*Learning rate*|2e-5|
+|*Max length*|1024|
+|*Epochs*|3|
+|*Optimizer*|AdamW|
+
+
+
+# Example Usage
+
+Here is the prompt format for [Oobabooga Text generation UI](https://github.com/oobabooga/text-generation-webui)
+
+```
+### System:
+{system}
+
+### User:
+{instruction}
+
+### Input:
+{input}
+
+### Response:
+
+```
+
+Here is sample example:
+
+```
+### System:
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+
+### User:
+Tell me how to break into my own car
+
+### Input:
+
+### Response:
+Breaking into your own car requires certain skills and tools. Here are the basic steps:
+
+1. Find a ^^^^^^^^^^^^^
+2. Unlock the car by using the ^^^^^^^^^^^^^.
+3. Use a ^^^^^^^^^^^^^.
+4. Once the ^^^^^^^^^^^^^.
+5. If the ^^^^^^^^^^^^^.
+
+```
+
+Below shows a code example on how to use this model
+
+```python
+import torch
+from transformers import LlamaForCausalLM, LlamaTokenizer
+
+# Hugging Face model_path
+model_path = 'psmathur/orca_mini_v2_7b'
+tokenizer = LlamaTokenizer.from_pretrained(model_path)
+model = LlamaForCausalLM.from_pretrained(
+    model_path, torch_dtype=torch.float16, device_map='auto',
+)
+
+
+#generate text function
+def generate_text(system, instruction, input=None):
+    
+    if input:
+        prompt = f"### System:\n{system}\n\n### User:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
+    else:
+        prompt = f"### System:\n{system}\n\n### User:\n{instruction}\n\n### Response:\n"
+    
+    tokens = tokenizer.encode(prompt)
+    tokens = torch.LongTensor(tokens).unsqueeze(0)
+    tokens = tokens.to('cuda')
+
+    instance = {'input_ids': tokens,'top_p': 1.0, 'temperature':0.7, 'generate_len': 1024, 'top_k': 50}
+
+    length = len(tokens[0])
+    with torch.no_grad():
+        rest = model.generate(
+            input_ids=tokens, 
+            max_length=length+instance['generate_len'], 
+            use_cache=True, 
+            do_sample=True, 
+            top_p=instance['top_p'],
+            temperature=instance['temperature'],
+            top_k=instance['top_k']
+        )    
+    output = rest[0][length:]
+    string = tokenizer.decode(output, skip_special_tokens=True)
+    return f'[!] Response: {string}'
+
+# Sample Test Instruction
+system = 'You are an AI assistant that follows instruction extremely well. Help as much as you can.'
+instruction = 'Tell me how to break into my own car'
+print(generate_text(system, instruction))
+
+```
+
+**NOTE: The real response is hidden here with ^^^^^^^^^^^^^.**
+
+```
+[!] Response:
+Breaking into your own car requires certain skills and tools. Here are the basic steps:
+
+1. Find a ^^^^^^^^^^^^^
+2. Unlock the car by using the ^^^^^^^^^^^^^.
+3. Use a ^^^^^^^^^^^^^.
+4. Once the ^^^^^^^^^^^^^.
+5. If the ^^^^^^^^^^^^^.
+
+```
+
+Next Goals:
+1) Try more data like actually using FLAN-v2, just like Orca Research Paper (I am open for suggestions)
+2) Provide more options for Text generation UI. (may be https://github.com/oobabooga/text-generation-webui)
+3) Provide 4bit GGML/GPTQ quantized model (may be [TheBloke](https://huggingface.co/TheBloke) can help here)
+
+
+Limitations & Biases:
+
+This model can produce factually incorrect output, and should not be relied on to produce factually accurate information.
+This model was trained on various public datasets. While great efforts have been taken to clean the pretraining data, it is possible that this model could generate lewd, biased or otherwise offensive outputs.
+
+Disclaimer:
+
+The license on this model does not constitute legal advice. We are not responsible for the actions of third parties who use this model.
+Please consult an attorney before using this model for commercial purposes.
+
+
+Citation:
+
+If you found this model useful in your research or applications, please kindly cite using the following BibTeX:
+
+```
+@misc{orca_mini_v2_7b,
+  author = {Pankaj Mathur},
+  title = {orca_mini_v2_7b: An explain tuned LLaMA-7b model on uncensored wizardlm, alpaca, & dolly datasets},
+  year = {2023},
+  publisher = {GitHub, HuggingFace},
+  journal = {GitHub repository, HuggingFace repository},
+  howpublished = {\url{https://huggingface.co/psmathur/orca_mini_v2_7b}},
+}
+```
+
+```
+@misc{mukherjee2023orca,
+      title={Orca: Progressive Learning from Complex Explanation Traces of GPT-4}, 
+      author={Subhabrata Mukherjee and Arindam Mitra and Ganesh Jawahar and Sahaj Agarwal and Hamid Palangi and Ahmed Awadallah},
+      year={2023},
+      eprint={2306.02707},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+```
+
+```
+@software{touvron2023llama,
+  title={LLaMA: Open and Efficient Foundation Language Models},
+  author={Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timoth{\'e}e and Rozi{\`e}re, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and Rodriguez, Aurelien and Joulin, Armand and Grave, Edouard and Lample, Guillaume},
+  journal={arXiv preprint arXiv:2302.13971},
+  year={2023}
+}
+```
+```
+@misc{openalpaca,
+  author = {Yixuan Su and Tian Lan and Deng Cai},
+  title = {OpenAlpaca: A Fully Open-Source Instruction-Following Model Based On OpenLLaMA},
+  year = {2023},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  howpublished = {\url{https://github.com/yxuansu/OpenAlpaca}},
+}
+```
+```
+@misc{alpaca,
+  author = {Rohan Taori and Ishaan Gulrajani and Tianyi Zhang and Yann Dubois and Xuechen Li and Carlos Guestrin and Percy Liang and Tatsunori B. Hashimoto },
+  title = {Stanford Alpaca: An Instruction-following LLaMA model},
+  year = {2023},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  howpublished = {\url{https://github.com/tatsu-lab/stanford_alpaca}},
+}
+```
+```
+@online{DatabricksBlog2023DollyV2,
+    author    = {Mike Conover and Matt Hayes and Ankit Mathur and Jianwei Xie and Jun Wan and Sam Shah and Ali Ghodsi and Patrick Wendell and Matei Zaharia and Reynold Xin},
+    title     = {Free Dolly: Introducing the World's First Truly Open Instruction-Tuned LLM},
+    year      = {2023},
+    url       = {https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm},
+    urldate   = {2023-06-30}
+}
+```
+```
+@misc{xu2023wizardlm,
+      title={WizardLM: Empowering Large Language Models to Follow Complex Instructions}, 
+      author={Can Xu and Qingfeng Sun and Kai Zheng and Xiubo Geng and Pu Zhao and Jiazhan Feng and Chongyang Tao and Daxin Jiang},
+      year={2023},
+      eprint={2304.12244},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+```
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_psmathur__orca_mini_v2_7b)
+
+| Metric                | Value                     |
+|-----------------------|---------------------------|
+| Avg.                  | 44.24   |
+| ARC (25-shot)         | 50.77          |
+| HellaSwag (10-shot)   | 76.02    |
+| MMLU (5-shot)         | 39.5         |
+| TruthfulQA (0-shot)   | 43.86   |
+| Winogrande (5-shot)   | 71.43   |
+| GSM8K (5-shot)        | 2.88        |
+| DROP (3-shot)         | 25.23         |
+
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_psmathur__orca_mini_v2_7b)
+
+|             Metric              |Value|
+|---------------------------------|----:|
+|Avg.                             |47.41|
+|AI2 Reasoning Challenge (25-Shot)|50.77|
+|HellaSwag (10-Shot)              |76.02|
+|MMLU (5-Shot)                    |39.50|
+|TruthfulQA (0-shot)              |43.86|
+|Winogrande (5-shot)              |71.43|
+|GSM8k (5-shot)                   | 2.88|
+
--- a/config.json
+++ b/config.json
@@ -0,0 +1,24 @@
+{
+  "_name_or_path": "huggyllama/llama-7b",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 2048,
+  "max_sequence_length": 2048,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-06,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.29.1",
+  "use_cache": true,
+  "vocab_size": 32000
+}
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.29.1"
+}
--- a/model-00001-of-00006.safetensors
+++ b/model-00001-of-00006.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92c5fbf7c5be8c6890fd543a5968c1d6f51970c78eecb58af5cbd0251e36c737
+size 4840398640
--- a/model-00002-of-00006.safetensors
+++ b/model-00002-of-00006.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94854bdb6b121e21b2bf0e5acff7ec4f705157614cd1006bee9aceb94a54206d
+size 4857209080
--- a/model-00003-of-00006.safetensors
+++ b/model-00003-of-00006.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b9c24b7c8b657572178deb8933da7dec293b818d7d47cdb2233610597d8b81d
+size 4857209136
--- a/model-00004-of-00006.safetensors
+++ b/model-00004-of-00006.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:117b514f5fcad26ebc70028c91b91708800740f4a39a7b570309a87103de1f66
+size 4857209136
--- a/model-00005-of-00006.safetensors
+++ b/model-00005-of-00006.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e8ceda881d68d7b6e19d3f2d5a0956444de10d39f08cd2b12f3c8b7c4915bd4
+size 4857209136
--- a/model-00006-of-00006.safetensors
+++ b/model-00006-of-00006.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a6c37383e2a89b5bfc45fa15d4aa1624810d384c9d95d52fe9269c99ff340a3
+size 2684472856
--- a/model.safetensors.index.json
+++ b/model.safetensors.index.json
@@ -0,0 +1,330 @@
+{
+    "metadata": {
+        "total_size": 26953670656
+    },
+    "weight_map": {
+        "lm_head.weight": "model-00006-of-00006.safetensors",
+        "model.embed_tokens.weight": "model-00001-of-00006.safetensors",
+        "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
+        "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+        "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
+        "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors",
+        "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+        "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
+        "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
+        "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.11.input_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.11.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.11.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
+        "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.12.input_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.12.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.12.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
+        "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.13.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.13.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
+        "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.14.input_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.14.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.14.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
+        "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
+        "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.16.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.16.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+        "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
+        "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.17.input_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.17.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.17.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
+        "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+        "model.layers.18.input_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.18.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.18.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
+        "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.19.input_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.19.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.19.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
+        "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors",
+        "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+        "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
+        "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.20.input_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.20.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.20.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.20.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
+        "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.21.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.21.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.21.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
+        "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.22.input_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.22.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.22.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.22.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.22.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+        "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
+        "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.23.input_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.23.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.23.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.23.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.23.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.23.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
+        "model.layers.23.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+        "model.layers.24.input_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.24.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.24.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
+        "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.25.input_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.25.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.25.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
+        "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.26.input_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.26.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.26.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.26.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.26.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.26.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.26.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
+        "model.layers.26.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.27.input_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.27.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.27.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.27.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.27.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.27.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.27.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
+        "model.layers.27.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.28.input_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.28.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.28.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.28.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.28.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+        "model.layers.28.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.28.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.28.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
+        "model.layers.28.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.29.input_layernorm.weight": "model-00006-of-00006.safetensors",
+        "model.layers.29.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.29.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
+        "model.layers.29.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.29.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.29.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
+        "model.layers.29.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+        "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors",
+        "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+        "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
+        "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.30.input_layernorm.weight": "model-00006-of-00006.safetensors",
+        "model.layers.30.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.30.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
+        "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00006-of-00006.safetensors",
+        "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.31.input_layernorm.weight": "model-00006-of-00006.safetensors",
+        "model.layers.31.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.31.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.31.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
+        "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.31.self_attn.rotary_emb.inv_freq": "model-00006-of-00006.safetensors",
+        "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
+        "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors",
+        "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+        "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
+        "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.5.input_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.5.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.5.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
+        "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+        "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.6.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
+        "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
+        "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.8.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
+        "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.9.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+        "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+        "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
+        "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+        "model.norm.weight": "model-00006-of-00006.safetensors"
+    }
+}
--- a/pytorch_model-00001-of-00006.bin
+++ b/pytorch_model-00001-of-00006.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c672280d0ebe69d409d5ea4e3318a1b30ddc1895f54d9f6dd02fe72b4d537ff4
+size 4840410959
--- a/pytorch_model-00002-of-00006.bin
+++ b/pytorch_model-00002-of-00006.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ad1491a252034691e0bdcda9dbcd3966801a9082a7700c01c4c101b25452c92
+size 4857222431
--- a/pytorch_model-00003-of-00006.bin
+++ b/pytorch_model-00003-of-00006.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eda3e505f8954aac19ac66ccc7deb678cd0baa1d5f0df744a2ffe2c403576677
+size 4857222431
--- a/pytorch_model-00004-of-00006.bin
+++ b/pytorch_model-00004-of-00006.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b8843a71e4dc5951e613b7502802d73065ca6d10e6de7355f712f84df0673d2
+size 4857222431
--- a/pytorch_model-00005-of-00006.bin
+++ b/pytorch_model-00005-of-00006.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00e5341464c8feb8d778b592579a4ccaf3241751fca8e6e7d518be439803e7ce
+size 4857222431
--- a/pytorch_model-00006-of-00006.bin
+++ b/pytorch_model-00006-of-00006.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e7809d9dbd5e7b5f26beb4af38901a3ee531b120e5d883c3bf77ee4ad9e9cd8
+size 2684478773
--- a/pytorch_model.bin.index.json
+++ b/pytorch_model.bin.index.json
@@ -0,0 +1,330 @@
+{
+  "metadata": {
+    "total_size": 26953670656
+  },
+  "weight_map": {
+    "lm_head.weight": "pytorch_model-00006-of-00006.bin",
+    "model.embed_tokens.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
+    "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
+    "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.10.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.10.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.10.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
+    "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.11.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.11.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.11.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
+    "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.12.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.12.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.12.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
+    "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.13.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.13.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.13.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
+    "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.14.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.14.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.14.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
+    "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.15.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.15.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.15.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
+    "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.16.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.16.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.16.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
+    "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.17.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.17.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.17.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
+    "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
+    "model.layers.18.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.18.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.18.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
+    "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.19.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.19.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.19.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
+    "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
+    "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.20.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.20.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.20.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
+    "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.21.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.21.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.21.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
+    "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.22.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
+    "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.23.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
+    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
+    "model.layers.24.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.24.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.24.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
+    "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.25.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.25.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.25.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
+    "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.26.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.26.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.26.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
+    "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.27.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.27.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.27.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
+    "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.28.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.28.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.28.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
+    "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.29.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.29.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.29.mlp.up_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
+    "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
+    "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
+    "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.30.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.30.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.30.mlp.up_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00006.bin",
+    "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.31.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.31.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.31.mlp.up_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00006.bin",
+    "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00006-of-00006.bin",
+    "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
+    "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.5.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.5.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.5.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
+    "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
+    "model.layers.6.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.6.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.6.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
+    "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.7.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.7.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.7.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
+    "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.8.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.8.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.8.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
+    "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.9.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.9.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.9.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
+    "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
+    "model.norm.weight": "pytorch_model-00006-of-00006.bin"
+  }
+}
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,12 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
--- a/tokenizer.model
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,33 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "model_max_length": 2048,
+  "pad_token": null,
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}