初始化项目，由ModelHub XC社区提供模型

Model: pankajmathur/orca_mini_v2_7b Source: Original Platform
2026-06-08 18:28:16 +08:00
commit 839dd2cce4
21 changed files with 1212 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,35 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,402 @@
 ---
 language:
 - en
 license: cc-by-nc-sa-4.0
 library_name: transformers
 datasets:
 - psmathur/orca_minis_uncensored_dataset
 pipeline_tag: text-generation
 model-index:
 - name: orca_mini_v2_7b
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: AI2 Reasoning Challenge (25-Shot)
      type: ai2_arc
      config: ARC-Challenge
      split: test
      args:
        num_few_shot: 25
    metrics:
    - type: acc_norm
      value: 50.77
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: HellaSwag (10-Shot)
      type: hellaswag
      split: validation
      args:
        num_few_shot: 10
    metrics:
    - type: acc_norm
      value: 76.02
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU (5-Shot)
      type: cais/mmlu
      config: all
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 39.5
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: TruthfulQA (0-shot)
      type: truthful_qa
      config: multiple_choice
      split: validation
      args:
        num_few_shot: 0
    metrics:
    - type: mc2
      value: 43.86
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Winogrande (5-shot)
      type: winogrande
      config: winogrande_xl
      split: validation
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 71.43
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GSM8k (5-shot)
      type: gsm8k
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 2.88
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=psmathur/orca_mini_v2_7b
      name: Open LLM Leaderboard
 ---
 # orca_mini_v2_7b
 <img src="https://huggingface.co/pankajmathur/orca_mini_v5_8b/resolve/main/orca_minis_small.jpeg" width="auto" />
 <strong>
 "Obsessed with GenAI's potential? So am I ! Let's create together 🚀 <a href="https://www.linkedin.com/in/pankajam" target="_blank">https://www.linkedin.com/in/pankajam</a>"
 </strong>
 <br>
 **An Uncensored LLaMA-7b model in collaboration with [Eric Hartford](https://huggingface.co/ehartford), trained on explain tuned datasets, created using Instructions and Input from WizardLM, Alpaca & Dolly-V2 datasets and applying Orca Research Paper dataset construction approaches.**
 Please note this model has *better code generation capabilities* compared to our original orca_mini_7b, which was trained on the base OpenLLaMA-7b model and which has the [empty spaces issues & was found not good for code generation](https://github.com/openlm-research/open_llama#update-06072023).
 # Evaluation
 I evaluated orca_mini_v2_7b on a wide range of tasks using [Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness) from EleutherAI. 
 Here are the results on metrics used by [HuggingFaceH4 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
 |||||
 |:------:|:--------:|:-------:|:--------:|
 |**Task**|**Metric**|**Value**|**Stderr**|
 |*arc_challenge*|acc_norm|0.5077|0.0146|
 |*hellaswag*|acc_norm|0.7617|0.0043|
 |*mmlu*|acc_norm|0.3955|0.035|
 |*truthfulqa_mc*|mc2|0.4399|0.0153|
 |*Total Average*|-|0.5262|0.0173|
 # Dataset
 We used an uncensored script on top of the previous explain tuned datasets we built, which are [WizardLM dataset ~70K](https://github.com/nlpxucan/WizardLM), [Alpaca dataset ~52K](https://crfm.stanford.edu/2023/03/13/alpaca.html)  & [Dolly-V2 dataset ~15K](https://github.com/databrickslabs/dolly) created using approaches from [Orca Research Paper](https://arxiv.org/abs/2306.02707).
 We leverage all of the 15 system instructions provided in the Orca Research Paper to generate custom datasets, in contrast to vanilla instruction tuning approaches used by original datasets.
 This helps student model aka this model to learn ***thought*** process from teacher model, which is ChatGPT (gpt-3.5-turbo-0301 version).
 Please see below example usage how the **System** prompt is added before each **instruction**.
 # Training
 The training configurations are provided in the table below.
 The training ran on 8x A100 (80G) GPUs and lasted around 13 hours, at a cost of $195 using [RunPods](https://www.runpod.io/)
 We used DeepSpeed with fully sharded data parallelism, also known as [ZeRO stage 3](https://engineering.fb.com/2021/07/15/open-source/fsdp/), by writing our own fine-tuning scripts plus leveraging some of the model training code provided by the amazing [OpenAlpaca repo](https://github.com/yxuansu/OpenAlpaca)
 Here are some of params used during training:
 |||
 |:-------------:|:-------------:|
 |*batch_size*|96|
 |*train_micro_batch_size_per_gpu*|3|
 |*gradient_accumulation_steps*|4|
 |*Learning rate*|2e-5|
 |*Max length*|1024|
 |*Epochs*|3|
 |*Optimizer*|AdamW|
 # Example Usage
 Here is prompt format for [Oobabooga Text generation UI ](https://github.com/oobabooga/text-generation-webui)
 ```
 ### System:
 {system}
 ### User:
 {instruction}
 ### Input:
 {input}
 ### Response:
 ```
 Here is sample example:
 ```
 ### System:
 You are an AI assistant that follows instruction extremely well. Help as much as you can.
 ### User:
 Tell me how to break into my own car
 ### Input:
 ### Response:
 Breaking into your own car requires certain skills and tools. Here are the basic steps:
 1. Find a ^^^^^^^^^^^^^
 2. Unlock the car by using the ^^^^^^^^^^^^^.
 3. Use a ^^^^^^^^^^^^^.
 4. Once the ^^^^^^^^^^^^^.
 5. If the ^^^^^^^^^^^^^.
 ```
 Below shows a code example on how to use this model
 ```python
 import torch
 from transformers import LlamaForCausalLM, LlamaTokenizer
 # Hugging Face model_path: the repository id passed to both from_pretrained
 # calls below, so the tokenizer and the weights come from the same place.
 model_path = 'psmathur/orca_mini_v2_7b'
 tokenizer = LlamaTokenizer.from_pretrained(model_path)
 # Load the weights in float16 and let device_map='auto' choose the device
 # placement (NOTE(review): placement depends on the hardware available at
 # runtime — confirm for your environment).
 model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map='auto',
 )
 #generate text function
 def generate_text(system, instruction, input=None):
    """Build an orca_mini style prompt, sample a completion and return it.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        system: Text placed under the ``### System:`` header.
        instruction: Text placed under the ``### User:`` header.
        input: Optional extra context. When truthy it is placed under an
            ``### Input:`` header; otherwise that section is left out.

    Returns:
        The decoded completion (prompt tokens stripped), prefixed with
        ``'[!] Response: '``.
    """
    if input:
        prompt = f"### System:\n{system}\n\n### User:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
    else:
        prompt = f"### System:\n{system}\n\n### User:\n{instruction}\n\n### Response:\n"
    tokens = tokenizer.encode(prompt)
    tokens = torch.LongTensor(tokens).unsqueeze(0)
    # Follow the model's own device instead of hard-coding 'cuda': the model
    # is loaded with device_map='auto', so its device is decided at load time.
    tokens = tokens.to(model.device)
    # Sampling settings; 'generate_len' is the budget of newly generated tokens.
    instance = {'top_p': 1.0, 'temperature':0.7, 'generate_len': 1024, 'top_k': 50}
    length = len(tokens[0])
    with torch.no_grad():
        rest = model.generate(
            input_ids=tokens,
            # max_length counts the prompt too, so add the prompt length.
            max_length=length+instance['generate_len'],
            use_cache=True,
            do_sample=True,
            top_p=instance['top_p'],
            temperature=instance['temperature'],
            top_k=instance['top_k']
        )
    # Drop the echoed prompt tokens and keep only the completion.
    output = rest[0][length:]
    string = tokenizer.decode(output, skip_special_tokens=True)
    return f'[!] Response: {string}'
 # Sample Test Instruction: run one generation and print the formatted result.
 # No `input` argument is passed, so generate_text builds the prompt variant
 # without the "### Input:" section.
 system = 'You are an AI assistant that follows instruction extremely well. Help as much as you can.'
 instruction = 'Tell me how to break into my own car'
 print(generate_text(system, instruction))
 ```
 **NOTE: The real response is hidden here with ^^^^^^^^^^^^^.**
 ```
 [!] Response:
 Breaking into your own car requires certain skills and tools. Here are the basic steps:
 1. Find a ^^^^^^^^^^^^^
 2. Unlock the car by using the ^^^^^^^^^^^^^.
 3. Use a ^^^^^^^^^^^^^.
 4. Once the ^^^^^^^^^^^^^.
 5. If the ^^^^^^^^^^^^^.
 ```
 Next Goals:
 1) Try more data like actually using FLAN-v2, just like Orca Research Paper (I am open for suggestions)
 2) Provide more options for Text generation UI. (may be https://github.com/oobabooga/text-generation-webui)
 3) Provide 4bit GGML/GPTQ quantized model (may be [TheBloke](https://huggingface.co/TheBloke) can help here)
 Limitations & Biases:
 This model can produce factually incorrect output, and should not be relied on to produce factually accurate information.
 This model was trained on various public datasets. While great efforts have been taken to clean the pretraining data, it is possible that this model could generate lewd, biased or otherwise offensive outputs.
 Disclaimer:
 The license on this model does not constitute legal advice. We are not responsible for the actions of third parties who use this model.
 Please consult an attorney before using this model for commercial purposes.
 Citation:
 If you found this model useful in your research or applications, please kindly cite using the following BibTeX:
 ```
@misc{orca_mini_v2_7b,
  author = {Pankaj Mathur},
  title = {orca_mini_v2_7b: An explain tuned LLaMA-7b model on uncensored wizardlm, alpaca, & dolly datasets},
  year = {2023},
  publisher = {GitHub, HuggingFace},
  journal = {GitHub repository, HuggingFace repository},
   howpublished = {\url{https://huggingface.co/psmathur/orca_mini_v2_7b}},
 }
 ```
 ```
@misc{mukherjee2023orca,
      title={Orca: Progressive Learning from Complex Explanation Traces of GPT-4}, 
      author={Subhabrata Mukherjee and Arindam Mitra and Ganesh Jawahar and Sahaj Agarwal and Hamid Palangi and Ahmed Awadallah},
      year={2023},
      eprint={2306.02707},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
 }
 ```
 ```
@software{touvron2023llama,
  title={LLaMA: Open and Efficient Foundation Language Models},
  author={Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timoth{\'e}e and Rozi{\`e}re, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and Rodriguez, Aurelien and Joulin, Armand and Grave, Edouard and Lample, Guillaume},
  journal={arXiv preprint arXiv:2302.13971},
  year={2023}
 }
 ```
 ```
@misc{openalpaca,
  author = {Yixuan Su and Tian Lan and Deng Cai},
  title = {OpenAlpaca: A Fully Open-Source Instruction-Following Model Based On OpenLLaMA},
  year = {2023},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/yxuansu/OpenAlpaca}},
 }
 ```
 ```
@misc{alpaca,
  author = {Rohan Taori and Ishaan Gulrajani and Tianyi Zhang and Yann Dubois and Xuechen Li and Carlos Guestrin and Percy Liang and Tatsunori B. Hashimoto },
  title = {Stanford Alpaca: An Instruction-following LLaMA model},
  year = {2023},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/tatsu-lab/stanford_alpaca}},
 }
 ```
 ```
@online{DatabricksBlog2023DollyV2,
    author    = {Mike Conover and Matt Hayes and Ankit Mathur and Jianwei Xie and Jun Wan and Sam Shah and Ali Ghodsi and Patrick Wendell and Matei Zaharia and Reynold Xin},
    title     = {Free Dolly: Introducing the World's First Truly Open Instruction-Tuned LLM},
    year      = {2023},
    url       = {https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm},
    urldate   = {2023-06-30}
 }
 ```
 ```
@misc{xu2023wizardlm,
      title={WizardLM: Empowering Large Language Models to Follow Complex Instructions}, 
      author={Can Xu and Qingfeng Sun and Kai Zheng and Xiubo Geng and Pu Zhao and Jiazhan Feng and Chongyang Tao and Daxin Jiang},
      year={2023},
      eprint={2304.12244},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
 }
 ```
 # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
 Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_psmathur__orca_mini_v2_7b)
 | Metric                | Value                     |
 |-----------------------|---------------------------|
 | Avg.                  | 44.24   |
 | ARC (25-shot)         | 50.77          |
 | HellaSwag (10-shot)   | 76.02    |
 | MMLU (5-shot)         | 39.5         |
 | TruthfulQA (0-shot)   | 43.86   |
 | Winogrande (5-shot)   | 71.43   |
 | GSM8K (5-shot)        | 2.88        |
 | DROP (3-shot)         | 25.23         |
 # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
 Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_psmathur__orca_mini_v2_7b)
 |             Metric              |Value|
 |---------------------------------|----:|
 |Avg.                             |47.41|
 |AI2 Reasoning Challenge (25-Shot)|50.77|
 |HellaSwag (10-Shot)              |76.02|
 |MMLU (5-Shot)                    |39.50|
 |TruthfulQA (0-shot)              |43.86|
 |Winogrande (5-shot)              |71.43|
 |GSM8k (5-shot)                   | 2.88|
--- a/config.json
+++ b/config.json
@@ -0,0 +1,24 @@
 {
  "_name_or_path": "huggyllama/llama-7b",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 2048,
  "max_sequence_length": 2048,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.29.1",
  "use_cache": true,
  "vocab_size": 32000
 }
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,7 @@
 {
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0,
  "transformers_version": "4.29.1"
 }
--- a/model-00001-of-00006.safetensors
+++ b/model-00001-of-00006.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:92c5fbf7c5be8c6890fd543a5968c1d6f51970c78eecb58af5cbd0251e36c737
 size 4840398640
--- a/model-00002-of-00006.safetensors
+++ b/model-00002-of-00006.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:94854bdb6b121e21b2bf0e5acff7ec4f705157614cd1006bee9aceb94a54206d
 size 4857209080
--- a/model-00003-of-00006.safetensors
+++ b/model-00003-of-00006.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:8b9c24b7c8b657572178deb8933da7dec293b818d7d47cdb2233610597d8b81d
 size 4857209136
--- a/model-00004-of-00006.safetensors
+++ b/model-00004-of-00006.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:117b514f5fcad26ebc70028c91b91708800740f4a39a7b570309a87103de1f66
 size 4857209136
--- a/model-00005-of-00006.safetensors
+++ b/model-00005-of-00006.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:5e8ceda881d68d7b6e19d3f2d5a0956444de10d39f08cd2b12f3c8b7c4915bd4
 size 4857209136
--- a/model-00006-of-00006.safetensors
+++ b/model-00006-of-00006.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:0a6c37383e2a89b5bfc45fa15d4aa1624810d384c9d95d52fe9269c99ff340a3
 size 2684472856
--- a/model.safetensors.index.json
+++ b/model.safetensors.index.json
@@ -0,0 +1,330 @@
 {
    "metadata": {
        "total_size": 26953670656
    },
    "weight_map": {
        "lm_head.weight": "model-00006-of-00006.safetensors",
        "model.embed_tokens.weight": "model-00001-of-00006.safetensors",
        "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
        "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
        "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
        "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors",
        "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
        "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
        "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
        "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.11.input_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.11.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.11.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
        "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.12.input_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.12.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.12.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
        "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.13.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.13.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
        "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.14.input_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.14.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.14.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
        "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
        "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.16.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.16.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
        "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
        "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.17.input_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.17.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.17.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00003-of-00006.safetensors",
        "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
        "model.layers.18.input_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.18.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.18.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
        "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.19.input_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.19.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.19.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
        "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors",
        "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
        "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
        "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.20.input_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.20.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.20.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.20.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
        "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.21.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.21.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.21.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
        "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.22.input_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.22.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.22.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.22.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.22.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
        "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
        "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.23.input_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.23.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.23.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.23.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.23.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.23.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00004-of-00006.safetensors",
        "model.layers.23.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
        "model.layers.24.input_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.24.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.24.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
        "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.25.input_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.25.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.25.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
        "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.26.input_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.26.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.26.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.26.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.26.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.26.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.26.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
        "model.layers.26.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.27.input_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.27.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.27.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.27.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.27.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.27.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.27.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
        "model.layers.27.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.28.input_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.28.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.28.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.28.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.28.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
        "model.layers.28.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.28.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.28.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
        "model.layers.28.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.29.input_layernorm.weight": "model-00006-of-00006.safetensors",
        "model.layers.29.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.29.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
        "model.layers.29.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.29.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.29.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00005-of-00006.safetensors",
        "model.layers.29.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
        "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors",
        "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
        "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
        "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.30.input_layernorm.weight": "model-00006-of-00006.safetensors",
        "model.layers.30.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.30.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
        "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00006-of-00006.safetensors",
        "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.31.input_layernorm.weight": "model-00006-of-00006.safetensors",
        "model.layers.31.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.31.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.31.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
        "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.31.self_attn.rotary_emb.inv_freq": "model-00006-of-00006.safetensors",
        "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
        "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors",
        "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
        "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
        "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.5.input_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.5.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.5.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00001-of-00006.safetensors",
        "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
        "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.6.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
        "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
        "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.8.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
        "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.9.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
        "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
        "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00002-of-00006.safetensors",
        "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
        "model.norm.weight": "model-00006-of-00006.safetensors"
    }
 }
--- a/pytorch_model-00001-of-00006.bin
+++ b/pytorch_model-00001-of-00006.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:c672280d0ebe69d409d5ea4e3318a1b30ddc1895f54d9f6dd02fe72b4d537ff4
 size 4840410959
--- a/pytorch_model-00002-of-00006.bin
+++ b/pytorch_model-00002-of-00006.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:4ad1491a252034691e0bdcda9dbcd3966801a9082a7700c01c4c101b25452c92
 size 4857222431
--- a/pytorch_model-00003-of-00006.bin
+++ b/pytorch_model-00003-of-00006.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:eda3e505f8954aac19ac66ccc7deb678cd0baa1d5f0df744a2ffe2c403576677
 size 4857222431
--- a/pytorch_model-00004-of-00006.bin
+++ b/pytorch_model-00004-of-00006.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:5b8843a71e4dc5951e613b7502802d73065ca6d10e6de7355f712f84df0673d2
 size 4857222431
--- a/pytorch_model-00005-of-00006.bin
+++ b/pytorch_model-00005-of-00006.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:00e5341464c8feb8d778b592579a4ccaf3241751fca8e6e7d518be439803e7ce
 size 4857222431
--- a/pytorch_model-00006-of-00006.bin
+++ b/pytorch_model-00006-of-00006.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:8e7809d9dbd5e7b5f26beb4af38901a3ee531b120e5d883c3bf77ee4ad9e9cd8
 size 2684478773
--- a/pytorch_model.bin.index.json
+++ b/pytorch_model.bin.index.json
@@ -0,0 +1,330 @@
 {
  "metadata": {
    "total_size": 26953670656
  },
  "weight_map": {
    "lm_head.weight": "pytorch_model-00006-of-00006.bin",
    "model.embed_tokens.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
    "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
    "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.10.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.10.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.10.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
    "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.11.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.11.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.11.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
    "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.12.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.12.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.12.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
    "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.13.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.13.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.13.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
    "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.14.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.14.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.14.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
    "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.15.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.15.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.15.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
    "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.16.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.16.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.16.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
    "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.17.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.17.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.17.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00006.bin",
    "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
    "model.layers.18.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.18.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.18.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
    "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.19.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.19.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.19.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
    "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
    "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.20.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.20.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.20.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
    "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.21.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.21.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.21.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
    "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.22.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
    "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.23.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00006.bin",
    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
    "model.layers.24.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.24.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.24.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
    "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.25.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.25.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.25.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
    "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.26.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.26.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.26.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
    "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.27.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.27.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.27.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
    "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.28.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.28.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.28.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
    "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.29.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.29.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.29.mlp.up_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00006.bin",
    "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
    "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
    "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.30.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.30.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.30.mlp.up_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00006.bin",
    "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.31.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.31.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.31.mlp.up_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00006.bin",
    "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00006-of-00006.bin",
    "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
    "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.5.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.5.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.5.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00006.bin",
    "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
    "model.layers.6.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.6.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.6.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
    "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.7.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.7.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.7.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
    "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.8.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.8.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.8.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
    "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.9.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.9.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.9.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00006.bin",
    "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
    "model.norm.weight": "pytorch_model-00006-of-00006.bin"
  }
 }
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,12 @@
 {
  "bos_token": "<s>",
  "eos_token": "</s>",
  "pad_token": "</s>",
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/tokenizer.model
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
 size 499723
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,33 @@
 {
  "add_bos_token": true,
  "add_eos_token": false,
  "bos_token": {
    "__type": "AddedToken",
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "clean_up_tokenization_spaces": false,
  "eos_token": {
    "__type": "AddedToken",
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "model_max_length": 2048,
  "pad_token": null,
  "sp_model_kwargs": {},
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
 }