初始化项目，由ModelHub XC社区提供模型

Model: sarvamai/sarvam-m Source: Original Platform
2026-06-14 15:52:12 +08:00
commit ae54dd3114
20 changed files with 10905 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,49 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bin.* filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *.tfevents* filter=lfs diff=lfs merge=lfs -text
 *.db* filter=lfs diff=lfs merge=lfs -text
 *.ark* filter=lfs diff=lfs merge=lfs -text
 **/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
 **/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
 **/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.gguf* filter=lfs diff=lfs merge=lfs -text
 *.ggml filter=lfs diff=lfs merge=lfs -text
 *.llamafile* filter=lfs diff=lfs merge=lfs -text
 *.pt2 filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -0,0 +1,177 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/
   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
   1. Definitions.
      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.
      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.
      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.
      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.
      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.
      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.
      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).
      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.
      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."
      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.
   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.
   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.
   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:
      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and
      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and
      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and
      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.
      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.
   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.
   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.
   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.
   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.
   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.
   END OF TERMS AND CONDITIONS
--- a/README.md
+++ b/README.md
@@ -0,0 +1,196 @@
 ---
 library_name: transformers
 license: apache-2.0
 language:
 - en
 - bn
 - hi
 - kn
 - gu
 - mr
 - ml
 - or
 - pa
 - ta
 - te
 base_model:
 - mistralai/Mistral-Small-3.1-24B-Base-2503
 base_model_relation: finetune
 ---
 # Sarvam-M
 <p align="center">
  <a href="https://dashboard.sarvam.ai/playground"
     target="_blank" rel="noopener noreferrer">
    <img
      src="https://img.shields.io/badge/🚀 Chat on Sarvam&nbsp;Playground-1488CC?style=for-the-badge&logo=rocket"
      alt="Chat on Sarvam Playground"
    />
  </a>
 </p>
 # Model Information
 `sarvam-m` is a multilingual, hybrid-reasoning, text-only language model built on Mistral-Small. This post-trained version delivers exceptional improvements over the base model:
 - +20% average improvement on Indian language benchmarks
 - +21.6% enhancement on math benchmarks
 - +17.6% boost on programming benchmarks
 Performance gains are even more impressive at the intersection of Indian languages and mathematics, with an outstanding +86% improvement in romanized Indian language GSM-8K benchmarks.
 Learn more about sarvam-m in our detailed [blog post](https://www.sarvam.ai/blogs/sarvam-m).
 # Key Features
 - **Hybrid Thinking Mode**: A single versatile model supporting both "think" and "non-think" modes. Use the think mode for complex logical reasoning, mathematical problems, and coding tasks, or switch to non-think mode for efficient, general-purpose conversation.
 - **Advanced Indic Skills**: Specifically post-trained on Indian languages alongside English, embodying a character that authentically reflects and emphasizes Indian cultural values.
 - **Superior Reasoning Capabilities**: Outperforms most similarly-sized models on coding and math benchmarks, demonstrating exceptional reasoning abilities.
 - **Seamless Chatting Experience**: Full support for both Indic scripts and romanized versions of Indian languages, providing a smooth and accessible multilingual conversation experience.
 # Quickstart 
 The following code snippet demonstrates how to run `sarvam-m` with Transformers.
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 model_name = "sarvamai/sarvam-m"
 # load the tokenizer and the model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype="auto", device_map="auto"
 )
 # prepare the model input
 prompt = "Who are you and what is your purpose on this planet?"
 messages = [{"role": "user", "content": prompt}]
 text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    enable_thinking=True,  # Switches between thinking and non-thinking modes. Default is True.
 )
 model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 # conduct text completion
 generated_ids = model.generate(**model_inputs, max_new_tokens=8192)
 # keep only the newly generated tokens (drop the echoed prompt tokens)
 output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :].tolist()
 output_text = tokenizer.decode(output_ids)
 # Remove the trailing end-of-sequence marker with `removesuffix`, which drops the
 # exact string "</s>". `rstrip("</s>")` would instead strip any trailing run of the
 # characters "<", "/", "s" and ">", truncating answers that end in e.g. "...numbers".
 eos_marker = "</s>"
 if "</think>" in output_text:
    # text before the first </think> is the reasoning; text after the last one is the answer
    reasoning_content = output_text.split("</think>")[0].rstrip("\n")
    content = output_text.split("</think>")[-1].lstrip("\n").removesuffix(eos_marker)
 else:
    reasoning_content = ""
    content = output_text.removesuffix(eos_marker)
 print("reasoning content:", reasoning_content)
 print("content:", content)
 ```
 > [!NOTE]
 > For thinking mode, we recommend `temperature=0.5`; for non-think mode, `temperature=0.2`.
 # With Sarvam APIs
 ```python
 # Two-turn chat against the Sarvam endpoint using the OpenAI Python client.
 from openai import OpenAI
 base_url = "https://api.sarvam.ai/v1"
 model_name = "sarvam-m"
 api_key = "Your-API-Key"  # get it from https://dashboard.sarvam.ai/
 # Client pointed at the Sarvam base URL; a failed request is retried at most once.
 client = OpenAI(
    base_url=base_url,
    api_key=api_key,
 ).with_options(max_retries=1)
 messages = [
    {"role": "system", "content": "You're a helpful AI assistant"},
    {"role": "user", "content": "Explain quantum computing in simple terms"},
 ]
 response1 = client.chat.completions.create(
    model=model_name,
    messages=messages,
    reasoning_effort="medium",  # Enables thinking mode; pass `None` to disable it.
    max_completion_tokens=4096,
 )
 print("First response:", response1.choices[0].message.content)
 # Build the messages for the second turn: append the previous assistant reply
 # as context, followed by the new user question.
 messages.extend(
    [
        {
            "role": "assistant",
            "content": response1.choices[0].message.content,
        },
        {"role": "user", "content": "Can you give an analogy for superposition?"},
    ]
 )
 response2 = client.chat.completions.create(
    model=model_name,
    messages=messages,
    reasoning_effort="medium",
    max_completion_tokens=8192,
 )
 print("Follow-up response:", response2.choices[0].message.content)
 ```
 Refer to API docs here: [sarvam Chat Completions API docs](https://docs.sarvam.ai/api-reference-docs/chat/completions)
 `reasoning_effort` can take three possible values: `low`, `medium`, and `high` to be consistent with the OpenAI API spec. Setting any of the three values just enables the thinking mode of sarvam-m.
 # vLLM Deployment
 For easy deployment, we can use `vllm>=0.8.5` and create an OpenAI-compatible API endpoint with `vllm serve sarvamai/sarvam-m`.
 If you want to use vLLM with Python, you can do the following.
 ```python
 # Query a locally served model through vLLM's OpenAI-compatible API.
 from openai import OpenAI
 # Modify OpenAI's API key and API base to use vLLM's API server.
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
 )
 # Use the id of the first model the server reports.
 models = client.models.list()
 model = models.data[0].id
 messages = [{"role": "user", "content": "Why is 42 the best number?"}]
 # By default, thinking mode is enabled.
 # If you want to disable thinking, add:
 # extra_body={"chat_template_kwargs": {"enable_thinking": False}}
 response = client.chat.completions.create(model=model, messages=messages)
 output_text = response.choices[0].message.content
 # Split the reply at the closing </think> tag: the text before the first tag is
 # the reasoning, the text after the last tag is the final answer.
 if "</think>" in output_text:
    reasoning_content = output_text.split("</think>")[0].rstrip("\n")
    content = output_text.split("</think>")[-1].lstrip("\n")
 else:
    reasoning_content = ""
    content = output_text
 print("reasoning content:", reasoning_content)
 print("content:", content)
 # For the next round, add the model's response directly as assistant turn.
 messages.append(
    {"role": "assistant", "content": output_text}
 )
 ```
--- a/config.json
+++ b/config.json
@@ -0,0 +1,27 @@
 {
  "activation": "silu",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 32768,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 40,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.3",
  "use_cache": false,
  "vocab_size": 131072
 }
--- a/configuration.json
+++ b/configuration.json
@@ -0,0 +1 @@
 {"framework": "pytorch", "task": "text-generation", "allow_remote": true}
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,8 @@
 {
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.51.3",
  "temperature": 0.4,
  "use_cache": false
 }
--- a/model-00001-of-00010.safetensors
+++ b/model-00001-of-00010.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:c34bbfcb6ab60f38187fbd604f98674aff8ec3b08d72a691dd025ab99db72653
 size 4781571736
--- a/model-00002-of-00010.safetensors
+++ b/model-00002-of-00010.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:b211b82ee00b760bc481d9fa138d7eeaaf5b3069b6cab59e14bbf2d88d1a63e2
 size 4781592784
--- a/model-00003-of-00010.safetensors
+++ b/model-00003-of-00010.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:148cbdbfa1c8a07ff257671d80909048893487100041856025ece16521677153
 size 4781592800
--- a/model-00004-of-00010.safetensors
+++ b/model-00004-of-00010.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:2cd3d2366859d52f5b491c3f44ee44ae87bd1e7a026ef650889bd4309b2f29e1
 size 4886471600
--- a/model-00005-of-00010.safetensors
+++ b/model-00005-of-00010.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:8a1471e38ad93dd7ad2f3562f26968eac5ba3399c7339abd9b27810bd1730e1b
 size 4781592824
--- a/model-00006-of-00010.safetensors
+++ b/model-00006-of-00010.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:469116996fcf9fd612f2e6d1f9dff824d58667ffec3e93013b9ff6ae80a9af3b
 size 4781592816
--- a/model-00007-of-00010.safetensors
+++ b/model-00007-of-00010.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:cbe4bbc16dfa5e680ce95513efe89cc31a2825ecb461955fca523a06d24b8f05
 size 4886471600
--- a/model-00008-of-00010.safetensors
+++ b/model-00008-of-00010.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:1e77ad1e398d8a018892667f8bc7df52a774aeece8c76d37a29793e7ecf63055
 size 4781592824
--- a/model-00009-of-00010.safetensors
+++ b/model-00009-of-00010.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:5f504e49e563e0ebcd57e2757ef8cd23414454ec534b0d6f0967eaa0e81a823b
 size 4781592816
--- a/model-00010-of-00010.safetensors
+++ b/model-00010-of-00010.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:663533c8a69781f86c2079151ac26090c083c88dce1a8bf0bcc3593e3412a1a3
 size 3900777072
--- a/model.safetensors.index.json
+++ b/model.safetensors.index.json
@@ -0,0 +1,370 @@
 {
  "metadata": {
    "total_size": 47144806400
  },
  "weight_map": {
    "lm_head.weight": "model-00010-of-00010.safetensors",
    "model.embed_tokens.weight": "model-00001-of-00010.safetensors",
    "model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.10.input_layernorm.weight": "model-00003-of-00010.safetensors",
    "model.layers.10.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.10.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
    "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.11.input_layernorm.weight": "model-00004-of-00010.safetensors",
    "model.layers.11.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.11.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.11.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
    "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.12.input_layernorm.weight": "model-00004-of-00010.safetensors",
    "model.layers.12.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.12.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.12.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
    "model.layers.12.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.12.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.12.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.12.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.13.input_layernorm.weight": "model-00004-of-00010.safetensors",
    "model.layers.13.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.13.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
    "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.14.input_layernorm.weight": "model-00004-of-00010.safetensors",
    "model.layers.14.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.14.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
    "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.15.input_layernorm.weight": "model-00004-of-00010.safetensors",
    "model.layers.15.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.15.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
    "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.16.input_layernorm.weight": "model-00005-of-00010.safetensors",
    "model.layers.16.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.16.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.16.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.16.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
    "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
    "model.layers.17.input_layernorm.weight": "model-00005-of-00010.safetensors",
    "model.layers.17.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.17.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.17.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
    "model.layers.17.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.17.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.17.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.17.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.18.input_layernorm.weight": "model-00005-of-00010.safetensors",
    "model.layers.18.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.18.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.18.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
    "model.layers.18.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.18.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.18.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.18.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.19.input_layernorm.weight": "model-00005-of-00010.safetensors",
    "model.layers.19.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.19.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.19.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.19.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
    "model.layers.19.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.19.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.19.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.19.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.2.input_layernorm.weight": "model-00001-of-00010.safetensors",
    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.20.input_layernorm.weight": "model-00006-of-00010.safetensors",
    "model.layers.20.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.20.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.20.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
    "model.layers.20.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.20.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.20.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.20.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
    "model.layers.21.input_layernorm.weight": "model-00006-of-00010.safetensors",
    "model.layers.21.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.21.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.21.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.21.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
    "model.layers.21.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.21.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.21.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.21.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.22.input_layernorm.weight": "model-00006-of-00010.safetensors",
    "model.layers.22.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.22.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.22.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.22.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
    "model.layers.22.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.22.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.22.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.22.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.23.input_layernorm.weight": "model-00006-of-00010.safetensors",
    "model.layers.23.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.23.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.23.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.23.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
    "model.layers.23.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.23.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.23.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.23.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.24.input_layernorm.weight": "model-00007-of-00010.safetensors",
    "model.layers.24.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.24.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.24.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.24.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
    "model.layers.24.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.24.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.24.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.24.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
    "model.layers.25.input_layernorm.weight": "model-00007-of-00010.safetensors",
    "model.layers.25.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.25.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.25.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.25.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
    "model.layers.25.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.25.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.25.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.25.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.26.input_layernorm.weight": "model-00007-of-00010.safetensors",
    "model.layers.26.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.26.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.26.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.26.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
    "model.layers.26.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.26.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.26.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.26.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.27.input_layernorm.weight": "model-00007-of-00010.safetensors",
    "model.layers.27.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.27.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.27.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.27.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
    "model.layers.27.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.27.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.27.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.27.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.28.input_layernorm.weight": "model-00007-of-00010.safetensors",
    "model.layers.28.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.28.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.28.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.28.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
    "model.layers.28.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.28.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.28.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.28.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.29.input_layernorm.weight": "model-00008-of-00010.safetensors",
    "model.layers.29.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.29.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.29.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.29.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
    "model.layers.29.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.29.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.29.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.29.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
    "model.layers.3.input_layernorm.weight": "model-00002-of-00010.safetensors",
    "model.layers.3.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.3.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
    "model.layers.30.input_layernorm.weight": "model-00008-of-00010.safetensors",
    "model.layers.30.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.30.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.30.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.30.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
    "model.layers.30.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.30.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.30.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.30.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.31.input_layernorm.weight": "model-00008-of-00010.safetensors",
    "model.layers.31.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.31.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.31.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.31.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
    "model.layers.31.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.31.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.31.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.31.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.32.input_layernorm.weight": "model-00008-of-00010.safetensors",
    "model.layers.32.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.32.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.32.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.32.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
    "model.layers.32.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.32.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.32.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.32.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.33.input_layernorm.weight": "model-00009-of-00010.safetensors",
    "model.layers.33.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.33.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.33.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.33.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
    "model.layers.33.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.33.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.33.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.33.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
    "model.layers.34.input_layernorm.weight": "model-00009-of-00010.safetensors",
    "model.layers.34.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.34.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.34.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.34.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
    "model.layers.34.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.34.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.34.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.34.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.35.input_layernorm.weight": "model-00009-of-00010.safetensors",
    "model.layers.35.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.35.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.35.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.35.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
    "model.layers.35.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.35.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.35.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.35.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.36.input_layernorm.weight": "model-00009-of-00010.safetensors",
    "model.layers.36.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.36.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.36.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.36.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
    "model.layers.36.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.36.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.36.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.36.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.37.input_layernorm.weight": "model-00010-of-00010.safetensors",
    "model.layers.37.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.37.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.37.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.37.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
    "model.layers.37.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.37.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.37.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.37.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
    "model.layers.38.input_layernorm.weight": "model-00010-of-00010.safetensors",
    "model.layers.38.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.38.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.38.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.38.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
    "model.layers.38.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.38.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.38.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.38.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.39.input_layernorm.weight": "model-00010-of-00010.safetensors",
    "model.layers.39.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.39.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.39.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.39.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
    "model.layers.39.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.39.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.39.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.39.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
    "model.layers.4.input_layernorm.weight": "model-00002-of-00010.safetensors",
    "model.layers.4.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.4.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
    "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.5.input_layernorm.weight": "model-00002-of-00010.safetensors",
    "model.layers.5.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.5.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
    "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.6.input_layernorm.weight": "model-00002-of-00010.safetensors",
    "model.layers.6.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.6.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
    "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.7.input_layernorm.weight": "model-00003-of-00010.safetensors",
    "model.layers.7.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.7.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
    "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
    "model.layers.8.input_layernorm.weight": "model-00003-of-00010.safetensors",
    "model.layers.8.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.8.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
    "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.9.input_layernorm.weight": "model-00003-of-00010.safetensors",
    "model.layers.9.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.9.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
    "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
    "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
    "model.norm.weight": "model-00010-of-00010.safetensors"
  }
 }
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
--- a/tokenizer.json
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:86150969e5647911369bbf28086fdad5fa5134f887e1a03a4e94030ad8e5468a
 size 17078154
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
		`@@ -0,0 +1 @@`
							`{"framework": "pytorch", "task": "text-generation", "allow_remote": true}`