From 4c2c1f81a4acee752495c9772686a58002d355c7 Mon Sep 17 00:00:00 2001
From: ModelHub XC
Date: Sat, 2 May 2026 19:06:36 +0800
Subject: [PATCH] Initialize the project; model provided by the ModelHub XC
 community
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Model: lvziii/CodeV-R1-Qwen-7B
Source: Original Platform
---
 .gitattributes                     |  46 ++++
 README.md                          | 126 +++++++++++
 added_tokens.json                  |  24 ++
 assets/rtllm_acc_vs_model_size.png |   3 +
 assets/rtllm_tts.png               |   3 +
 assets/rtllm_tts_flops.png         |   3 +
 config.json                        |  30 +++
 generation_config.json             |   7 +
 merges.txt                         |   3 +
 model-00001-of-00004.safetensors   |   3 +
 model-00002-of-00004.safetensors   |   3 +
 model-00003-of-00004.safetensors   |   3 +
 model-00004-of-00004.safetensors   |   3 +
 model.safetensors.index.json       | 346 +++++++++++++++++++++++++++++
 special_tokens_map.json            |  31 +++
 tokenizer.json                     |   3 +
 tokenizer_config.json              | 209 +++++++++++++++++
 vocab.json                         |   3 +
 18 files changed, 849 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 README.md
 create mode 100644 added_tokens.json
 create mode 100644 assets/rtllm_acc_vs_model_size.png
 create mode 100644 assets/rtllm_tts.png
 create mode 100644 assets/rtllm_tts_flops.png
 create mode 100644 config.json
 create mode 100644 generation_config.json
 create mode 100644 merges.txt
 create mode 100644 model-00001-of-00004.safetensors
 create mode 100644 model-00002-of-00004.safetensors
 create mode 100644 model-00003-of-00004.safetensors
 create mode 100644 model-00004-of-00004.safetensors
 create mode 100644 model.safetensors.index.json
 create mode 100644 special_tokens_map.json
 create mode 100644 tokenizer.json
 create mode 100644 tokenizer_config.json
 create mode 100644 vocab.json

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..be8d8c8
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,46 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+assets/rtllm_acc_vs_model_size.png filter=lfs diff=lfs merge=lfs -text
+assets/rtllm_tts_flops.png filter=lfs diff=lfs merge=lfs -text
+assets/rtllm_tts.png filter=lfs diff=lfs merge=lfs -text
+
+model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
+vocab.json filter=lfs diff=lfs merge=lfs -text
+model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
+merges.txt filter=lfs diff=lfs merge=lfs -text
+model-00004-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
+model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..920c318
--- /dev/null
+++ b/README.md
@@ -0,0 +1,126 @@
+---
+base_model:
+- Qwen/Qwen2.5-Coder-7B-Instruct
+library_name: transformers
+tags:
+- verilog
+pipeline_tag: text-generation
+---
+
+## CodeV-R1-Qwen-7B
+
+[Project page](https://iprc-dip.github.io/CodeV-R1)
+
+<div align="center">
+  <img src="assets/rtllm_tts.png" alt="RTLLM TTS Results">
+  <img src="assets/rtllm_tts_flops.png" alt="RTLLM TTS FLOPs Results">
+  <p>
+    Test-time scaling curves. Left: inference as a function of token length. Right: inference vs. estimated FLOPs consumption.
+    When measured by FLOPs consumption, our CodeV-R1-Qwen-7B achieves better results with fewer computational resources than DeepSeek-R1, highlighting its superior efficiency.
+  </p>
+</div>
+
+### 1. Introduction
+
+Large language models (LLMs) trained via reinforcement learning with verifiable reward (RLVR) have achieved breakthroughs on tasks with explicit, automatable verification, such as software programming and mathematical problems. However, extending RLVR to electronic design automation (EDA), especially the automatic generation of hardware description languages (HDLs) like Verilog from natural-language (NL) specifications, poses three key challenges: the lack of automated and accurate verification environments, the scarcity of high-quality NL–code pairs, and the prohibitive computation cost of RLVR.
+
+To this end, we introduce **CodeV-R1**, an RLVR framework for training Verilog generation LLMs, continuing the work initiated with [CodeV](https://huggingface.co/collections/yang-z/codev-6698a560cd94e61a9675fa2a). First, we develop a rule-based testbench generator that performs robust equivalence checking against golden references. Second, we propose a round-trip data synthesis method that pairs open-source Verilog snippets with LLM-generated NL descriptions, verifies code–NL–code consistency via the generated testbench, and filters out inequivalent examples to yield a high-quality dataset. Third, we employ a two-stage distill-then-RL training pipeline: distillation for the cold start of reasoning abilities, followed by adaptive DAPO, our novel RLVR algorithm that reduces training cost by adaptively adjusting the sampling rate.
+
+**CodeV-R1-Qwen-7B** is the reinforcement-learning (RL) fine-tuned model built on top of **CodeV-R1-Distill-Qwen-7B**. The distillation-based precursor, **CodeV-R1-Distill-Qwen-7B**, is provided [here](https://huggingface.co/zhuyaoyu/CodeV-R1-Distill-Qwen-7B).
+For more training details, please refer to our [paper](https://arxiv.org/abs/2505.24183).
+
+### 2. Evaluation Results
+
+For evaluation, the maximum generation length is set to 16,384 tokens, the sampling temperature to 0.6, and 20 responses are generated per query to estimate the pass@1 score.
+
+We evaluate on the Verilog benchmarks VerilogEval and RTLLM. For VerilogEval v2, we examine zero-shot scenarios in both specification-to-RTL translation and code completion. For RTLLM, we report results on version 1.1, which allows comparison against a broader range of prior work. We also find that learning the reasoning process for Verilog problems, distilled from DeepSeek-R1, enhances the model's out-of-domain mathematical capabilities.
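+
+For reference, pass@1 from n sampled responses is conventionally computed with the unbiased pass@k estimator of Chen et al. (2021). The sketch below illustrates that estimator; treat the estimator choice and the sample counts in the example as illustrative assumptions rather than our exact evaluation code:
+
+```python
+import numpy as np
+
+def pass_at_k(n: int, c: int, k: int) -> float:
+    """Unbiased pass@k estimator: 1 - C(n-c, k) / C(n, k),
+    evaluated as a numerically stable product."""
+    if n - c < k:
+        return 1.0
+    return float(1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1)))
+
+# Example: 20 responses sampled for one query, 13 of which pass the testbench;
+# for k=1 the estimator reduces to c/n.
+print(pass_at_k(n=20, c=13, k=1))  # 0.65
+```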
+
+#### VerilogEval (v2)
+
+| Model                       | Model size  | Type        | Spec-to-rtl | Completion |
+| --------------------------- | ----------- | ----------- | ----------- | ---------- |
+| GPT-4o                      | Undisclosed | General     | 62.5%       | 59.0%      |
+| GPT-4 Turbo                 | Undisclosed | General     | 61.1%       | 53.9%      |
+| GPT-4                       | Undisclosed | General     | 32.0%       | 42.3%      |
+| Mistral Large               | Undisclosed | General     | 37.5%       | 34.0%      |
+| Llama3.1                    | 405B        | General     | 57.2%       | 56.4%      |
+| Llama3.1                    | 70B         | General     | 42.8%       | 35.3%      |
+| Llama3                      | 70B         | General     | 43.9%       | 37.8%      |
+| Llama2                      | 70B         | General     | 5.3%        | 1.3%       |
+| Llama3.1                    | 8B          | General     | 19.1%       | 2.6%       |
+| CodeLlama                   | 70B         | Coding      | 34.9%       | 37.2%      |
+| DeepSeek Coder              | 33B         | Coding      | 21.7%       | 25.0%      |
+| CodeGemma                   | 7B          | Coding      | 9.5%        | 8.3%       |
+| DeepSeek Coder              | 6.7B        | Coding      | 29.6%       | 24.4%      |
+| RTL-Coder                   | 6.7B        | Verilog RTL | 36.8%       | 35.9%      |
+| **CodeV-R1-distill (ours)** | 7B          | Verilog RTL | 65.2%       | 65.5%      |
+| **CodeV-R1 (ours)**         | 7B          | Verilog RTL | **68.8%**   | **69.9%**  |
+
+#### RTLLM (v1.1)
+
+| Model                       | Model size  | Type        | Pass@1    |
+| --------------------------- | ----------- | ----------- | --------- |
+| GPT-4o                      | Undisclosed | General     | 33.8%     |
+| GPT-3.5 Turbo               | Undisclosed | General     | 28.3%     |
+| Llama3.1                    | 405B        | General     | 38.9%     |
+| Nemotron-4                  | 340B        | General     | 18.9%     |
+| Llama3.1                    | 8B          | General     | 19.1%     |
+| CodeLlama                   | 7B          | Coding      | 17.9%     |
+| CodeQwen                    | 7B          | Coding      | 24.1%     |
+| Starcoder2                  | 15B         | Coding      | 15.5%     |
+| DeepSeek Coder              | 6.7B        | Coding      | 23.1%     |
+| DeepSeek-Coder-V2           | 16B         | Coding      | 33.1%     |
+| DeepSeek-Coder-V2           | 236B        | Coding      | 34.5%     |
+| RTL-Coder                   | 6.7B        | Verilog RTL | 36.8%     |
+| CraftRTL                    | 6.7B        | Verilog RTL | 53.1%     |
+| **CodeV-R1-distill (ours)** | 7B          | Verilog RTL | 56.2%     |
+| **CodeV-R1 (ours)**         | 7B          | Verilog RTL | **72.9%** |
+
+For RTLLM v1.1, we also plot results showing pass rate against model size.
+<div align="center">
+  <img src="assets/rtllm_acc_vs_model_size.png" alt="RTLLM v1.1 pass rate vs. model size">
+</div>
+
+### 3. Usage
+
+CodeV-R1-Distill-Qwen-7B can be used in the same way as Qwen or Llama models.
+
+For instance, you can easily start a service using [vLLM](https://github.com/vllm-project/vllm):
+
+```bash
+vllm serve zhuyaoyu/CodeV-R1-Distill-Qwen-7B --tensor-parallel-size 2 --max-model-len 16384 --enforce-eager
+```
+
+**Usage Recommendations**
+
+During training and evaluation, we use the following system prompt:
+
+````
+You are a helpful assistant. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. Now the user asks you to write verilog code. After thinking, when you finally reach a conclusion, enclose the final verilog code in ```verilog ``` within <answer> </answer> tags, i.e., <answer> ```verilog
+ module top_module(in, out, ...) ... ``` </answer>.
+````
+
+It is recommended to use this prompt during inference.
+
+### 4. License
+
+CodeV-R1-Qwen-7B is derived from the [Qwen-2.5 series](https://github.com/QwenLM/Qwen2.5), which is originally licensed under the [Apache 2.0 License](https://huggingface.co/Qwen/Qwen2.5-1.5B/blob/main/LICENSE), and is now fine-tuned with 87k samples curated with DeepSeek-R1.
+
+### 5. Citation
+
+If you find our model helpful, please cite our [paper](https://arxiv.org/abs/2505.24183):
+
+```tex
+@misc{zhu2025codevr1,
+  title={CodeV-R1: Reasoning-Enhanced Verilog Generation},
+  author={Yaoyu Zhu and Di Huang and Hanqi Lyu and Xiaoyun Zhang and Chongxiao Li and Wenxuan Shi and Yutong Wu and Jianan Mu and Jinghua Wang and Yang Zhao and Pengwei Jin and Shuyao Cheng and Shengwen Liang and Xishan Zhang and Rui Zhang and Zidong Du and Qi Guo and Xing Hu and Yunji Chen},
+  year={2025},
+  eprint={2505.24183},
+  archivePrefix={arXiv},
+  primaryClass={cs.LG},
+  url={https://arxiv.org/abs/2505.24183},
+}
+```
\ No newline at end of file
diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000..482ced4
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,24 @@
+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}
diff --git a/assets/rtllm_acc_vs_model_size.png b/assets/rtllm_acc_vs_model_size.png
new file mode 100644
index 0000000..2a72ad3
--- /dev/null
+++ b/assets/rtllm_acc_vs_model_size.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6eb500c3fd56f3f24177c38146765abb3f75a0233bb5949251113af67150888f
+size 476648
diff --git a/assets/rtllm_tts.png b/assets/rtllm_tts.png
new file mode 100644
index 0000000..4292bfa
--- /dev/null
+++ b/assets/rtllm_tts.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:197f0c2f1310355521a9d92202142da26291c7bfa64be63cf3223928d4d6467d
+size 249384
diff --git a/assets/rtllm_tts_flops.png b/assets/rtllm_tts_flops.png
new file mode 100644
index 0000000..2ca4661
--- /dev/null
+++ b/assets/rtllm_tts_flops.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid
sha256:28c24480ad15593aaec1f079186b45c0801861a7216c305d5aeb4c74a70a0767 +size 210381 diff --git a/config.json b/config.json new file mode 100644 index 0000000..a6f8e10 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "embd_pdrop": 0.0, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 18944, + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 28, + "num_hidden_layers": 28, + "num_key_value_heads": 4, + "pad_token_id": 151643, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 152064 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..e3d21ae --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "eos_token_id": 151645, + "pad_token_id": 151643, + "transformers_version": "4.51.0", + "use_cache": false +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..92a45a2 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e47b1b01a242ae0493516757365a5d848e905887fab67235fcb431bb22bba0 +size 4921754072 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..abbe661 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb1723b0be35fdfe19cfdffd5513573a5d378dff91bcc2dc691243a75fefa39 +size 4870285008 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..fb53062 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b4427736f1f3a480c91b1f47dbc490d07cd57da7804034c3156db528430f7b +size 4885043856 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..aa76edd --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90cfbc3bbb5b6666af4e30afe31b6ab00847d30060ba1cf226679e76f78bb911 +size 554188944 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..8cf75b8 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,346 @@ +{ + "metadata": { + "total_size": 15231233024 + }, + "weight_map": { + "lm_head.weight": "model-00003-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + 
"model.layers.0.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.12.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.15.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.25.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..51ebb3b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa +size 11421896 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..164f233 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,209 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 16384, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833
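
Note on usage: below is a minimal client-side sketch for querying the model once it is served with the vLLM command from the README's Usage section. Assumptions: the server runs locally on vLLM's default port 8000, the model id matches the one passed to `vllm serve`, and the system prompt is abridged here and should be copied verbatim from the README.

```python
from openai import OpenAI

# System prompt from the README's Usage section (abridged; use the full text verbatim).
SYSTEM_PROMPT = (
    "You are a helpful assistant. The assistant first thinks about the reasoning "
    "process in the mind and then provides the user with the answer. ..."
)

# vLLM exposes an OpenAI-compatible endpoint; the API key is required but unused.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="zhuyaoyu/CodeV-R1-Distill-Qwen-7B",  # must match the served model id
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": "Write a Verilog module implementing a 4-bit counter."},
    ],
    temperature=0.6,  # evaluation setting reported in the README
    max_tokens=8192,  # leave room for the prompt within --max-model-len 16384
)
print(response.choices[0].message.content)
```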